import numpy as np

def generateZipf(n, a=1):
    '''
    Build Zipf-like frequency data over n elements.

    The element of rank i (for i = 1..n) is assigned the frequency
    n // i**a, i.e. n / i^a rounded down by floor division.

    Returns a numpy array holding the n frequencies in rank order.
    '''
    frequencies = []
    for rank in range(1, n + 1):
        # Python-level arithmetic is kept on purpose: rank**a cannot
        # overflow the way a fixed-width numpy integer power could.
        frequencies.append(n // rank ** a)
    return np.array(frequencies)

def loadAOL(verbose=False):
    '''
    Load paired AOL count / oracle-prediction arrays from disk.

    For every id in 10..91 this reads the 'arr_0' entry of
    data/aol_actual/aol_actual_counts_<id>.npz and of
    data/aol_predictions/aol_oracle_<id>.npz (paths are relative to the
    current working directory).

    An id is skipped entirely when either of its two files is missing, so
    data[k] and preds[k] always come from the same id.

    verbose: when true, print 'Loading...' once and then each id that was
        loaded successfully.

    Returns a tuple (data, preds) of two equally long lists of arrays.
    '''
    dataPath = 'data/aol_actual/'
    predPath = 'data/aol_predictions/'
    data = []
    preds = []
    if verbose:
        print('Loading...')
    for file_id in range(10, 92):
        try:
            # Load both halves before touching the result lists; appending
            # the counts first would leave data and preds misaligned when
            # only the prediction file is absent.
            # The with-blocks close the underlying .npz archive handles.
            with np.load(dataPath + 'aol_actual_counts_{}.npz'.format(file_id)) as archive:
                actual = archive['arr_0']
            with np.load(predPath + 'aol_oracle_{}.npz'.format(file_id)) as archive:
                predicted = archive['arr_0']
        except FileNotFoundError:
            # Best-effort loading: not every id in the range has files.
            continue
        data.append(actual)
        preds.append(predicted)
        if verbose:
            print(file_id)

    return data, preds

def loadCAIDA(verbose=False):
    '''
    Load paired CAIDA IP count / prediction arrays from disk.

    For every id in 131000, 131100, ..., 135900 this reads the 'actual'
    entry of data/ip_actual/ip_<id>_actual.npz and the 'predictions' entry
    of data/ip_predictions/ip_<id>_predictions.npz (paths are relative to
    the current working directory).

    An id is skipped entirely when either of its two files is missing, so
    data[k] and preds[k] always come from the same id.

    verbose: when true, print 'Loading...' once and then each id that was
        loaded successfully.

    Returns a tuple (data, preds) of two equally long lists of arrays.
    '''
    dataPath = 'data/ip_actual/'
    predPath = 'data/ip_predictions/'
    data = []
    preds = []
    if verbose:
        print('Loading...')
    for file_id in range(131000, 135910, 100):
        try:
            # Load both halves before touching the result lists; appending
            # the counts first would leave data and preds misaligned when
            # only the prediction file is absent.
            # The with-blocks close the underlying .npz archive handles.
            with np.load(dataPath + 'ip_{}_actual.npz'.format(file_id)) as archive:
                actual = archive['actual']
            with np.load(predPath + 'ip_{}_predictions.npz'.format(file_id)) as archive:
                predicted = archive['predictions']
        except FileNotFoundError:
            # Best-effort loading: not every id in the range has files.
            continue
        data.append(actual)
        preds.append(predicted)
        if verbose:
            print(file_id)

    return data, preds

if __name__ == '__main__':
    # Smoke run when executed as a script: load each dataset in turn,
    # printing progress as files are found.
    for loader in (loadAOL, loadCAIDA):
        loader(verbose=True)
