import time
import os
import sys
import numpy as np
import pandas as pd

def combine_replications(path, nb_reps):
    """Merge the per-replication result files found in *path*.

    Looks for ``rep_1.h5`` ... ``rep_{nb_reps}.h5`` inside *path*, reads the
    ``'res'`` key of each one that exists and concatenates the resulting
    frames into ``all_reps.h5`` (key ``'all_reps'``).

    The replication files are removed only AFTER the combined file has been
    written successfully. If ``all_reps.h5`` already exists it is left
    untouched and the replication files are kept, so no result is ever
    deleted without having been saved first.

    Args:
        path: Directory containing the ``rep_{i}.h5`` files.
        nb_reps: Number of replications that were scheduled.

    Returns:
        The number of replication files that were read successfully.
    """
    # Progress is reported every ~10% of the scheduled replications; with
    # fewer than 10 replications the interval is 0 and nothing is reported.
    progress_every = nb_reps // 10

    frames = []
    consumed_files = []
    for rep in range(1, nb_reps + 1):
        rep_file = os.path.join(path, f'rep_{rep}.h5')
        try:
            frames.append(pd.read_hdf(rep_file, key='res'))
        except FileNotFoundError:
            # A missing replication is tolerated: it is simply not combined.
            pass
        except (KeyError, ValueError, OSError, RuntimeError) as err:
            # Unreadable/corrupt file or missing key: skip it, but say so,
            # and keep the file on disk for inspection.
            print(f'Skipping {rep_file}: {err!r}', file=sys.stderr)
        else:
            consumed_files.append(rep_file)

        if progress_every and rep % progress_every == 0:
            print(f'{rep} replications out of {nb_reps} combined')

    if not frames:
        # pd.concat raises ValueError on an empty list.
        print(f'No replication file could be read in {path}',
              file=sys.stderr)
        return 0

    combined_file = os.path.join(path, 'all_reps.h5')
    if os.path.exists(combined_file):
        # Never overwrite previous results, and never delete replication
        # files whose content has not been saved.
        print(f'{combined_file} already exists; '
              'replication files left in place', file=sys.stderr)
        return len(frames)

    pd.concat(frames).to_hdf(combined_file, key='all_reps')

    # The combined file is safely on disk: the inputs can now be removed.
    for rep_file in consumed_files:
        try:
            os.remove(rep_file)
        except OSError as err:
            print(f'Could not remove {rep_file}: {err!r}', file=sys.stderr)

    return len(frames)


def main(argv=None):
    """Command-line entry point.

    Expected arguments: ``ALGO SAMPLE_SIZE NB_REPS RES_FOLDER``. The
    replication files are looked up in ``RES_FOLDER/ALGO/n_{SAMPLE_SIZE}``.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        sys.exit(f'usage: {argv[0] if argv else "combine"} '
                 'ALGO SAMPLE_SIZE NB_REPS RES_FOLDER')

    start_time = time.time()

    algo = str(argv[1])
    sample_size = int(argv[2])
    nb_reps = int(argv[3])
    res_folder = str(argv[4])

    path = os.path.join(res_folder, algo, f'n_{sample_size}')

    true_n_reps = combine_replications(path, nb_reps)
    print(true_n_reps)

    print(' ')
    print('Finished combining:', time.time() - start_time)


if __name__ == '__main__':
    main()
