import os 
import random

import pandas as pd

# Reminder emitted every time this module is loaded: DATA_PATH is referenced by
# rmv_file() and by the script body below, but it is not defined in this file.
print('add DATA_PATH')
# TODO: define DATA_PATH before running. The code below joins it with synset
# directory names and also writes 'removed_obs.csv' directly inside it, so it
# is presumably the dataset root with one sub-directory per synset — confirm.
# DATA_PATH =

def rmv_file(synset, filename):
    """Delete a single file from a synset directory.

    Args:
        synset: Name of the sub-directory of ``DATA_PATH`` holding the file.
        filename: Name of the file inside that sub-directory.

    Returns:
        The full path of the file that was deleted.

    Raises:
        OSError: Propagated from ``os.remove`` (e.g. the file does not exist).
    """
    # Build the full path once and reuse it for both the removal and the
    # return value.
    target = os.path.join(DATA_PATH, synset, filename)
    os.remove(target)
    return target

if __name__ == '__main__':
    # Script entry point: pick a random handful of synset directories under
    # DATA_PATH and delete files from each until only a few remain (1 file in
    # the first chosen synset, 2 in the second, ... up to 11), then log every
    # deletion to DATA_PATH/removed_obs.csv.
    #
    # WARNING: this permanently deletes files under DATA_PATH.
    #
    # The code is deliberately kept flat at module level (no main() function)
    # so that every intermediate variable is a global that can be inspected
    # from the two debugger breakpoints.
    import pdb

    # Fixed seed so that the same synsets and files are drawn on every run.
    random.seed(42)
    # Breakpoint before anything on disk is touched.
    pdb.set_trace()

    # os.listdir() returns entries in arbitrary order; sort so that the seeded
    # sampling below is actually reproducible across runs and machines.
    data_dirs = sorted(
        d for d in os.listdir(DATA_PATH)
        if os.path.isdir(os.path.join(DATA_PATH, d))
    )

    # Snapshot of per-synset entry counts before any deletion (for inspection).
    numb_obs = [len(os.listdir(os.path.join(DATA_PATH, d))) for d in data_dirs]
    df = pd.DataFrame({'synset': data_dirs, 'numb_obs': numb_obs})

    removed_files = []
    synsets_rmv = []

    # Number of files to KEEP in each selected synset: 1, 2, ..., 11.
    rm_images = list(range(1, 12))
    # Never ask for more synsets than exist; random.sample() raises ValueError
    # when k exceeds the population size.
    small_subset = random.sample(
        data_dirs, k=min(len(rm_images), len(data_dirs))
    )

    for keep_count, d in zip(rm_images, small_subset):
        obs_synset = sorted(os.listdir(os.path.join(DATA_PATH, d)))
        # Clamp at zero: a synset that already holds no more than keep_count
        # entries is left untouched. A negative count would make
        # random.sample() raise in the middle of the run, after earlier
        # synsets were already thinned but before the log was written.
        elim = max(len(obs_synset) - keep_count, 0)
        files_2_rmv = random.sample(obs_synset, elim)
        for filename in files_2_rmv:
            rmv_file(d, filename)
            # Record only after the removal succeeded so the log lists stay
            # accurate even if a later removal fails.
            synsets_rmv.append(d)
            removed_files.append(filename)

    rmv_df = pd.DataFrame({'synset': synsets_rmv, 'files_removed': removed_files})
    csv_path = os.path.join(DATA_PATH, 'removed_obs.csv')
    # Append to an existing log; write the header row only when the file is
    # being created by this run.
    rmv_df.to_csv(
        csv_path,
        mode='a',
        index=False,
        header=not os.path.exists(csv_path),
    )

    # Recount after deletion so the result can be compared against df.
    numb_obs = [len(os.listdir(os.path.join(DATA_PATH, d))) for d in data_dirs]
    df_2 = pd.DataFrame({'synset': data_dirs, 'numb_obs': numb_obs})
    print('check df_2 for numb_obs column')
    # Final breakpoint: inspect df, df_2 and rmv_df interactively.
    pdb.set_trace()
