import argparse
import pickle

import pandas as pd



if __name__ == "__main__":
    
    parser = argparse.ArgumentParser()
    
    parser.add_argument('--label', default='Big_Nose', choices=['Male', 'Big_Nose', 'Pointy_Nose', 'Eyeglasses', 'Narrow_Eyes'], type=str,
                        help='the label for the cv dataset')
    parser.add_argument('--proportion', default=.01, type=float,
                        help='proportion of data')
    
    args = parser.parse_args()
    label = args.label
    proportion = args.proportion

    df_attr = pd.read_csv('data_cv/list_attr_celeba.csv')
    df_attr.set_index('image_id', inplace=True)
    df_attr.replace(to_replace=-1, value=0, inplace=True) #replace -1 by 0


    stop_idx = int(len(df_attr)*proportion)
    df_attr = df_attr.iloc[:stop_idx+1, :]
    print(df_attr.index.tolist()[-1])

    attr_dict = dict(zip(df_attr.index, df_attr[label]))


    import pickle

    with open(f'data_cv/label_{label}.dict', 'wb') as f:
        pickle.dump(attr_dict, f)