import os
import pandas as pd
import json
# urbancars_country = os.listdir('/media/exx/HDD/rwiddhic/aug_datasets/ConBias/UrbanCars/country')
# urbancars_urban = os.listdir('/media/exx/HDD/rwiddhic/aug_datasets/ConBias/UrbanCars/urban')

# metadata = pd.DataFrame(columns=['img_filename', 'y', 'place'])
# print(len(urbancars_country))
# print(len(urbancars_urban))

# for img in urbancars_country:
#     metadata = metadata._append({'img_filename': f'/media/exx/HDD/rwiddhic/aug_datasets/ConBias/UrbanCars/country/{img}', 'y': 0, 'place': 0}, ignore_index=True)

# for img in urbancars_urban:
#     metadata = metadata._append({'img_filename': f'/media/exx/HDD/rwiddhic/aug_datasets/ConBias/UrbanCars/urban/{img}', 'y': 1, 'place': 0}, ignore_index=True)

# #Sample 1300 images from the dataset
# metadata = metadata.sample(1300)
# metadata.to_csv('/media/exx/HDD/rwiddhic/aug_datasets/ConBias/UrbanCars/additional_metadata_uc.csv', index=False)
# Build the metadata CSV for the COCO-GB images.
#
# Each '.jpg' in the COCO-GB directory becomes one row with columns
# 'img_filename' (absolute path), 'y' (1 if the image id is listed under
# 'man' in the gender annotations, 0 if listed under 'woman') and 'place'
# (always 0 in this script).
COCO_GB_DIR = '/media/exx/HDD/rwiddhic/aug_datasets/ALIA/COCO-GB/'

with open('/media/exx/HDD/rwiddhic/coco/annotations/gender_annotations_corrected.json', 'r') as f:
    gender_data = json.load(f)

# Key views give O(1) membership tests against the annotated ids.
gender_man = gender_data['man'].keys()
gender_woman = gender_data['woman'].keys()

coco_gb = [img for img in os.listdir(COCO_GB_DIR) if img.endswith('.jpg')]

# Collect rows in a plain list and build the DataFrame once at the end:
# appending to a DataFrame inside the loop copies the whole frame on every
# iteration and relied on the private `DataFrame._append` method.
records = []
for img in coco_gb:
    # The id is the part of the filename before the first '_', with leading
    # zeros stripped so it matches the annotation keys
    # (presumably COCO image ids -- confirm against the annotation file).
    img_name = img.split('_')[0].lstrip('0')
    if img_name in gender_man:
        # 'man' takes precedence if an id appears under both labels.
        y = 1
    elif img_name in gender_woman:
        y = 0
    else:
        # Explicit raise instead of `assert`: asserts are stripped under
        # `python -O`, which would let a stale or undefined label through.
        raise ValueError(
            f"Image id {img_name!r} (from file {img!r}) not found in gender annotations"
        )

    records.append({'img_filename': f'{COCO_GB_DIR}{img}', 'y': y, 'place': 0})

# Passing `columns` keeps the header and column order even when no images
# were found.
metadata = pd.DataFrame(records, columns=['img_filename', 'y', 'place'])

#metadata = metadata.sample(260)
metadata.to_csv(COCO_GB_DIR + 'additional_metadata_coco.csv', index=False)


print("Shape of metadata: ", metadata.shape)