import pandas as pd
import os
import random

# First, sample 400 landbird images and 400 waterbird images from the ALIA dataset.
# Then create a metadata file containing the image paths and labels.
def sample_images(
    k=400,
    root='/media/exx/HDD/rwiddhic/aug_datasets/ConBias/Waterbirds',
    seed=None,
):
    """Randomly pick ``k`` entries from each of the two class directories.

    Args:
        k: Number of directory entries to draw from each class directory.
        root: Directory that contains the ``landbird`` and ``waterbird``
            sub-directories. Defaults to the location this script was
            written for.
        seed: Optional seed for a private random generator. When given, the
            same directory contents always produce the same sample. When
            ``None`` (the default), the global ``random`` module state is
            used.

    Returns:
        A ``(landbird_images, waterbird_images)`` tuple. Each element is a
        list of ``k`` distinct bare file names (not full paths).

    Raises:
        ValueError: If a class directory holds fewer than ``k`` entries, or
            if ``k`` is negative.
        OSError: If a class directory cannot be listed.
    """
    rng = random if seed is None else random.Random(seed)

    samples = []
    for class_name in ('landbird', 'waterbird'):
        class_dir = os.path.join(root, class_name)
        # os.listdir returns entries in arbitrary order; sorting makes a
        # seeded draw depend only on the directory contents.
        candidates = sorted(os.listdir(class_dir))
        if k > len(candidates):
            raise ValueError(
                f'Cannot sample {k} entries from {class_dir}: '
                f'only {len(candidates)} available'
            )
        samples.append(rng.sample(candidates, k))

    landbird_images, waterbird_images = samples
    return landbird_images, waterbird_images

# Draw the per-class samples; sample_images returns bare file names.
landbird_images, waterbird_images = sample_images()

# Root directory holding the class sub-directories and the output CSV.
WATERBIRDS_ROOT = '/media/exx/HDD/rwiddhic/aug_datasets/ConBias/Waterbirds'

# Collect all rows first and build the DataFrame once. This avoids the
# private DataFrame._append API and the per-row copy it makes.
# Label convention: landbird -> 0, waterbird -> 1.
rows = [
    {'img_filename': f'{WATERBIRDS_ROOT}/landbird/{image}', 'y': 0}
    for image in landbird_images
]
rows += [
    {'img_filename': f'{WATERBIRDS_ROOT}/waterbird/{image}', 'y': 1}
    for image in waterbird_images
]

# Passing columns explicitly keeps the header even if no rows were sampled.
df = pd.DataFrame(rows, columns=['img_filename', 'y'])

df.to_csv(f'{WATERBIRDS_ROOT}/additional_metadata.csv', index=False)