import pandas as pd
import numpy as np


# Input table; the script reads its 'Supercategory', 'Category' and 'ID' columns.
file = pd.read_csv('magenet_coarse_metaclass.csv')

# For every supercategory that has at least two distinct categories, draw two
# of them at random (without replacement). The first draw, with its ID, goes
# to metaclass_member.csv; the second goes to metaclass_nonmember.csv.
# NOTE: the draws use NumPy's global RNG, which is not seeded here, so the
# output differs between runs.

# Distinct supercategory labels, in order of first appearance.
supercategories = file['Supercategory'].unique()

id1 = []
id2 = []
category_list1 = []
category_list2 = []
for supercategory in supercategories:
    # Filter once per supercategory and reuse the subset for the ID lookups.
    rows = file[file['Supercategory'] == supercategory]
    # Drop missing categories: NaN never compares equal to itself, so picking
    # one would make the ID lookup below come back empty and fail.
    categories = rows['Category'].dropna().unique()
    if len(categories) <= 1:
        # Not enough distinct categories to form a pair.
        continue
    first, second = np.random.choice(categories, 2, replace=False)
    # Look the ID up inside this supercategory's rows, so a category label
    # that also appears under another supercategory cannot supply its ID.
    id1.append(rows.loc[rows['Category'] == first, 'ID'].values[0])
    id2.append(rows.loc[rows['Category'] == second, 'ID'].values[0])
    category_list1.append(first)
    category_list2.append(second)

print(category_list1)
print(category_list2)

# Save each list of picks, paired with its IDs, to its own CSV file.
df = pd.DataFrame({'ID': id1, 'Category': category_list1})
df.to_csv('metaclass_member.csv', index=False)

df = pd.DataFrame({'ID': id2, 'Category': category_list2})
df.to_csv('metaclass_nonmember.csv', index=False)
