import pandas as pd
import random
import ast

DATA_PATH = '/NS/llm-1/nobackup/qwu/dataset/sync_random_o/105-21.csv'
# Rows with fewer alternate facts than this are topped up to exactly this many.
TARGET_COUNT = 99


def collect_unique_facts(alternative_lists):
    """Return every distinct fact in ``alternative_lists``, in first-seen order.

    Args:
        alternative_lists: Iterable of lists of hashable facts.

    Returns:
        A list containing each fact once. ``dict.fromkeys`` is used instead of
        ``set`` so the order does not depend on string hashing.
    """
    return list(dict.fromkeys(
        fact
        for alternatives in alternative_lists
        for fact in alternatives
    ))


def pad_alternative_facts(fact, alternatives, pool, target=TARGET_COUNT,
                          rng=random):
    """Return ``alternatives`` topped up to ``target`` entries drawn from ``pool``.

    Candidates are the members of ``pool`` that are neither ``fact`` itself nor
    already present in ``alternatives``. Neither ``alternatives`` nor ``pool``
    is modified, so every row is padded from the same full pool.

    Args:
        fact: The row's own fact; never selected as a filler.
        alternatives: The row's existing alternate facts.
        pool: All facts that may be used as fillers.
        target: Desired length of the returned list.
        rng: Object providing ``sample(population, k)``; defaults to the
            ``random`` module.

    Returns:
        A new list: the original alternatives followed by the sampled fillers.
        If ``alternatives`` already has ``target`` or more entries, an
        unchanged copy is returned.

    Raises:
        ValueError: If the pool holds too few eligible candidates.
    """
    missing = target - len(alternatives)
    if missing <= 0:
        return list(alternatives)

    excluded = set(alternatives)
    excluded.add(fact)
    candidates = [candidate for candidate in pool if candidate not in excluded]
    if len(candidates) < missing:
        raise ValueError(
            f'Need {missing} more facts but only {len(candidates)} '
            f'candidates are available'
        )
    return list(alternatives) + rng.sample(candidates, missing)


def main(path=DATA_PATH, target=TARGET_COUNT):
    """Pad the 'Alternate Facts' column of the CSV at ``path`` and save it.

    Reads the CSV, parses each 'Alternate Facts' cell as a Python literal,
    tops up every list shorter than ``target`` with random facts taken from the
    other rows' lists, and writes the result back to ``path``.

    Args:
        path: CSV file with 'Fact' and 'Alternate Facts' columns. The file is
            overwritten in place.
        target: Minimum number of alternate facts each row should end up with.
    """
    data_df = pd.read_csv(path)
    facts = data_df['Fact']
    # Parse every cell once; literal_eval only accepts Python literals.
    parsed_alternatives = [
        ast.literal_eval(cell) for cell in data_df['Alternate Facts']
    ]

    # The filler pool is built from the alternate-fact lists only.
    pool = collect_unique_facts(parsed_alternatives)
    print(len(pool))

    new_alternative_facts = []
    for fact, alternatives in zip(facts, parsed_alternatives):
        if len(alternatives) < target:
            print('Need to add more facts')
            print(len(alternatives))
            # Filtering happens on a per-row copy, so excluding this row's
            # facts does not shrink the pool seen by later rows.
            alternatives = pad_alternative_facts(fact, alternatives, pool,
                                                 target)
            print('After adding more facts')
            print(len(alternatives))
        # Store every row as a string so the column has one consistent type.
        new_alternative_facts.append(str(alternatives))

    data_df['Alternate Facts'] = new_alternative_facts
    # NOTE: this overwrites the input file rather than creating a new one.
    data_df.to_csv(path, index=False)
    print('done')


if __name__ == '__main__':
    main()


