import pandas as pd
import json
import os

# Directory that holds the input `counterfact.json` and receives the
# generated `counterfact_true_false.csv`.
ROOT = '/home/ubuntu/statement_reps/datasets'

# Whitelist of prompt templates. A record is kept only when its
# `requested_rewrite.prompt` is exactly equal to one of these strings; the
# `{}` placeholder is later replaced by the record's subject, and the target
# is appended after a single space to form a full statement.
relations = [
    "In {}, they understand",
    "{} is a professional",
    "The occupation of {} is",
    "{} passed away at",
    "The twin city of {} is",
    "The language of {} is",
    "{} is based in",
    "{} can be found in",
    "{}'s music label is",
    "{}'s record company is",
    "{} is located in the country of",
    "The mother tongue of {} is",
    "{} was created by",
    "{} was created in the country of",
    "{}'s record label is",
    "{} was formulated in",
    "{} is a native speaker of",
    "{} originated in",
    "{} is a part of the continent of",
    "{} is owned by",
    "{} is affiliated with the religion",
    "{} was developed by",
    "{} specializes in",
    "{} is known for performing",
    "The language of {} was",
    "{} was originally aired on",
    "{} is a twin city of",
    "{} plays",
    "{} passed away in",
    "The domain of work of {} is",
    "{} is a citizen of",
    "{} died in the city of",
    "{}'s expertise is",
    "{} has the position of",
    "{} is represented by music label",
    "{}'s profession is a",
    "{} belongs to the continent of",
    "The headquarters of {} is in",
    "{} recorded for",
    "{} plays the",
    "{} was named for",
    "{} was written in",
    "The headquarter of {} is in",
    "{} was from",
    "{}'s profession is an",
    "{}'s area of work is",
    "In {}, an official language is",
    "The native language of {} is",
    "{} was released on",
    "{} was called after",
    "{} is represented by",
    "{} debuted on",
    "The headquarter of {} is located in",
    "{} is called after",
    "{} plays as",
    "{} was a product of",
    "{}'s capital is",
    "{} used to work in",
    "{} is a part of the",
    "{} is written in",
    "{}'s headquarters are in",
    "{} died in",
    "The music label representing {} is",
    "The capital city of {} is",
    "{} professionally plays the sport",
    "{}'s domain of work is",
    "{} is developed by",
    "{}'s life ended in",
    "{} is represented by record label",
    "{} worked in the city of",
    "The original language of {} is",
    "{} works in the area of",
    "{} died at",
    "{}'s occupation is",
    "The domain of activity of {} is",
    "{} plays the instrument",
    "{} is a product of",
    "{} is in",
    "In {}, the language spoken is",
    "{}'s position is",
    "{} originated from",
    "{} is named after",
    "{} is native to",
    "{} belongs to the organization of",
    "{} premieres on",
    "{} is within",
    "{} was named after",
    "The profession of {} is",
    "{} is produced by",
    "{} originates from",
    "{} works in the field of",
    "{} holds the position of",
    "{} started in",
    "{} was founded in",
    "{} is a",
    "{} is from",
    "{} has a citizenship from",
    "{} is located in the continent",
    "{} works as",
    "{} holds a citizenship from",
    "{} spoke the language",
    "The language used by {} is",
    "{} speaks the language",
    "{} was developed in",
    "{}'s label is",
    "{} speaks",
    "{} was started in",
    "The law in {} declares the language",
    "{} plays in the position of",
    "{} succumbed at",
    "{} is follower of",
    "The location of {} is",
    "The capital of {} is",
    "The original language of {} was",
    "{} performs on the",
    "{} expired at",
    "{} was created in",
    "{} was employed in",
    "{} is headquartered in",
    "{} is located in",
    "{} is to debut on",
    "{} was native to",
    "{} was formed in",
    "{} took up work in",
    "The music label that is representing {} is",
    "The genre played by {} is",
    "{} is employed by",
    "{} formed in",
    "{} is named for",
    "The expertise of {} is",
    "The official language of {} is",
    "{} found employment in",
    "{} works for",
    "{} writes in",
    "{} follows the religion of",
    "The official religion of {} is",
    "{}'s domain of activity is",
    "{} is affiliated with",
    "{} lost their life at",
    "{} performs",
    "{} is created by",
    "{} is originally from",
    "{} is a member of",
    "{} is affiliated with the",
    "{} was originally from",
    "{}'s capital city is",
    "{} was born in",
    "{} premiered on",
    "{} holds the title of",
    "{} worked in",
]

# Build a CSV of paired true/false statements from the CounterFact records.
#
# For every record whose prompt template is whitelisted in `relations`, two
# rows are emitted back to back: the statement completed with the true target
# (label 1) followed by the statement completed with the counterfactual
# target (label 0).

# JSON is UTF-8 by specification; pin the encoding instead of relying on the
# platform default so non-ASCII subjects decode identically everywhere.
with open(os.path.join(ROOT, 'counterfact.json'), encoding='utf-8') as f:
    data = json.load(f)

# Set view of the whitelist, built once, so the per-record membership test
# does not rescan the whole list.
allowed_relations = set(relations)

# Column-oriented accumulator; the key order here fixes the CSV column order.
columns = {
    'statement': [],
    'label': [],
    'relation': [],
    'subject': [],
    'target': [],
    'true_target': [],
}

for case in data:
    rewrite = case['requested_rewrite']
    relation = rewrite['prompt']
    if relation not in allowed_relations:
        continue
    subject = rewrite['subject']
    target_true = rewrite['target_true']['str']
    target_false = rewrite['target_new']['str']

    # Fill the template once; the two statements differ only in their target.
    stem = relation.replace('{}', subject)
    true_statement = stem + ' ' + target_true + '.'
    false_statement = stem + ' ' + target_false + '.'

    # Each record contributes exactly two rows: true first, then false.
    columns['statement'].extend((true_statement, false_statement))
    columns['label'].extend((1, 0))
    columns['relation'].extend((relation, relation))
    columns['subject'].extend((subject, subject))
    columns['target'].extend((target_true, target_false))
    # `true_target` carries the correct answer on both rows so the false row
    # can still be traced back to it.
    columns['true_target'].extend((target_true, target_true))

df_out = pd.DataFrame(columns)
df_out.to_csv(os.path.join(ROOT, 'counterfact_true_false.csv'), index=False)

