import pandas as pd
import ast
import os

# Elicitation-format name -> numeric treatment code. The names double as the
# output folder / file prefixes when the responses are split by treatment.
treatment_mapping = {
    'top-none': 1,
    'top-top': 2,
    'top-rank': 3,
    'rank-none': 4,
    'rank-top': 5,
    'rank-rank': 6,
    'subset3-rank': 7,
    'top-subset3': 8,
    'subset2-subset2': 9,
}

complete_responses = pd.read_csv('responses.csv')

# Manual correction of the first row's vote (stored as the string form of a
# list). Assign through .loc rather than the chained df['votes'][0] = ...:
# the chained form writes to an intermediate Series, which emits
# SettingWithCopyWarning and leaves the frame untouched under copy-on-write.
complete_responses.loc[0, 'votes'] = "['China']"

# Reference tables, one CSV per domain. The order of this list must match the
# tuple unpacking below.
file_paths = [
    'geography.csv',
    'movies.csv',
    'paintings.csv',
]

# Read each CSV file into a DataFrame
dataframes = [pd.read_csv(file_path) for file_path in file_paths]

geography_df, movies_df, paintings_df = dataframes

# Manual correction of the title in row 2. Assign through .loc rather than the
# chained df['title'][2] = ...: the chained form writes to an intermediate
# Series, which emits SettingWithCopyWarning and leaves the frame untouched
# under copy-on-write.
geography_df.loc[2, 'title'] = "United States"

# Per-domain lookup tables keyed by item title, giving that item's rank.
# If a title occurs more than once, the last occurrence wins.
geography_dict = dict(zip(geography_df['title'], geography_df['rank']))
movies_dict = dict(zip(movies_df['title'], movies_df['rank']))
paintings_dict = dict(zip(paintings_df['title'], paintings_df['rank']))


def map_values(row, column_name):
    """Translate the stringified list in ``row[column_name]`` via the domain's lookup table.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing a 'domain'
            entry and the ``column_name`` entry.
        column_name: Name of the entry holding the string representation of
            a list of items.

    Returns:
        A list with one element per parsed item: the value found in the
        dictionary selected by ``row['domain']`` (1 -> geography_dict,
        2 -> movies_dict, 3 -> paintings_dict), or None for items that are
        not in that dictionary. Returns None when the cell is null or the
        domain is none of 1, 2, 3.
    """
    domain = row['domain']
    if pd.isnull(row[column_name]):
        return None

    # The cell holds the string form of a list; parse it back before the
    # domain is inspected so malformed cells fail regardless of domain.
    parsed_items = ast.literal_eval(row[column_name])

    if domain == 1:
        lookup = geography_dict
    elif domain == 2:
        lookup = movies_dict
    elif domain == 3:
        lookup = paintings_dict
    else:
        return None

    return [lookup.get(entry) for entry in parsed_items]

# Run both answer columns through map_values row by row, replacing each
# stringified list with the list of values looked up for that row's domain.
for answer_column in ('votes', 'predictions'):
    complete_responses[answer_column] = complete_responses.apply(
        map_values, axis=1, column_name=answer_column
    )

# Drop the listed columns, then lower-case the remaining headers.
redundant_columns = ['Worker', 'Problem', 'Treatment', 'Domain', 'response']
complete_responses.drop(columns=redundant_columns, inplace=True)
complete_responses.columns = complete_responses.columns.str.lower()


print(complete_responses.head(10))

# Export one CSV per (treatment, domain) pair, laid out as
#   Elicitation Formats/<treatment name>/<treatment name>_<Domain>.csv

# Get unique treatment values
unique_treatments = complete_responses['treatment'].unique()

# Create the Elicitation Formats folder if it doesn't exist
folder_path = 'Elicitation Formats'
os.makedirs(folder_path, exist_ok=True)

# Invert name -> code once, rather than scanning the mapping per treatment.
treatment_names = {code: name for name, code in treatment_mapping.items()}
domain_mapping = {1: 'Geography', 2: 'Movies', 3: 'Paintings'}

# Split the dataset based on unique treatment values
for treatment in unique_treatments:
    key_for_value_treatment = treatment_names.get(treatment)
    if key_for_value_treatment is None:
        # Without this check the None would reach os.path.join and fail with
        # an unhelpful TypeError that does not name the offending value.
        raise ValueError(
            f"Treatment value {treatment!r} has no entry in treatment_mapping"
        )

    treatment_data = complete_responses[complete_responses['treatment'] == treatment]

    # Create the treatment subfolder if it doesn't exist
    treatment_folder_path = os.path.join(folder_path, key_for_value_treatment)
    os.makedirs(treatment_folder_path, exist_ok=True)

    # Split the treatment data based on unique domain values and save them
    # in the treatment subfolder
    unique_domains = treatment_data['domain'].unique()
    for domain in unique_domains:
        domain_name = domain_mapping[domain]
        domain_data = treatment_data[treatment_data['domain'] == domain]
        file_path = os.path.join(
            treatment_folder_path, f'{key_for_value_treatment}_{domain_name}.csv'
        )
        domain_data.to_csv(file_path, index=False)
