from copy import deepcopy
import glob
import re
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from exam import ENEM

# Collect every experiment result file, skipping the "prob-dist" files.
# glob.glob returns paths in arbitrary, filesystem-dependent order, so the
# paths are sorted to keep the row order of the combined output reproducible.
files = sorted(
    file
    for file in glob.glob("enem-experiments-results/*")
    if "prob-dist" not in file
)

def _drop_leading_items(df: pd.DataFrame, n_items: int = 5) -> pd.DataFrame:
    """Return a copy of *df* with the first *n_items* questions removed.

    The per-question columns TX_RESPOSTAS, TX_GABARITO and RESPONSE_PATTERN
    are sliced with ``value[n_items:]`` and CTT_SCORE is recomputed as the
    number of "1" entries left in RESPONSE_PATTERN. When the first row's
    ENEM_EXAM_TYPE contains "shuffle", TX_RESPOSTAS_SHUFFLE and
    TX_GABARITO_SHUFFLE are sliced the same way.

    The input frame is not modified; the result has a fresh 0..n-1 index.
    NOTE(review): assumes each of these columns holds one sliceable sequence
    per row (presumably one character per question) -- confirm.
    """
    trimmed = df.copy()

    per_item_columns = ["TX_RESPOSTAS", "TX_GABARITO", "RESPONSE_PATTERN"]
    # Only the first row is inspected, so a file is presumably expected to
    # hold a single exam type. The emptiness check avoids an IndexError from
    # .iloc[0] on a frame with no rows.
    if not trimmed.empty and "shuffle" in trimmed["ENEM_EXAM_TYPE"].iloc[0]:
        per_item_columns += ["TX_RESPOSTAS_SHUFFLE", "TX_GABARITO_SHUFFLE"]

    for column in per_item_columns:
        trimmed[column] = trimmed[column].apply(lambda items: items[n_items:])

    # CTT score: number of ones in the (already trimmed) RESPONSE_PATTERN.
    trimmed["CTT_SCORE"] = trimmed["RESPONSE_PATTERN"].apply(
        lambda pattern: pattern.count("1")
    )

    return trimmed.reset_index(drop=True)


# Load every result file. Files whose path contains "LC" get a different
# treatment: their first 5 questions (spanish as foreign language) are
# discarded. Only this one variant ("v1") of the LC files is built here.
frames = []
for file in files:
    df = pd.read_parquet(file)
    if "LC" in file:
        df = _drop_leading_items(df)
    frames.append(df)

if not frames:
    # Fail here with a clear message instead of an AttributeError on None
    # further down the script.
    raise FileNotFoundError("No experiment result files found to process")

# Concatenate once at the end; concatenating inside the loop re-copies the
# accumulated frame for every file.
new_df = pd.concat(frames)

# Derive exam metadata from the underscore-separated ENEM_EXAM identifier:
# the last field becomes CO_PROVA, the second EXAM_YEAR and the third
# EXAM_SUBJECT. The identifier is split once and the pieces are reused.
exam_id_parts = new_df["ENEM_EXAM"].apply(lambda exam_id: exam_id.split("_"))
new_df["CO_PROVA"] = exam_id_parts.apply(lambda parts: parts[-1])
new_df["EXAM_YEAR"] = exam_id_parts.apply(lambda parts: parts[1])
new_df["EXAM_SUBJECT"] = exam_id_parts.apply(lambda parts: parts[2])

# Replace the index carried over from concatenation with a clean 0..n-1 range
# before writing the combined table.
new_df = new_df.reset_index(drop=True)

new_df.to_parquet("enem-experiments-results-processed.parquet")
