import pandas as pd
import csv
import argparse
from datetime import datetime
import math
# 'user_id', 'problem_id', 'correct', 'order_id' template_id skill_id
def write_csv(path, rows):
    """Write ``rows`` to ``path`` as a CSV file, replacing any existing file.

    Args:
        path: Destination file path.
        rows: Iterable of row sequences; each row becomes one CSV line.
    """
    # newline='' leaves line-ending handling to the csv module. The ``with``
    # block guarantees the file is closed even if writing a row raises.
    with open(path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(rows)

def date_to_timestamp(date):
    """Convert a ``YYYY-MM-DD HH:MM:SS.f`` string to a POSIX timestamp.

    The string is parsed into a naive ``datetime``, so ``timestamp()``
    interprets it in the local time zone of the machine running the script.

    Args:
        date: Date-time text matching ``"%Y-%m-%d %H:%M:%S.%f"``.

    Returns:
        Seconds since the epoch as a float.

    Raises:
        ValueError: If ``date`` does not match the expected format.
    """
    return datetime.strptime(date, "%Y-%m-%d %H:%M:%S.%f").timestamp()

def _write_id_map(values, path):
    """Give each distinct value a consecutive id (in sorted order) and save it.

    Writes ``[value, id]`` rows to ``path`` and returns the ``value -> id`` dict.
    """
    pairs = [[value, idx] for idx, value in enumerate(sorted(set(values)))]
    write_csv(path, pairs)
    return dict(pairs)


def init_data(data_path):
    """Convert a tab-separated step log into ID-mapped CSV files.

    Rows missing any required column are dropped, as are exact duplicate
    rows. Five header-less CSV files are written to the current working
    directory:

    * ``question.csv``: question name, question id. A question name is
      ``"<Problem Name>_<Step Name>"``.
    * ``skill.csv``: ``KC(SubSkills)`` value, skill id.
    * ``user.csv``: ``Anon Student Id`` value, user id.
    * ``question_skill.csv``: question id, skill id, one row per question,
      ordered by question id.
    * ``record.csv``: user id, question id, first-attempt correctness (int),
      ordered by step start time.

    Args:
        data_path: Path of the tab-separated input file.
    """
    required_columns = [
        'Anon Student Id',
        'Problem Name',
        'KC(SubSkills)',
        'Correct First Attempt',
        'Step Start Time',
        'Step End Time',
        'Incorrects',
        'Corrects',
        'Step Duration (sec)',
        'Step Name',
    ]
    # An explicit '\t' escape instead of a literal tab character, which is
    # invisible in the source and easily converted to spaces by an editor.
    frame = pd.read_csv(data_path, sep='\t', low_memory=False)
    frame = (
        frame.dropna(subset=required_columns)
        .sort_values(by=['Step Start Time'])
        .drop_duplicates()
    )

    used_columns = [
        'Anon Student Id',
        'Problem Name',
        'Step Name',
        'KC(SubSkills)',
        'Correct First Attempt',
        'Step Start Time',
    ]
    # str() keeps the concatenation working when pandas infers a numeric
    # dtype for the problem or step name column.
    data = [
        [usr, str(qst) + '_' + str(stp), skl, cor, date_to_timestamp(tme)]
        for usr, qst, stp, skl, cor, tme in frame[used_columns].values.tolist()
        if not pd.isna(tme)
    ]
    # Order events chronologically by the parsed timestamp (last field).
    data.sort(key=lambda row: row[-1])

    qsts = _write_id_map((row[1] for row in data), './question.csv')
    skls = _write_id_map((row[2] for row in data), './skill.csv')
    usrs = _write_id_map((row[0] for row in data), './user.csv')

    # When a question occurs with several skills, the chronologically last
    # occurrence decides which skill it is mapped to.
    qst_skl = {}
    for _, qst, skl, _, _ in data:
        qst_skl[qsts[qst]] = skls[skl]
    write_csv(
        './question_skill.csv',
        [[qst_id, qst_skl[qst_id]] for qst_id in range(len(qsts))],
    )

    events = [[usrs[usr], qsts[qst], int(cor)] for usr, qst, _, cor, _ in data]
    write_csv('./record.csv', events)

if __name__ == '__main__':
    # Command-line entry point: convert the log file given by --data_path.
    parser = argparse.ArgumentParser(add_help = False)
    # required: a missing path now produces a clear usage error instead of
    # passing None through to pandas.
    parser.add_argument('--data_path', type = str, required = True)
    # parse_known_args tolerates extra, unrecognized command-line arguments.
    args, _ = parser.parse_known_args()
    init_data(args.data_path)