import pandas as pd
import csv
import argparse
import time
# 'user_id', 'problem_id', 'correct', 'order_id' template_id skill_id
def write_csv(path, rows):
    """Write ``rows`` to ``path`` as a CSV file, replacing any existing file.

    Args:
        path: Destination file path.
        rows: Iterable of row iterables, passed to ``csv.writer.writerows``.

    Raises:
        OSError: If the file cannot be opened for writing.
        csv.Error: If a row cannot be serialized.
    """
    # newline='' lets the csv module control line endings itself; the
    # context manager guarantees the handle is closed even if writerows raises.
    with open(path, 'w', newline='') as csv_file:
        csv.writer(csv_file).writerows(rows)

def get_time_stamp(t):
    """Return the integer epoch timestamp for a 'YYYY-MM-DD HH:MM:SS' string.

    The string is parsed with ``time.strptime`` and converted with
    ``time.mktime``, which interprets the parsed fields as local time; the
    fractional part of the result is truncated by ``int``.
    """
    return int(time.mktime(time.strptime(t, "%Y-%m-%d %H:%M:%S")))

def init_data(data_path):
    """Convert a raw interaction log into index-mapped CSV files.

    Reads the CSV at ``data_path``, orders its rows by ``start_time``, drops
    rows whose ``skill_id`` is missing, and keeps only users that still have
    more than one row. It then writes five files into the current working
    directory, in this order:

    * ``question.csv``       -- (problem_id, question index) pairs
    * ``skill.csv``          -- (skill_id, skill index) pairs
    * ``user.csv``           -- (user_id, user index) pairs
    * ``question_skill.csv`` -- (question index, skill index) pairs
    * ``record.csv``         -- (user index, question index, correct) rows,
      in ``start_time`` order

    Each index is the position of the value in the sorted list of distinct
    values of its column.

    Args:
        data_path: Path of the input CSV; it must provide the columns
            ``user_id``, ``problem_id``, ``skill_id``, ``correct`` and
            ``start_time``.
    """

    def _index_column(values, out_path, cast=None):
        # Deduplicate, optionally convert, sort, then number the values;
        # the (value, index) pairs are persisted and returned as a lookup.
        distinct = set(values)
        if cast is None:
            ordered = sorted(distinct)
        else:
            ordered = sorted(cast(value) for value in distinct)
        pairs = [[value, position] for position, value in enumerate(ordered)]
        write_csv(out_path, pairs)
        return dict(pairs)

    frame = pd.read_csv(data_path, sep=',', low_memory=False)
    frame = frame.sort_values(['start_time'])
    frame = frame[frame['skill_id'].notna()]

    # Keep only users with more than one remaining interaction.
    rows_per_user = frame['user_id'].value_counts()
    kept_users = rows_per_user[rows_per_user > 1].index.tolist()
    frame = frame[frame['user_id'].isin(kept_users)]

    question_index = _index_column(
        frame['problem_id'].tolist(), './question.csv', int)
    skill_index = _index_column(frame['skill_id'].tolist(), './skill.csv')
    user_index = _index_column(frame['user_id'].tolist(), './user.csv', int)

    # When a problem appears with several skills, the last row (in
    # start_time order) determines the skill recorded for it.
    skill_of_question = {
        question_index[problem]: skill_index[skill]
        for problem, skill in frame[['problem_id', 'skill_id']].values.tolist()
    }
    question_skill_rows = [
        [position, skill_of_question[position]]
        for position in range(len(question_index))
    ]
    write_csv('./question_skill.csv', question_skill_rows)

    # start_time is selected alongside the other columns but is not written.
    raw_rows = frame[
        ['user_id', 'problem_id', 'correct', 'start_time']
    ].values.tolist()
    records = [
        [user_index[user], question_index[problem], int(correct)]
        for user, problem, correct, _ in raw_rows
    ]

    write_csv('./record.csv', records)

if __name__ == '__main__':
    # Script entry point: read --data_path from the command line and run the
    # preprocessing. Built-in help stays disabled and unrecognized arguments
    # are ignored (parse_known_args), as before.
    parser = argparse.ArgumentParser(add_help=False)
    # The option is mandatory: without it init_data would receive None and
    # fail inside pandas with an error unrelated to the missing argument.
    parser.add_argument('--data_path', type=str, required=True,
                        help='path of the input CSV file')
    args, _ = parser.parse_known_args()
    init_data(args.data_path)