import csv, sys
import jsonlines
# Raise the csv module's per-field size cap as high as the platform allows so
# that very long fields do not abort parsing. The limit is stored in a C long;
# where that type is narrower than sys.maxsize (e.g. 64-bit Windows) the call
# raises OverflowError, so back off until a value is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 2

# Dataset partitions to convert; each one is read from "./<split>.csv".
splits = ["test", "train"]

# Instruction text placed at the start of every generated prompt.
rolePlay_Pseudocode2Code = (
    "Imagine you are an expert in translating a pseudo-code in natural language to a C++ code."
    " Do not return anything other than the C++ code!"
)

# Standard headers placed in front of every C++ completion, in emission order.
_CPP_HEADERS = (
    "iostream",
    "cstdio",
    "cstring",
    "cmath",
    "ctime",
    "cstdlib",
    "algorithm",
    "vector",
    "string",
    "map",
    "unordered_map",
    "set",
    "unordered_set",
    "deque",
    "random",
    "stack",
    "queue",
    "tuple",
    "list",
    "climits",
    "cassert",
)

# Preamble prepended to each code sample: one #include line per header,
# followed by the using-directive, each terminated by a newline.
genericCodePrefix = (
    "".join(f"#include <{header}>\n" for header in _CPP_HEADERS)
    + "using namespace std;\n"
)

# For every split, read "./<split>.csv" and produce two parallel outputs:
#   ./<split>.pseudocode-cpp.jsonl  - one {"prompt", "completion"} object per row
#   ./<split>.pseudocode-cpp.id     - one "<probid>_<workerid>" per row, same order
for split in splits:
    Pseudocode2Code_list = []
    id_list = []

    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain line breaks are parsed intact.
    with open(f"./{split}.csv", mode="r", newline="") as csv_handle:
        for currLineNum, row in enumerate(csv.DictReader(csv_handle), start=1):
            text = row["text"]
            cppCode = genericCodePrefix + row["code"]
            Pseudocode2Code_list.append({
                "prompt": f"{rolePlay_Pseudocode2Code}\n\n### Pseudo-Code in Natural Language:\n{text}\n\n### C++ Code:\n",
                "completion": cppCode,
            })
            id_list.append(row["probid"] + "_" + row["workerid"])
            print(f"Line {currLineNum} of {split}")

    # The input file is closed at this point; everything needed is in memory.

    # write nl-cpp file
    with jsonlines.open(f"./{split}.pseudocode-cpp.jsonl", 'w') as writer:
        writer.write_all(Pseudocode2Code_list)

    # write id file: ids separated by newlines with no trailing newline.
    # Joining (rather than stripping the last element) also handles a CSV
    # with no data rows, which then yields an empty id file instead of an
    # IndexError.
    with open(f"./{split}.pseudocode-cpp.id", "w") as id_file:
        id_file.write("\n".join(id_list))