import re
import argparse
import random
import pandas as pd
import pdb
import json

def build_parser():
	"""Create the command-line parser for the pre-processing script.

	Every option is a string with a default, so the script can be run with
	no arguments at all.

	Returns:
		argparse.ArgumentParser: parser exposing ``-out_dir``,
		``-folder_name``, ``-data`` and ``-exp_type``.
	"""
	# (flag, default, help text), listed in the order the options are registered.
	option_specs = (
		('-out_dir', 'generation_outputs/', 'Output Directory'),
		('-folder_name', 'programmatic_adversarial_gpt-4o_0.7_2024-08-05-18:07:40_11', 'Folder name'),
		('-data', 'prog_qa', 'Data filename'),
		('-exp_type', 'programmatic_docs', 'Exp type'),
	)

	cli = argparse.ArgumentParser(description='Pre Process')
	for flag, default_value, help_text in option_specs:
		cli.add_argument(flag, type=str, default=default_value, help=help_text)

	return cli

def _accumulate_record(record):
	"""Build one output row: record metadata plus five accumulated lists.

	Each list starts with the record's own entry and is followed by the
	adversarial entries decoded from the matching ``Adv_*`` JSON column.
	"""
	adv_questions = json.loads(record["Adv_Question"])
	adv_answers = json.loads(record["Adv_Answer"])
	adv_docs_info = json.loads(record["Adv_Documents_Info"])
	adv_ans_pts = json.loads(record["Adv_Ans_Points"])
	adv_doc_ans_pts = json.loads(record["Adv_Doc_Ans_Points"])

	# NOTE(review): "Documents_Info" is stored verbatim while its adversarial
	# counterpart is JSON-decoded; "Ans_Points"/"Doc_Ans_Points" are decoded on
	# both sides. Confirm against the input schema whether that is intended.
	questions = [record["Question"]]
	answers = [record["Answer"]]
	docs_info = [record["Documents_Info"]]
	ans_pts = [json.loads(record["Ans_Points"])]
	doc_ans_pts = [json.loads(record["Doc_Ans_Points"])]

	# "Adv_Question" drives the number of adversarial entries: longer sibling
	# lists are truncated to that count, shorter ones raise IndexError.
	n_adv = len(adv_questions)
	for merged, adversarial in (
		(questions, adv_questions),
		(answers, adv_answers),
		(docs_info, adv_docs_info),
		(ans_pts, adv_ans_pts),
		(doc_ans_pts, adv_doc_ans_pts),
	):
		merged.extend(adversarial[j] for j in range(n_adv))

	return [
		record["ID"], record["Persona"], record["Environment"], record["Similarity"],
		questions, answers, docs_info, ans_pts, doc_ans_pts,
	]


def programmatic_docs_preprocess(data, out_path=None):
	"""Merge each row's original and adversarial QA fields and write them as TSV.

	For every row of ``data`` the original question/answer/document info/answer
	points are placed first in a list, followed by the adversarial variants
	decoded from the ``Adv_*`` JSON columns. The five resulting list columns
	are JSON-encoded and the table is written tab-separated without an index.
	The number of written rows is printed.

	Args:
		data: pandas DataFrame with the columns ``ID``, ``Persona``,
			``Environment``, ``Similarity``, ``Question``, ``Answer``,
			``Documents_Info``, ``Ans_Points``, ``Doc_Ans_Points`` and the
			JSON-encoded ``Adv_Question``, ``Adv_Answer``,
			``Adv_Documents_Info``, ``Adv_Ans_Points``, ``Adv_Doc_Ans_Points``.
		out_path: destination file. When omitted, the path is derived from the
			module-level ``args`` (``<out_dir>/<folder_name>/<data>_accumulated.tsv``),
			which is only defined when this file is executed as a script.

	Raises:
		NameError: if ``out_path`` is omitted and no module-level ``args`` exists.
		KeyError: if a required column is missing.
		json.JSONDecodeError: if a JSON column cannot be decoded.
	"""
	meta_columns = ['ID', 'Persona', 'Environment', 'Similarity']
	list_columns = ['Questions', 'Answers', 'Documents_Info', 'Ans_Points', 'Doc_Ans_Points']

	rows = [_accumulate_record(record) for record in data.to_dict(orient='records')]
	new_df = pd.DataFrame(rows, columns=meta_columns + list_columns)

	# Lists cannot be stored in a TSV cell directly, so serialise them as JSON.
	for column in list_columns:
		new_df[column] = new_df[column].apply(json.dumps)

	if out_path is None:
		# Legacy behaviour: fall back to the parsed command-line arguments.
		out_path = args.out_dir + "/" + args.folder_name + "/" + args.data + "_accumulated.tsv"

	new_df.to_csv(out_path, sep='\t', index=False)
	print("Length of Final Data: ", str(len(new_df)))

def main(args):
	"""Run the pre-processing step selected by ``args.exp_type``.

	Only the "programmatic_docs" experiment type is handled: the TSV at
	``<out_dir>/<folder_name>/<data>.tsv`` is loaded and handed to
	``programmatic_docs_preprocess``. Any other value does nothing.

	Args:
		args: parsed command-line namespace providing ``exp_type``,
			``out_dir``, ``folder_name`` and ``data``.
	"""
	if args.exp_type != "programmatic_docs":
		# No other experiment type has a pre-processing step.
		return

	input_path = "/".join([args.out_dir, args.folder_name, args.data]) + ".tsv"
	frame = pd.read_csv(input_path, sep='\t')
	programmatic_docs_preprocess(frame)
	
if __name__ == "__main__":
	# Script entry point: parse the command line and run the selected step.
	parser = build_parser()
	# `args` is deliberately a module-level name: programmatic_docs_preprocess
	# reads it as a global to build its output path, so it must not be renamed
	# or moved into a function.
	args = parser.parse_args()

	main(args)