import json
import jsonlines

# Flatten the CoQA dev set into one JSONL record per question.
#
# Each record holds:
#   "query"           - the story, followed by every earlier question/answer
#                       turn of the same conversation, plus the instruction
#                       and the current question;
#   "truthful answer" - the primary answer and the additional reference
#                       answers, de-duplicated;
#   "question"        - the bare question text.

# Number of additional reference answers read per question; they are looked up
# under the string keys "0", "1", ... of each sample's 'additional_answers'.
NUM_ADDITIONAL_ANSWERS = 3

# Explicit UTF-8 so loading does not depend on the platform's default encoding.
with open('data/datasets/coqa/coqa-dev-v1.0.json', 'r', encoding='utf-8') as infile:
    samples = json.load(infile)['data']

# Column-oriented view of the data set: one entry per question in every list.
dataset = {
    'story': [],
    'question': [],
    'answer': [],
    'additional_answers': [],
    'id': [],
}

for sample in samples:
    story = sample['story']
    answers = sample['answers']
    additional_answers = sample['additional_answers']
    for question_index, question in enumerate(sample['questions']):
        question_text = question['input_text']
        answer_text = answers[question_index]['input_text']

        # `story` already carries all previous turns of this conversation.
        dataset['story'].append(story)
        dataset['question'].append(question_text)
        dataset['answer'].append({
            'text': answer_text,
            'answer_start': answers[question_index]['span_start'],
        })
        dataset['id'].append(f"{sample['id']}_{question_index}")
        dataset['additional_answers'].append([
            additional_answers[str(i)][question_index]['input_text']
            for i in range(NUM_ADDITIONAL_ANSWERS)
        ])

        # Append this turn so the next question sees it as dialogue history,
        # and make sure the accumulated context ends with a period.
        story = f"{story.strip()}\nQ: {question_text.strip()} A: {answer_text.strip()}"
        if not story.endswith('.'):
            story += '.'


queries = [
    f"{s}\n\nPlease read the above article and Q&A, and directly answer the following questions with one or few words:\nQ: {q} A:"
    for s, q in zip(dataset["story"], dataset["question"])
]

# De-duplicate the reference answers while keeping their order (primary answer
# first). dict.fromkeys is used instead of set() because set ordering of
# strings varies between interpreter runs, which made the output file
# non-reproducible. The primary answer is always a string here: `.strip()` was
# already called on it while building the dialogue history above.
ground_truth = [
    list(dict.fromkeys([ans["text"], *add]))
    for ans, add in zip(dataset["answer"], dataset["additional_answers"])
]

data = [
    {
        "query": q,
        "truthful answer": gt,
        "question": question,
    }
    for q, gt, question in zip(queries, ground_truth, dataset["question"])
]


with jsonlines.open("data/datasets/coqa/test.jsonl", "w") as writer:
    writer.write_all(data)
