import json

def read_jsonl_file(filepath):
    """Read a JSON Lines file and return its records as a list.

    Each non-blank line of the UTF-8 encoded file is parsed with
    ``json.loads`` after surrounding whitespace is stripped. Lines that are
    empty or contain only whitespace (for example a trailing blank line at
    the end of the file) are skipped instead of being handed to the parser,
    where they would raise ``json.JSONDecodeError``.

    Args:
        filepath: Path of the JSON Lines file to read.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        stripped_lines = (line.strip() for line in file)
        # An empty string is falsy, so blank lines are filtered out here.
        return [json.loads(text) for text in stripped_lines if text]

def read_json_file(filepath):
    """Load a JSON file of entries and map each question id to its answer.

    The file is decoded as UTF-8 and parsed with ``json.load``; the parsed
    value is iterated, and every entry is indexed by its ``'question_id'``
    and ``'answer'`` keys.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        A dict mapping ``entry['question_id']`` to ``entry['answer']``. When
        the same question id occurs more than once, the last entry wins.
    """
    with open(filepath, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)

    answers_by_id = {}
    for item in entries:
        # Look up the key before the value, as a dict comprehension does.
        key = item['question_id']
        answers_by_id[key] = item['answer']
    return answers_by_id

def write_jsonl_file(filepath, data):
    """Write an iterable of objects to a file in JSON Lines format.

    The target file is opened for writing as UTF-8 (replacing any existing
    content). Each entry is serialized with ``json.dumps`` using
    ``ensure_ascii=False``, so non-ASCII characters are written as-is, and
    is followed by a newline.

    Args:
        filepath: Path of the file to write.
        data: Iterable of JSON-serializable entries, one per output line.
    """
    with open(filepath, 'w', encoding='utf-8') as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + '\n' for record in data
        )



# Paths used by this script; edit them before running.
jsonl_filepath = "./playground/data/eval/docvqa/docvqa_val.jsonl" # Modify the ground-truth file here
json_filepath = "[Inference_Results]" # Modify the inference json file here
output_filepath = "[Output_Path]" # Modify the output path here

# Ground-truth records (one per JSONL line) and predictions keyed by question id.
jsonl_data = read_jsonl_file(jsonl_filepath)
json_data = read_json_file(json_filepath)

# Build one merged record per ground-truth entry. A question id with no
# matching prediction gets 'pred' set to None.
output_data = [
    {
        'id': record['question_id'],
        'question': record['text'].rstrip('\n'),
        'answer': record['answers'],
        'pred': json_data.get(record['question_id']),
    }
    for record in jsonl_data
]

write_jsonl_file(output_filepath, output_data)