import os
import json
import math

# Load every record from the input JSONL files, in order, into one flat list.
input_files = ["test_2d.jsonl", "test_3d.jsonl"]
questions = []
for input_file in input_files:
    # Use a context manager so each input handle is closed as soon as the
    # file has been read, instead of being left to the garbage collector.
    with open(os.path.expanduser(input_file), "r") as f:
        # One JSON object per line; whitespace-only lines are skipped because
        # json.loads() would raise on them.
        questions += [json.loads(line) for line in f if line.strip()]

def _convert_question(question_id, question):
    """Build the output record for one raw question.

    Args:
        question_id: Sequential id stored under ``"question_id"``.
        question: Raw record; the keys ``filename``, ``prompt``, ``answer``,
            ``type``, ``task``, ``source`` and ``source_filename`` are read.

    Returns:
        A new dict in the output schema. Keys are inserted in a fixed order
        so the serialized JSON lines keep a stable field order.
    """
    task = question["task"]
    text = question["prompt"]

    # "Depth" and "Distance" prompts first get the choices phrase inserted in
    # front of the "(A)" option line ...
    if task in ("Depth", "Distance"):
        text = text.replace("\n(A)", " Select from the following choices.\n(A)")
    # ... and "Distance" prompts additionally drop the "Estimate ..." sentence.
    if task == "Distance":
        text = text.replace("Estimate the real-world distances between objects in this image. ", "")

    # For every task the choices phrase in front of "(A)" is then stripped
    # (which also undoes the insertion above), and the fixed answering
    # instruction is appended on its own line.
    text = text.replace(" Select from the following choices.\n(A)", "\n(A)")
    text += "\n" + "Answer with the option's letter from the given choices directly."

    return {
        "question_id": question_id,
        "image": question["filename"],
        "text": text,
        "answer": question["answer"],
        "category": f"{question['type']}_{task}",
        "type": question["type"],
        "task": task,
        "source": question["source"],
        "source_filename": question["source_filename"],
    }


# Number the records by their position in the combined input list.
converted_questions = [
    _convert_question(idx, question) for idx, question in enumerate(questions)
]

# Dump all converted records to a single JSONL file, one JSON object per line.
output_file = "llava_cvbench_test_v2.jsonl"
with open(os.path.expanduser(output_file), "w") as out:
    out.writelines(json.dumps(record) + "\n" for record in converted_questions)


def split_list(lst, n):
    """Split a list into at most n consecutive chunks of (roughly) equal size.

    Every chunk holds ``ceil(len(lst) / n)`` items except possibly the last,
    which holds the remainder. Because the chunk size is rounded up, fewer
    than ``n`` chunks can be returned (e.g. 5 items with ``n=4`` gives 3
    chunks of sizes 2, 2 and 1).

    Args:
        lst: The list to split; it is sliced, not modified.
        n: The requested number of chunks.

    Returns:
        A list of sub-lists in the original order; ``[]`` for an empty input.

    Raises:
        ZeroDivisionError: If ``n`` is 0.
    """
    chunk_size = math.ceil(len(lst) / n)  # ceiling division: round the size up
    if chunk_size == 0:
        # Nothing to distribute; range() would reject a step of 0.
        return []
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]

# Write the converted records out again as separate per-chunk JSONL files.
nsplits = 4
chunks = split_list(converted_questions, nsplits)
output_file = "splits/{i}_of_{n}.jsonl"
for i, chunk in enumerate(chunks):
    split_path = os.path.expanduser(output_file.format(i=i, n=nsplits))
    # Create the target directory on demand so a fresh checkout without a
    # "splits" folder does not fail with FileNotFoundError.
    split_dir = os.path.dirname(split_path)
    if split_dir:
        os.makedirs(split_dir, exist_ok=True)
    with open(split_path, "w") as f:
        for question in chunk:
            f.write(json.dumps(question) + "\n")
