import os
import json
import pickle
import pandas as pd
from fixed_variables import *


def get_data(base_dir):
    """Load and concatenate every JSON chunk stored under ``<base_dir>/chunks``.

    Chunks are read as ``chunk_0.json``, ``chunk_1.json``, ... in increasing
    index order. Only directory entries named ``chunk_<digits>.json`` are
    counted, so unrelated files in the directory (editor backups,
    ``.DS_Store``, ...) do not inflate the number of chunks to read.

    Args:
        base_dir: Directory that contains the ``chunks`` sub-directory.

    Returns:
        One list holding the items of all chunks, in chunk-index order.

    Raises:
        FileNotFoundError: If the ``chunks`` directory is missing, or if the
            chunk numbering has a gap (an expected ``chunk_<i>.json`` does
            not exist).
    """
    chunk_dir = os.path.join(base_dir, "chunks")
    prefix, suffix = "chunk_", ".json"
    # Count only real chunk files; the loop below still walks 0..n-1 so that a
    # hole in the numbering is reported instead of being silently skipped.
    num_chunks = sum(
        1
        for name in os.listdir(chunk_dir)
        if name.startswith(prefix)
        and name.endswith(suffix)
        and name[len(prefix):-len(suffix)].isdigit()
    )

    all_chunk_data = []
    for chunk_idx in range(num_chunks):
        file_name = os.path.join(chunk_dir, f"chunk_{chunk_idx}.json")
        with open(file_name, "r", encoding="utf-8") as f:
            all_chunk_data.extend(json.load(f))
    return all_chunk_data


def find_valid_examples(model):
    """Return the row indices of the model's ``real.csv`` that pass the filter.

    The CSV is read from ``comparison/<model name>/real.csv``, where the model
    name is the part of ``model`` after its last ``/``. A row is kept when its
    ``Recognition`` column equals 0 while both ``Understanding`` and
    ``Reasoning`` equal 1.

    Args:
        model: Model identifier, optionally prefixed with ``<org>/``.

    Returns:
        List of DataFrame index labels of the matching rows.
    """
    model_name = model.rsplit("/", 1)[-1]
    table = pd.read_csv(os.path.join("comparison", model_name, "real.csv"))

    recognition_failed = table["Recognition"].eq(0)
    understanding_ok = table["Understanding"].eq(1)
    reasoning_ok = table["Reasoning"].eq(1)
    keep = recognition_failed & understanding_ok & reasoning_ok

    return table.loc[keep].index.tolist()


def get_examples(valid_indices, model):
    """Export per-example demo files for the given indices of one model.

    For every index in ``valid_indices`` the matching entry of
    ``pkl/data_test.pkl`` and of the model's chunked solutions
    (``benchmark/<model name>/real``) are combined into one record holding,
    per question, the QA item plus the model's ``analysis`` and ``choice``.
    An example is skipped when its category is ``Chemistry`` or
    ``Mathematics``, or when the ``Recognition`` analysis has fewer
    whitespace-separated words than the ``Reasoning`` analysis.

    Each kept example is written to
    ``demo/response/<model_map[model]>/<idx>/`` as a JSON record plus the
    ``real`` and ``syn`` PNG byte payloads. A summary line (count and list of
    kept indices) is printed at the end.

    ``questions`` and ``model_map`` are module-level names that are not
    defined in this function (presumably provided by ``fixed_variables`` —
    confirm). ``questions`` must include ``"Recognition"`` and
    ``"Reasoning"``, since those entries are read for the length filter.

    Args:
        valid_indices: Iterable of indices valid for both the pickled data
            and the loaded solutions.
        model: Model identifier; the part after the last ``/`` selects the
            benchmark directory and the full string is the ``model_map`` key.

    Returns:
        The list of indices that were actually exported.
    """
    # NOTE(review): pickle.load can execute arbitrary code embedded in the
    # file; only point this at a trusted, locally produced pickle.
    with open("pkl/data_test.pkl", 'rb') as file:
        data = pickle.load(file)
    solution = get_data(os.path.join("benchmark", model.split("/")[-1], "real"))

    final_indices = []
    for example_idx in valid_indices:
        example = data[example_idx]
        # Cheap filter first: no need to assemble responses for excluded categories.
        if example['category'] in ('Chemistry', 'Mathematics'):
            continue

        answers = solution[example_idx]['solution']
        responses = {'idx': example_idx, 'img_url': example['img_url']}
        for question in questions:
            responses[question] = {
                'QA': example['QAs'][question],
                'analysis': answers[question]['analysis'],
                'choice': answers[question]['choice'],
            }

        # Keep only examples whose Recognition analysis is at least as long
        # (in words) as the Reasoning analysis.
        recognition_words = len(responses["Recognition"]["analysis"].split())
        reasoning_words = len(responses["Reasoning"]["analysis"].split())
        if recognition_words < reasoning_words:
            continue

        model_tag = model_map[model]
        example_dir = f"demo/response/{model_tag}/{example_idx}"
        os.makedirs(example_dir, exist_ok=True)
        json_path = os.path.join(example_dir, f"{example_idx}_{model_tag}.json")
        # Explicit encoding keeps the output independent of the platform default.
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(responses, f, indent=4)
        with open(os.path.join(example_dir, f"{example_idx}_real.png"), "wb") as f:
            f.write(example['real_bytes'])
        with open(os.path.join(example_dir, f"{example_idx}_syn.png"), "wb") as f:
            f.write(example['syn_bytes'])
        final_indices.append(example_idx)

    print(f"{len(final_indices)}, {final_indices}")
    return final_indices


if __name__ == '__main__':
    # Script entry point: pick one model, select its matching examples and
    # export them. Other model identifiers kept here for quick switching:
    #   "Qwen/Qwen2.5-VL-32B-Instruct"
    #   "llava-hf/llava-v1.6-34b-hf"
    #   "meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo"
    model = "google/gemma-3-27b-it"
    get_examples(find_valid_examples(model), model)

