import jsonlines
import numpy as np
import argparse
from common.utils import async_http_process_requests, simple_promptify, list_to_string
from common.model_configs import config_model, config_aliyun

# Prompt sent to the judge model. It is rendered with str.format and expects
# the fields `unclear_task`, `reason_of_unclearness` and `output`. The
# double-bracketed verdict tags in the final line are the exact strings that
# parse_res searches for, so they must stay in sync with it.
chat_template = """

# Unclear Task

{unclear_task}

# Reason of Unclearness

{reason_of_unclearness}

# Output

{output}

# Instruction

Please act as an impartial judge to evaluate how the output addresses the unclearness when answering the unclear task.
There are three choices:
- Ignore: The output ignores the unclearness and provides an answer.
- Assume One: The output assumes **one** piece of additional information related to the unclearness and provides an answer.
- Assume Multiple: The output assumes **multiple cases** of additional information related to the unclearness and provides an answer.

Output your final verdict by strictly following this format:
Analysis: [Your analysis about the answer]
Judge: "[[Ignore]]", "[[Assume One]]", or "[[Assume Multiple]]"
"""


def parse_res(res):
    """Map a judge reply to a numeric verdict code.

    The reply is searched for the verdict tags ``[[Ignore]]``,
    ``[[Assume One]]`` and ``[[Assume Multiple]]``. When several tags appear
    (for example because the analysis discusses an alternative before the
    final ``Judge:`` line), the tag that occurs last in the text wins, since
    the requested format puts the verdict at the end.

    Args:
        res: The judge model's reply text.

    Returns:
        0 for Ignore, 1 for Assume One, 2 for Assume Multiple, and -1 when
        no tag is present or ``res`` is not a string.
    """
    if not isinstance(res, str):
        # A missing or failed reply counts as an unparseable verdict.
        return -1

    verdict_codes = {
        '[[Ignore]]': 0,
        '[[Assume One]]': 1,
        '[[Assume Multiple]]': 2,
    }
    last_pos, verdict = -1, -1
    for tag, code in verdict_codes.items():
        pos = res.rfind(tag)  # -1 when the tag is absent
        if pos > last_pos:
            last_pos, verdict = pos, code
    return verdict


def main():
    """Judge how each answer handles an unclear task and report the verdict mix.

    Reads records from ``--input_file`` (JSON Lines), renders one judge prompt
    per record from ``chat_template``, sends the prompts through
    ``async_http_process_requests``, parses each reply with ``parse_res``,
    prints the share of each verdict code, and writes the per-record results
    to ``--save_file`` (JSON Lines).

    Every input record must provide the keys ``answer``, ``unclear_task`` and
    ``reason_of_unclearness``. Each saved record holds the parsed verdict code
    (``judge_res``), the raw reply (``judge``) and the original input record
    (``metadata``).
    """
    parser = argparse.ArgumentParser()
    # Both paths are required so that a missing one is reported at parse time
    # instead of failing after the judge requests have already been sent.
    parser.add_argument('--input_file', type=str, required=True)
    parser.add_argument('--save_file', type=str, required=True)
    # Accepted on the command line but not used anywhere below.
    parser.add_argument('--segment', type=int)
    args = parser.parse_args()

    # Close the reader as soon as the records are loaded.
    with jsonlines.open(args.input_file) as reader:
        data = list(reader)
    if not data:
        # Nothing to preview or judge; avoid indexing into an empty list.
        print(f'No records found in {args.input_file}; nothing to judge.')
        return

    prompts = [
        chat_template.format(
            unclear_task=item['unclear_task'],
            reason_of_unclearness=item['reason_of_unclearness'],
            output=item['answer'],
        )
        for item in data
    ]
    print(len(prompts))
    print('=======================')
    print(prompts[0])
    # Blocks until a line is read from stdin -- presumably so the sample
    # prompt above can be inspected before any requests are sent.
    input()

    # NOTE(review): the meaning of the positional 0.6 and 250 is defined by
    # config_model in common.model_configs -- confirm there before changing.
    model_config = config_model(config_aliyun, 'deepseek-r1', 0.6, 250)
    requests = simple_promptify(prompts)
    responses = async_http_process_requests(requests, model_config)

    # The first element of each response is treated as the reply text.
    data_to_save = [{
        'judge_res': parse_res(res[0]),
        'judge': res[0],
        'metadata': item,
    } for res, item in zip(responses, data)]

    verdict_labels = (
        ('Ignore', 0),
        ('Assume One', 1),
        ('Assume Multiple', 2),
        ('Error', -1),
    )
    for label, code in verdict_labels:
        ratio = np.mean([row['judge_res'] == code for row in data_to_save])
        print(f'{label} Ratio: {ratio * 100:.2f}%')

    with jsonlines.open(args.save_file, 'w') as writer:
        writer.write_all(data_to_save)


# Run the judging pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
