import jsonlines
import numpy as np
import argparse
from common.utils import async_http_process_requests, simple_promptify, list_to_string
from common.model_configs import config_model, config_aliyun

# Prompt template sent to the judge model. It is filled with ``str.format``
# using three placeholders: ``unclear_task``, ``reason_of_unclearness`` and
# ``clarification_question``. The model is instructed to finish with a
# ``[[Yes]]`` / ``[[No]]`` verdict, which ``parse_res`` looks for.
# NOTE: the trailing backslash inside the string joins the two instruction
# lines into one line of prompt text (it is a line continuation, not output).
chat_template = """

# Unclear Task

{unclear_task}

# Reason of Unclearness

{reason_of_unclearness}

# Clarification Question

{clarification_question}

# Instruction

Please act as an impartial judge to evaluate the appropriateness and sufficiency of the clarification question in addressing the unclear question, \
given the content of the unclear question and the reason of unclearness.

Output your final verdict by strictly following this format:
Analysis: [Your analysis about the clarification question]
Judge: "[[Yes]]" if the task clarification question is appropriate and sufficient, otherwise "[[No]]"
"""


def parse_res(res):
    """Convert a judge response into a binary verdict.

    The judge is asked to end its answer with ``[[Yes]]`` or ``[[No]]``.
    A response may mention both markers (for example while reasoning in the
    analysis section), so the marker that appears *last* is taken as the
    final verdict.

    Args:
        res: Raw text returned by the judge model.

    Returns:
        1 if the final verdict is ``[[Yes]]``; 0 if it is ``[[No]]``, if no
        verdict marker is present, or if ``res`` is not a string (e.g. a
        missing response).
    """
    if not isinstance(res, str):
        return 0
    # rfind returns -1 when the marker is absent, so a response with no
    # marker at all compares -1 > -1 and falls through to 0.
    last_yes = res.rfind('[[Yes]]')
    last_no = res.rfind('[[No]]')
    return 1 if last_yes > last_no else 0


def main():
    """Judge clarification questions with an LLM and save the verdicts.

    Command-line arguments:
        --input_file: JSON Lines file; each record must provide the keys
            ``unclear_task``, ``reason_of_unclearness`` and ``answer``
            (the clarification question to be judged).
        --save_file: JSON Lines file to write; one record per input with
            ``judge_res`` (0/1), ``judge`` (raw judge text) and
            ``metadata`` (the original input record).

    The pass ratio (mean of ``judge_res``) is printed to stdout.
    """
    parser = argparse.ArgumentParser()
    # Both paths are needed below, so fail fast with a usage message
    # instead of an obscure error when one is omitted.
    parser.add_argument('--input_file', type=str, required=True)
    parser.add_argument('--save_file', type=str, required=True)
    args = parser.parse_args()

    # Use a context manager so the input file handle is closed promptly.
    with jsonlines.open(args.input_file) as reader:
        data = list(reader)

    prompts = [
        chat_template.format(
            unclear_task=item['unclear_task'],
            reason_of_unclearness=item['reason_of_unclearness'],
            clarification_question=item['answer'],
        )
        for item in data
    ]

    # NOTE(review): 0.6 and 250 are presumably the sampling temperature and
    # a token/concurrency limit -- confirm against config_model's signature.
    model_config = config_model(config_aliyun, 'deepseek-r1', 0.6, 250)
    requests = simple_promptify(prompts)
    responses = async_http_process_requests(requests, model_config)

    # Each response is indexed with [0] to get the judge text; responses are
    # paired with inputs positionally.
    data_to_save = [{
        'judge_res': parse_res(res[0]),
        'judge': res[0],
        'metadata': item,
    } for res, item in zip(responses, data)]

    scores = [item['judge_res'] for item in data_to_save]
    # np.mean([]) yields nan but also emits a RuntimeWarning; produce the
    # same nan directly when there is nothing to average.
    pass_ratio = np.mean(scores) if scores else float('nan')
    print(f'Verification Pass: {pass_ratio*100:.2f}%')

    with jsonlines.open(args.save_file, 'w') as writer:
        writer.write_all(data_to_save)


# Run the judging pipeline only when executed as a script, not on import.
if __name__ == '__main__':
    main()
