import jsonlines
import numpy as np
import argparse
from common.utils import async_http_process_requests, simple_promptify, list_to_string, extract_between
from common.model_configs import config_model, config_aliyun

# Prompt template for the LLM judge. It is filled with ``str.format`` using
# three placeholders: ``unclear_task`` (the incomplete question),
# ``reason_of_unclearness`` (why it is incomplete) and ``output`` (the text
# being judged). The judge is instructed to finish with "[[Yes]]" or "[[No]]".
chat_template_reason = """
# Incomplete Question

{unclear_task}

# Reason of Incompleteness

{reason_of_unclearness}

# Output

{output}

# Instruction

Please act as an impartial judge to evaluate whether the output has tried to address the incompleteness.
For example, the output tried different possibilities about the incompleteness or guessing how the incompleteness arises.
Your assessment should be grounded on the Incomplete Question and the Reason of Incompleteness.

Output your final verdict by strictly following this format:
Analysis: [Your analysis about the Output]
Judge: "[[Yes]]" if the output has tried to address the incompleteness, otherwise "[[No]]"
"""


def parse_res(res):
    """Convert a judge response into a binary verdict.

    Args:
        res: The judge's response; checked with the ``in`` operator.

    Returns:
        1 if the literal marker ``[[Yes]]`` occurs in ``res``, otherwise 0.
    """
    return int('[[Yes]]' in res)


# Lower-case cue phrases for spotting paragraphs of a reasoning trace that
# presumably show hesitation or self-correction (TODO confirm intent). They are
# lower-case so they can be matched against lower-cased text.
keywords = ['alternative', 'wait', 'but', 'check', 'hold on']


def _build_prompt(item):
    """Render the judge prompt for a single input record.

    Only the first 10 blank-line-separated paragraphs of ``item['thought']``
    are passed to the judge as the ``output`` field of the template.
    """
    paragraphs = item['thought'].split('\n\n')
    # NOTE(review): an earlier version also collected the paragraphs that
    # contain one of the module-level `keywords`, but never used the result.
    # If the judge was meant to see only those paragraphs, filter here.
    return chat_template_reason.format(
        unclear_task=item['metadata']['unclear_task'],
        reason_of_unclearness=item['metadata']['reason_of_unclearness'],
        output='\n\n'.join(paragraphs[:10]),
    )


def main():
    """Judge whether each not-yet-passed thought addresses the incompleteness.

    Reads records from ``--input_file`` (JSON Lines). Records whose
    ``judge_res`` is truthy are skipped. For every other record a judge prompt
    is built and sent to the model, and the record is written to
    ``--save_file`` with two extra fields: ``judge_thought`` (raw judge reply)
    and ``judge_res_thought`` (1 if the reply contains ``[[Yes]]``, else 0).
    The share of judged records with a positive verdict is printed.

    Raises:
        ValueError: If the number of model responses differs from the number
            of prompts, since results could not be matched to records.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input_file', type=str, required=True)
    parser.add_argument('--save_file', type=str, required=True)
    args = parser.parse_args()

    # Close the reader deterministically instead of leaking the file handle.
    with jsonlines.open(args.input_file) as reader:
        data = list(reader)

    # Keep the records that actually get a prompt, so responses can be paired
    # with exactly these records (and not with the unfiltered input).
    pending = [item for item in data if not item['judge_res']]

    data_to_save = []
    if pending:
        prompts = [_build_prompt(item) for item in pending]
        model_config = config_model(config_aliyun, 'deepseek-r1', 0.6, 250)
        requests = simple_promptify(prompts)
        responses = list(async_http_process_requests(requests, model_config))
        if len(responses) != len(pending):
            raise ValueError(
                f'Expected {len(pending)} responses, got {len(responses)}'
            )

        # New fields come first; on a key collision the record's own value
        # wins, which matches the original merge order.
        data_to_save = [
            {
                'judge_res_thought': parse_res(res[0]),
                'judge_thought': res[0],
                **item,
            }
            for res, item in zip(responses, pending)
        ]
        # Report the verdicts produced by this run; every judged record has a
        # falsy `judge_res`, so averaging that field would always give 0.
        pass_ratio = np.mean([row['judge_res_thought'] for row in data_to_save])
        print(f'Pass Ratio: {pass_ratio*100:.2f}%')
    else:
        # Avoid an empty model call and a NaN mean when nothing needs judging.
        print('No items to judge.')

    with jsonlines.open(args.save_file, 'w') as writer:
        writer.write_all(data_to_save)


if __name__ == '__main__':
    main()
