import json
import re
import requests
import os
import time
from collections import OrderedDict
import traceback

def infer_verdict(text: str) -> str:
    """Extract the judge's final verdict tag from a model response.

    Looks for ``Verdict: [[A]]`` / ``[[B]]`` / ``[[C]]`` in ``text``. Matching
    is case-insensitive and tolerates Markdown bold markers around the label
    or the tag (e.g. ``**Verdict:** [[A]]``). When several verdicts appear,
    the last one wins, so a verdict restated at the very end of the answer
    overrides earlier mentions.

    Args:
        text: Raw response text produced by the judge model.

    Returns:
        ``"[[A]]"``, ``"[[B]]"`` or ``"[[C]]"``. The letter is always
        upper-case. ``"[[C]]"`` (tie) is returned when no verdict is found
        or when ``text`` is empty.
    """
    if not text:
        return "[[C]]"

    matches = re.findall(
        r"verdict\s*\**\s*:\s*\**\s*\[\[([ABC])\]\]", text, flags=re.I
    )
    if matches:
        # re.I also lets the captured letter be lower-case; normalise it so
        # the result is always one of the three canonical tags.
        return f"[[{matches[-1].upper()}]]"
    return "[[C]]"  # no verdict found: default to a tie

def query(prompt: str, model_name: str, timeout: float = 600.0):
    """Send a single-turn chat request and return the reply with usage stats.

    NOTE(review): the endpoint URL and the ``Authorization`` header value were
    blank in the original source. They are read here from the
    ``JUDGE_API_URL`` and ``JUDGE_API_AUTHORIZATION`` environment variables
    (the latter holding the complete header value); confirm these names and
    the header scheme the service expects before relying on them.

    Args:
        prompt: Text sent as the single user message.
        model_name: Value placed in the request's ``model`` field.
        timeout: Seconds passed to ``requests.post`` so a stalled connection
            cannot block the run forever.

    Returns:
        A tuple ``(resp, cost, elapsed)``: the stripped message content, a
        dict with ``prompt_tokens`` / ``completion_tokens`` /
        ``total_tokens`` copied from the response's ``usage`` field, and the
        request duration in seconds rounded to two decimals.

    Raises:
        KeyError: If a required environment variable or a ``usage`` counter
            is missing.
        requests.RequestException: On network failure, timeout, or a non-2xx
            HTTP status.
        RuntimeError: If the response body lacks
            ``choices[0].message.content``.
    """
    url = os.environ["JUDGE_API_URL"]
    headers = {
        "Authorization": os.environ["JUDGE_API_AUTHORIZATION"],
        "Content-Type": "application/json",
    }
    data = {
        "model": model_name,
        "messages": [{"role": "user", "content": prompt}],
    }

    # perf_counter is monotonic, so the measurement is immune to wall-clock
    # adjustments during a long request.
    t0 = time.perf_counter()
    http_rsp = requests.post(url, headers=headers, json=data, timeout=timeout)
    # Surface HTTP errors (401, 429, 5xx, ...) directly instead of failing
    # later with an opaque KeyError on the error payload.
    http_rsp.raise_for_status()
    rsp = http_rsp.json()
    t1 = time.perf_counter()

    try:
        resp = rsp["choices"][0]["message"]["content"].strip()
    except (KeyError, IndexError, TypeError, AttributeError) as err:
        raise RuntimeError(f"unexpected API response: {rsp!r}") from err

    cost = {
        k: rsp["usage"][k]
        for k in ("prompt_tokens", "completion_tokens", "total_tokens")
    }
    elapsed = round(t1 - t0, 2)  # seconds, two decimal places
    return resp, cost, elapsed

class Judge:
    """Pairwise judge: asks a model which of two answers to a question is better."""

    def __init__(self, model_name: str):
        # Model identifier forwarded to `query` on every call.
        self.model_name = model_name

    @staticmethod
    def generate_prompt(question: str, ans_a: str, ans_b: str) -> str:
        """Build the evaluation prompt for one question and two answers.

        The prompt consists of a fixed rubric (guidelines plus the required
        ``Verdict: [[A]]`` / ``[[B]]`` / ``[[C]]`` output format) followed by
        the question and both answers under labelled headings.
        """
        # Rubric lines are spelled out one per literal so that significant
        # trailing whitespace stays visible and cannot be stripped by accident.
        rubric = "\n".join([
            "Please act as an impartial judge and evaluate the quality of the responses provided by two AI assistants to the user question displayed below. ",
            "    ",
            "    **Evaluation Guidelines:**",
            "    - Choose the assistant that better follows instructions and answers the question",
            "    - Consider: helpfulness, relevance, accuracy, depth, creativity, and detail",
            "    - Compare responses directly",
            "    - Avoid position/length/name biases",
            "    - Be objective",
            "",
            "    **Output Format Requirements:**",
            "    1.Make your judgment on which AI assistant's response is better and provide evidence.",
            "    2.**At the very end of your answer, write exactly**:  ",
            '    Verdict: [[A]] or [[B]] or [[C]] on its own line. ("[[A]]" means assistant A is better;"[[B]]" means assistant B is better;"[[C]]" means a tie)',
            "    3.The content of the Verdict should only be [[A]] or [[B]] or [[C]].",
            "        **Accurate Example of the Verdict**",
            "        Verdict: [[A]].",
            "        **Wrong Example of the Verdict**",
            "        Verdict: [A]. Verdict: [[[A]]]. Verdict: A.",
        ])
        return (
            f"\n{rubric}\n"
            f"\n[User Question]\n{question}\n"
            f"\n[Assistant A's Answer]\n{ans_a}\n"
            f"\n[Assistant B's Answer]\n{ans_b}\n"
            "\n[Your Evaluation]\n"
        )

    def __call__(self, question: str, ans_a: str, ans_b: str):
        """Judge one answer pair and return the verdict with bookkeeping data.

        Returns a dict with the judge's full response (the verdict tag is
        appended when the response does not already end with it), the
        comparison result as ``"A>B"`` / ``"B>A"`` / ``"A=B"``, the token
        usage and the elapsed time reported by `query`.
        """
        judge_text, token_usage, seconds = query(
            self.generate_prompt(question, ans_a, ans_b), self.model_name
        )

        verdict = infer_verdict(judge_text)
        # Guarantee the stored response finishes with the canonical tag.
        if not judge_text.rstrip().endswith(verdict):
            judge_text = f"{judge_text}\n\n{verdict}"

        outcome_by_tag = {"[[A]]": "A>B", "[[B]]": "B>A", "[[C]]": "A=B"}
        outcome = outcome_by_tag[verdict]

        return {
            "judge_response": judge_text,
            "judge result": outcome,
            "cost_tokens": token_usage,
            "time": seconds,
        }

def main(input_path, output_dir, start_id=1, model_name="deepseek-r1"):
    """Judge every item of a JSON dataset and write one result file per item.

    The input file holds either a list of items or a single item (a dict).
    Each item must provide ``question``, ``response_A`` and ``response_B``;
    ``id``, ``model_A`` and ``model_B`` are optional. An item without ``id``
    is identified by its 1-based position in the list.

    Processing starts at the item whose id equals ``start_id`` (compared as
    strings). Items whose output file already exists are skipped, so an
    interrupted run can be resumed. The loop stops at the first failure so
    the run can be restarted from that id.

    Args:
        input_path: Path of the JSON input file.
        output_dir: Directory for the per-item result files (created if
            missing).
        start_id: Id of the first item to process.
        model_name: Judge model name, used for the request and recorded in
            each result.
    """
    os.makedirs(output_dir, exist_ok=True)
    judge = Judge(model_name)
    with open(input_path, encoding="utf-8") as f:
        data = json.load(f)
    data = data if isinstance(data, list) else [data]

    # Map id -> index so the run can resume from an arbitrary id. Uses the
    # same positional fallback as the loop below, so an item without "id"
    # no longer aborts the whole run with a KeyError.
    id_to_idx = {
        str(item.get("id", idx + 1)): idx for idx, item in enumerate(data)
    }
    start_key = str(start_id)
    if start_key not in id_to_idx:
        # Previously this case silently restarted from the first item.
        print(f"⚠️ 未找到起始 id={start_id}，将从第一条开始")
    start_idx = id_to_idx.get(start_key, 0)

    for idx, item in enumerate(data[start_idx:], start=start_idx):
        cur_id = item.get("id", idx + 1)
        # File name pattern kept as-is so results from earlier runs are still
        # recognised and skipped. NOTE(review): ":" is not a valid file-name
        # character on Windows.
        output_file = os.path.join(output_dir, f"id:{cur_id}.json")
        if os.path.exists(output_file):
            print(f"文件已存在，跳过 id={cur_id}")
            continue

        try:
            rec = judge(item["question"], item["response_A"], item["response_B"])
            ordered = OrderedDict([
                ("id", cur_id),
                ("question", item.get("question", "")),
                ("response_a", item.get("response_A", "")),
                ("response_b", item.get("response_B", "")),
                ("model_A", item.get("model_A", "")),
                ("model_B", item.get("model_B", "")),
                # Record the model actually used instead of a second literal.
                ("judge model", judge.model_name),
                ("judge_response", rec["judge_response"]),
                ("judge result", rec["judge result"]),  # A>B / B>A / A=B
                ("cost_tokens", rec["cost_tokens"]),
                ("time", rec["time"]),  # seconds
            ])

            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(ordered, f, ensure_ascii=False, indent=4)
            print(f"✅ id={cur_id} 已保存到 {output_file}")

        except Exception as e:
            # Top-level boundary: report the failure and stop, leaving the
            # remaining items for a later resumed run.
            print(f"❌ id={cur_id} 处理失败: {e}")
            traceback.print_exc()
            break

if __name__ == "__main__":
    # Input file, output directory and resume id are taken from the command
    # line. The original left the two paths blank (a syntax error), so they
    # are required arguments here.
    import argparse

    parser = argparse.ArgumentParser(
        description="Judge pairs of model responses and save one JSON result per item."
    )
    parser.add_argument("input_path", help="path of the JSON input file")
    parser.add_argument("output_dir", help="directory for the per-item result files")
    # "93" is the start id that was hard-coded in the original script.
    parser.add_argument(
        "--start-id",
        default="93",
        help="id of the first item to process (default: %(default)s)",
    )
    args = parser.parse_args()

    input_path = args.input_path
    output_dir = args.output_dir
    start_id = args.start_id

    main(input_path, output_dir, start_id)
