# -*- coding: utf-8 -*-
import os
import json
import asyncio
import time
from pathlib import Path
from typing import List, Dict, Any, Optional
from tqdm.asyncio import tqdm
import traceback
import pdb

from agentscope.model import DashScopeChatModel, OpenAIChatModel

# API credentials are read from the environment; either value is None when the
# variable is not set.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# JSONL input read by main(); records are accessed through the "id", "prompt"
# and "criterions" keys.
INPUT_FILE = "../deep_research_bench/data/criteria_data/criteria.jsonl"
# JSONL output written by main(). Note that this is an absolute path.
OUTPUT_FILE = "/data/raw_data.jsonl"
MODEL_NAME = "gemini-3-pro-preview"  # model used for the tree-generation calls (MODEL below)
BATCH_SIZE = 3  # NOTE(review): not referenced anywhere in the visible code
TEMPERATURE = 0.7  # sampling temperature passed on every model call
MAX_TRIES_PER_BATCH = 30  # maximum attempts per request in call_model_with_retry
API_CALL_INTERVAL = 2.0  # base delay in seconds; retries wait API_CALL_INTERVAL * attempt number


# Model used for query simplification and rubric-to-intent conversion
# (the "first call" in process_sample).
INTENT_MODEL = OpenAIChatModel(
    api_key=OPENAI_API_KEY,
    model_name="gpt-4.1-2025-04-14",
    stream=False,
    client_args={
        "base_url": os.environ.get("OPENAI_BASE_URL"),
    },
)

# ===== Model initialization =====
# MODEL generates the clarification trees. The backend is chosen from the
# substring "qwen" in MODEL_NAME.
if "qwen" in MODEL_NAME:
    # NOTE(review): the DashScope client is given OPENAI_API_KEY -- confirm this
    # is intentional and not a missing DashScope-specific key.
    MODEL = DashScopeChatModel(
        api_key=OPENAI_API_KEY,
        model_name=MODEL_NAME,
        enable_thinking=False,
        stream=False,
    )
else:
    # Non-qwen models go through the OpenAI-style client using GEMINI_API_KEY and
    # the same OPENAI_BASE_URL as INTENT_MODEL -- presumably an OpenAI-compatible
    # gateway; verify against the deployment.
    MODEL = OpenAIChatModel(
        api_key=GEMINI_API_KEY,
        model_name=MODEL_NAME,
        stream=False,
        client_args={
            "base_url": os.environ.get("OPENAI_BASE_URL"),
        },
    )

def build_prompt_0(task: str, rubrics: Dict) -> tuple[str, str]:
    """Build the two first-stage prompts for a sample.

    Args:
        task: The original user query; inserted only into the simplification
            prompt.
        rubrics: Evaluation rubric; serialized as indented JSON (non-ASCII
            characters kept as-is) and inserted only into the intent-transfer
            prompt.

    Returns:
        A ``(simplify_prompt, intent_transfer_prompt)`` pair. The first asks
        the model for a JSON object with ``simple_query`` and
        ``missing_intent``; the second asks for a JSON object with
        ``comprehensiveness`` and ``insight`` intent lists.
    """
    rubrics_str = json.dumps(rubrics, ensure_ascii=False, indent=2)
    simplify_prompt = f"""You are a query analysis and transformation expert. Your task is to receive an "original query" and perform the following operations:
1. Simplify Query: Convert the original query into a more concise and broader `simple_query`. This `simple_query` should only retain the most core, high-level topic of the original query.
2. Extract Intent: Convert all the information that was simplified or removed into a `missing_intent` list. This list should be expressed from the user's first-person perspective (e.g., using "I want to..." or "I need to...").

# Original Query
{task}

# Important Rules
1. Maintain a Valid Query Format: The `simple_query` must remain a well-formed user request. It should preserve the original's core action (e.g., "analyze," "collect," "research") or question structure (e.g., "what is," "how does"). Do not reduce it to a mere topic phrase or a few keywords. For example, simplify "Collect data on X and analyze Y" to "Research the relationship between X and Y," not just "X and Y relationship."
2. If the original query is already very short, specific, and has no extra details that can be simplified (e.g., it is a single, direct technical question), then return an empty string for `simple_query` and an empty list for `missing_intent`.
3. Your output language must be the same as the original query's language. For instance, if the original query is in English, your output for the simple query and missing intents must also be in English. Conversely, if the original query is in Chinese, your output must also be in Chinese.

# Examples
1. 
Original Query: 收集整理目前中国9阶层实际收入和财务状况，特别研究得出中国的中产有哪些特点，实际中产人数，财力等
Simple Query: 收集整理中国9阶层经济情况
Missing Intent: [我希望收集的是中国9阶层的实际收入和财务状况, 我希望特别研究中国中产阶层的特点，包括实际中产人数，财力等]
2. 
Original Query: 调研国内金融机构之间的投资借贷关系与系统性风险的联系？对不同层次或类型的借贷关系和风险建模
Simple Query: 调研国内金融机构之间的关联
Missing Intent: [我希望调研的关联性包括投资借贷关系与系统性风险的联系, 我希望对不同层次或类型的借贷关系和风险建模]
3.
Original Query: 收集整理目前国际综合实力前十的保险公司的相关资料，横向比较各公司的融资情况、信誉度、过往五年的增长幅度、实际分红、未来在中国发展潜力等维度，并为我评估出最有可能在未来资产排名靠前的2-3家公司
Simple Query: 收集整理目前国际综合实力前十的保险公司的相关资料
Missing Intent: [我希望横向比较各公司的融资情况、信誉度、过往五年的增长幅度、实际分红、未来在中国发展潜力等维度, 我希望能评估出最有可能在未来资产排名靠前的2-3家公司]

# Output Format: You must strictly adhere to the following JSON format for your response.
{{
  "simple_query": "SIMPLE_QUERY_STRING",
  "missing_intent": [
    "MISSING_INTENT_1",
    "MISSING_INTENT_2"
  ]
}}
"""
    # NOTE(review): rule 3 below refers to "the original query", but this prompt
    # only receives the rubric text, not the query itself.
    intent_transfer_prompt = f"""You are an intelligent assistant specializing in converting evaluation rubrics into a clear, direct list of user intents.
# Task
Given a JSON-formatted rubric below that describes the elements of a high-quality report. You will convert these evaluation criteria into a list of intents from a first-person user's perspective.

# Rubrics
{rubrics_str}

# Important Rules
1. Combine each criterion and its explanation to distill a specific, clear user intent. Ensure that all key information points from the original rubric are included.
2. Use the first-person perspective, emulating a user who knows what they want. For example, use phrases like "I want the report to...", "The report needs to clarify...", and "Please ensure...".
3. Your output language must match the language of the original query. That is, if the original query is in English, your output intents must also be in English. Conversely, if the original query is in Chinese, your output intents must also be in Chinese.

# Output Format: Please strictly adhere to the following JSON format for the result. List all criteria from the rubric in their original order.
{{
    "comprehensiveness": [INTENT_1, INTENT_2, ...],
    "insight": [INTENT_1, INTENT_2, ...],
}}"""

    return simplify_prompt, intent_transfer_prompt
# ========== Prompt Building Functions ==========
def build_prompt_1(task: str, intent_list: List) -> str:
    """Build the prompt that asks the model for the initial clarification tree.

    Args:
        task: The coarse-grained task text inserted under the prompt's input
            section (the visible caller passes the simplified query).
        intent_list: Latent user intents; each item is rendered as one
            ``- item`` bullet line in the prompt.

    Returns:
        The complete prompt string (written in Chinese), which instructs the
        model to answer with a single JSON object containing
        ``start_node_ids`` and ``nodes``.
    """
    # One "- <intent>" line per intent, each terminated by a newline.
    intent_list_str = "".join([f"- {intent}\n" for intent in intent_list])
    return f"""# 背景
Deep Research Agent的目标是根据用户的单个任务请求，生成一份全面、深入、高质量的研究报告。为了在研究开始前就确保方向正确，智能体具备在执行任务前向用户进行多轮追问的能力，挖掘出用户没有表达在任务请求中的潜在意图。这些追问被组织成一个“澄清树”（Clarification Tree）的结构，通过与用户的交互来逐渐细化和明确任务需求。

# 任务
你的核心任务是基于给定的原始粗粒度任务和用户潜在需求列表，设计出这个澄清树的初始版本。
具体步骤如下：
1.  设计问题与选项：遍历用户潜在需求列表, 将每一项都转化为一个面向用户的、清晰具体的澄清问题和2-3个可选项，选项可以从这一项中的“例如”、“包括”、“X和Y”、“X或Y”等文字中提取。例如“希望收集的是中国9阶层的实际收入和财务状况”这一项意图可以提取成“您更关注中国9阶层哪方面的经济状况”问题以及“实际收入”和“财务状况”这两个选项.
    - 注意:所有选项内容必须来源于用户潜在需求，这就意味着每个选项都是正确的用户需求。不能凭空捏造除用户潜在需求外的其他内容作为选项。
2.  构建澄清树结构：将所有问题和选项组织成一个JSON格式的澄清树。你需要定义：
    *   哪些问题是并行的初始问题（起始节点）。
    *   哪些问题之间存在依赖关系（一个问题的答案决定了下一个问题）。
    *   确保最终输出的JSON结构严格遵循下方“输出要求”中的格式。

# 输入数据
1. 原始粗粒度任务
{task}
2. 用户潜在需求列表
{intent_list_str}

# 输出要求
你只能生成一个JSON对象，不能生成任何其他的额外内容。该对象代表澄清树。其结构必须遵循以下示例格式（你不需要编写注释）。`start_node_ids`定义了无前置依赖的起始问题节点。`nodes`对象包含了所有问题节点的定义，`next_node_id`列表定义了选择一个选项后将触发的后续问题节点。
{{
  "start_node_ids": ["q1", "q2"], # 定义起始节点，因为初始Query后有两个分支
  "nodes": {{
    "q1": {{
      "id": "q1",
      "text": "追问1",
      "options": [
        {{
          "text": "选项1A", # 内容里不要包含选项标签，比如A、B、1、2
          "next_node_id": ["q3"] # 选择1A后，跳转到节点q3
        }},
        {{
          "text": "选项1B",
          "next_node_id": ["q3"] # 选择1B后，也跳转到节点q3
        }}
      ]
    }},
    "q2": {{
      "id": "q2",
      "text": "追问2",
      "options": [
        {{
          "text": "选项2A",
          "next_node_id": [] # 这是一个终点，没有后续追问
        }},
        {{
          "text": "选项2B",
          "next_node_id": [] # 这也是一个终点
        }}
      ]
    }},
    "q3": {{
      "id": "q3",
      "text": "追问3",
      "options": [
        {{
          "text": "选项3A",
          "next_node_id": []
        }},
        {{
          "text": "选项3B",
          "next_node_id": []
        }}
      ]
    }}
  }}
}}

# 重要规则
- 你的输出语言必须和原始粗粒度任务的语言相同。如果原始任务是由中文编写的，那么生成的追问和选项内容也应该是中文。同理，如果原始任务和评分标准是由英文编写的，那么生成的追问和选项内容也应该是英文。
- 每个问题对应的选项数量必须严格规定不能小于2, 也不能多于3个!
"""

def build_prompt_2(original_tree: Dict, intent_list: List) -> str:
    """Build the prompt that asks the model to deepen an existing clarification tree.

    Args:
        original_tree: The initial tree, interpolated into the prompt as-is.
            The visible caller (process_sample) passes an already-serialized
            JSON string rather than a dict; a dict would be rendered with
            Python's ``str()`` formatting.
        intent_list: Fine-grained user intents; each item is rendered as one
            ``- item`` bullet line in the prompt.

    Returns:
        The complete prompt string (written in Chinese), including a worked
        example of an original tree and its expanded counterpart.
    """
    # One "- <intent>" line per intent, each terminated by a newline.
    intent_list_str = "".join([f"- {intent}\n" for intent in intent_list])
    # NOTE(review): in the prompt text below, two rules are both numbered "3."
    # and step 4 contains a doubled character ("澄清树树"). They are left
    # untouched here because the text is sent to the model verbatim.
    return f"""# 任务
你已经构建了一个初始的澄清树。现在，你需要通过增加更深层次的追问来扩展现有的澄清树。你的核心任务是根据一个更深层次、更细粒度的用户意图列表，深化和扩展已有的澄清树，增加其分支和深度，从而构建一个能全面捕捉用户意图的、更复杂的澄清路径，同时需要保证节点之间不存在内容重复的情况。

# 输入数据
1. 初始澄清树: 仅有少量节点甚至没有节点的简易澄清树
{original_tree}
2. 用户意图列表: 包含了用户对这个任务的所有完整需求, 其中有些需求可能需要通过联网搜索才能完成
{intent_list_str}

# 步骤指引
1. 仔细阅读和审查用户意图列表中的每一项，判断哪一项更适合通过询问用户来获取信息, 哪一项更适合通过网页搜索来获取信息。避免出现需要调研和搜索的知识重新询问回用户的情况, 导致用户的体验感降低。
2. 筛选出最适合通过询问用户来获取信息的不少于8个意图。他们往往可以影响研究方向、奠定研究范围、标准和重点的基础。
3. 遍历步骤2中筛选出来的所有意图, 将每一项转化为一个面向用户的、清晰具体的澄清问题和2-3个可选项，选项可以从意图中的“例如”、“包括”、“X和Y”、“X或Y”等文字中提取。例如“希望收集的是中国9阶层的实际收入和财务状况”这一项意图可以提取成“您更关注中国9阶层哪方面的经济状况”问题以及“实际收入”和“财务状况”这两个选项.
    - 注意:所有选项内容必须来源于用户潜在需求，这就意味着每个选项都是正确的用户需求。不能凭空捏造除用户潜在需求外的其他内容作为选项
4. 如果初始澄清树不存在任何节点，那么你直接构建一棵新澄清树树; 如果初始澄清树中存在节点, 那么你需要扩充澄清树, 节点添加步骤如下: 
    - 理解当前树的设计以及其中每个追问
    - 遍历每个节点，尽量为其每个选项设计不同的后续追问，这个追问也应该对澄清用户意图、提高最终报告质量有重要作用
    - 为这个追问节点也设计2~3个选项
    - 为每个选项设计不同的后续追问
    - 不断递归重复步骤2~4，直到为每个新增节点都构建一棵可清晰澄清用户意图的追问子树。对一个节点设计子树时可以基于某一个意图进行深挖，相当于为研究设计一个不断深入的路径，从而提高整体研究过程的深度。
    - 你最后只需要生成一个扩展后的澄清树JSON对象，不能生成任何其他的额外内容。

重要规则：
1. 仔细检查最后生成的树，如果其中存在内容高度重合的节点，对他们进行删除。
2. 每个问题对应的选项数量必须严格规定不能小于2, 也不能多于3个! 尽量让不同选项对应不同的后续追问以保证树节点的多样性和茂密性。
3. 扩展后的树结点总数不能超过20。
3. 你的输出语言必须和用户意图列表的语言相同。如果用户意图列表是由中文编写的，那么生成的追问和选项内容也应该是中文。同理，如果用户意图列表是由英文编写的，那么生成的追问和选项内容也应该是英文。

增加节点的方法可以参考如下示例。你生成树时不需要写注释。
原始树：
```json
{{
  "start_node_ids": ["q1", "q2"], # 定义起始节点，因为初始Query后有两个分支
  "nodes": {{
    "q1": {{
      "id": "q1",
      "text": "追问1",
      "options": [
        {{
          "text": "选项1A",
          "next_node_id": ["q3"] # 选择1A后，跳转到节点q3
        }},
        {{
          "text": "选项1B",
          "next_node_id": ["q3"] # 选择1B后，也跳转到节点q3
        }}
      ]
    }},
    "q2": {{
      "id": "q2",
      "text": "追问2",
      "options": [
        {{
          "text": "选项2A",
          "next_node_id": [] # 这是一个终点，没有后续追问
        }},
        {{
          "text": "选项2B",
          "next_node_id": [] # 这也是一个终点
        }}
      ]
    }},
    "q3": {{
      "id": "q3",
      "text": "追问3",
      "options": [
        {{
          "text": "选项3A",
          "next_node_id": []
        }},
        {{
          "text": "选项3B",
          "next_node_id": []
        }}
      ]
    }}
  }}
}}```
扩展后的澄清树JSON对象。它应包含初始树的所有节点，并增加新的节点和连接关系，以体现对用户深层意图的挖掘。格式需与下方示例一致。
```json
{{
  "start_node_ids": ["q1", "q2"],
  "nodes": {{
    "q1": {{
      "id": "q1",
      "text": "追问1",
      "options": [
        {{
          "text": "选项1A",
          "next_node_id": ["q3", "q4"] # 为选项1A增加一个后续追问
        }},
        {{
          "text": "选项1B",
          "next_node_id": ["q3", "q5"] # 为选项1B增加一个不同的后续追问
        }}
      ]
    }},
    "q2": {{
      "id": "q2",
      "text": "追问2",
      "options": [
        {{
          "text": "选项2A",
          "next_node_id": ["q6"] 
        }},
        {{
          "text": "选项2B",
          "next_node_id": ["q7"] 
        }}
      ]
    }},
    "q3": {{
      "id": "q3",
      "text": "追问3",
      "options": [
        {{
          "text": "选项3A",
          "next_node_id": ["q8"]
        }},
        {{
          "text": "选项3B",
          "next_node_id": ["q9"]
        }}
      ]
    }},
    "q4": {{
      "id": "q4",
      "text": "追问4",
      "options": [ # 为q4进一步构造子树，两个选项指向不同的后续追问节点q11和q12
        {{
          "text": "选项4A",
          "next_node_id": ["q11", ...]
        }},
        {{
          "text": "选项4B",
          "next_node_id": ["q12", ...]
        }}
      ]
    }},
    "q5": {{
      "id": "q5",
      "text": "追问5",
      "options": [
        {{
          "text": "选项5A",
          "next_node_id": [...]
        }},
        {{
          "text": "选项5B",
          "next_node_id": [...]
        }}
      ]
    }},
    "q6": {{
      "id": "q6",
      "text": "追问6",
      "options": [
        {{
          "text": "选项6A",
          "next_node_id": [...]
        }},
        {{
          "text": "选项6B",
          "next_node_id": [...]
        }}
      ]
    }},
    "q7": {{
      "id": "q7",
      "text": "追问7",
      "options": [
        {{
          "text": "选项7A",
          "next_node_id": [...]
        }},
        {{
          "text": "选项7B",
          "next_node_id": [...]
        }}
      ]
    }},
    ...
  }}
}}```"""


# ========== Async LLM Calling ==========
def _collect_text_parts(content: Any) -> List[str]:
    """Return the text fragments found in a response ``content`` payload.

    A plain string is returned as a single fragment. A list is treated as a
    sequence of blocks (dicts or objects) whose ``text`` value -- or, failing
    that, ``content`` value -- is collected when truthy. Any other payload
    yields no fragments.
    """
    if isinstance(content, str):
        return [content]
    if not isinstance(content, list):
        return []
    fragments = []
    for block in content:
        if isinstance(block, dict):
            text = block.get("text") or block.get("content")
        else:
            text = getattr(block, "text", None) or getattr(block, "content", None)
        if text:
            fragments.append(str(text))
    return fragments


async def call_model_async(messages: list, model: Any) -> str:
    """Call an LLM model asynchronously and return its reply as plain text.

    The model is first invoked with ``messages`` as a keyword argument and,
    if that raises ``TypeError``, positionally. The result is then reduced to
    text by trying, in order:

    1. an async-iterable (streaming) result, joining the text of all chunks;
    2. a ``content`` attribute holding a string or a list of blocks;
    3. an OpenAI-style ``dict`` (``choices[0].message.content`` /
       ``choices[0].text``) or a top-level string field;
    4. ``str(result)`` as a last resort.

    Args:
        messages: Chat messages forwarded to the model unchanged.
        model: Awaitable callable accepting ``messages`` and ``temperature``.

    Returns:
        The extracted text, stripped of surrounding whitespace (the final
        ``str(result)`` fallback is returned unstripped).

    Raises:
        Any exception raised by the model call itself is propagated.
    """
    try:
        res = await model(messages=messages, temperature=TEMPERATURE)
    except TypeError:
        # The callable may not accept ``messages`` as a keyword; retry positionally.
        res = await model(messages, temperature=TEMPERATURE)

    # Probe for streaming support separately and quietly: attribute lookup on
    # some response objects may raise something other than AttributeError
    # (e.g. dict-backed objects that map attribute access to key lookup), and
    # that should simply mean "not a stream".
    try:
        is_stream = callable(getattr(res, "__aiter__", None))
    except Exception:
        is_stream = False

    # Handle streaming response
    if is_stream:
        try:
            parts = []
            async for chunk in res:
                parts.extend(_collect_text_parts(getattr(chunk, "content", None)))
            if parts:
                return "\n".join(parts).strip()
        except Exception as exc:
            # Best effort: report the failure instead of hiding it, then fall
            # through to the non-streaming strategies below.
            print(f"Failed to read streamed response ({exc!r}); trying non-streaming parsing.")

    # Handle non-streaming response
    try:
        content = res.content
    except Exception:
        content = None
    parts = _collect_text_parts(content)
    if parts:
        return "\n".join(parts).strip()

    # OpenAI compatible structure fallback
    if isinstance(res, dict):
        try:
            choices = res.get("choices") or []
            if choices:
                first_choice = choices[0]
                if isinstance(first_choice, dict):
                    message = first_choice.get("message", {})
                    if isinstance(message, dict) and isinstance(message.get("content"), str):
                        return message["content"].strip()
                    if isinstance(first_choice.get("text"), str):
                        return first_choice["text"].strip()
            for key in ("output", "data", "message", "content", "text"):
                value = res.get(key)
                if isinstance(value, str):
                    return value.strip()
        except Exception:
            # Malformed payloads fall through to the generic string fallback.
            pass

    return str(res)

async def call_model_with_retry(messages: List[Dict], model: Any, parse_json: bool) -> Optional[Any]:
    """Call the model, retrying on errors and (optionally) on unparsable JSON.

    Up to ``MAX_TRIES_PER_BATCH`` attempts are made. After a failed attempt the
    coroutine waits ``API_CALL_INTERVAL * attempt_number`` seconds (a linearly
    growing back-off) before trying again; no wait happens after the final
    attempt because nothing follows it.

    Args:
        messages: Chat messages forwarded to ``call_model_async``.
        model: The model callable forwarded to ``call_model_async``.
        parse_json: When true, the span from the first ``{`` to the last ``}``
            of the reply is parsed with ``json.loads`` and the parsed object is
            returned; a reply without such a span counts as a failed attempt.
            When false, the raw reply text is returned.

    Returns:
        The reply text (``parse_json`` false), the parsed JSON object
        (``parse_json`` true), or ``None`` when every attempt failed.
    """
    for attempt in range(1, MAX_TRIES_PER_BATCH + 1):
        try:
            response_text = await call_model_async(messages, model)
            if not parse_json:
                return response_text

            # Tolerate prose or code fences around the JSON object.
            start_index = response_text.find('{')
            end_index = response_text.rfind('}')
            if start_index == -1 or end_index <= start_index:
                raise json.JSONDecodeError("No valid JSON object found in response.", response_text, 0)
            return json.loads(response_text[start_index : end_index + 1])

        except json.JSONDecodeError as e:
            print(f"Attempt {attempt} failed: Invalid JSON format. Error: {e}. Retrying...")
        except Exception as e:
            print(f"Attempt {attempt} failed with an unexpected error: {e}. Retrying...")
            traceback.print_exc()

        # Back off only when another attempt will actually follow.
        if attempt < MAX_TRIES_PER_BATCH:
            await asyncio.sleep(API_CALL_INTERVAL * attempt)

    print("All retry attempts failed. Returning None.")
    return None

async def process_sample(sample: Dict) -> Optional[Dict]:
    """Generate the clarification trees for one sample.

    Steps:
        1. Ask ``INTENT_MODEL`` to simplify the query and to turn the rubric
           into first-person intents.
        2. Unless the simplifier reported nothing to simplify, ask ``MODEL``
           for an initial tree from the simplified query and missing intents;
           otherwise start from an empty tree.
        3. Ask ``MODEL`` to expand that tree using the rubric-derived intents.

    Args:
        sample: Input record; reads ``sample["id"]``, ``sample["prompt"]`` and
            ``sample["criterions"]["comprehensiveness" / "insight"]``.

    Returns:
        A dict with ``id``, ``original_query``, ``simple_query``,
        ``missing_intent``, ``original_tree`` and ``finegrained_tree``, or
        ``None`` when any model call fails or returns a malformed structure.
    """
    query_preview = sample['prompt'][:50]
    print(f"Processing sample with query: '{query_preview}...'")

    # --- First Call ---
    rubrics = {
        "comprehensiveness": sample["criterions"]["comprehensiveness"],
        "insight": sample["criterions"]["insight"],
    }
    simplify_prompt, intent_transfer_prompt = build_prompt_0(sample["prompt"], rubrics)
    simplify_message = [{"role": "user", "content": simplify_prompt}]
    intent_transfer_message = [{"role": "user", "content": intent_transfer_prompt}]
    response_simplify = await call_model_with_retry(simplify_message, INTENT_MODEL, parse_json=True)
    response_intent = await call_model_with_retry(intent_transfer_message, INTENT_MODEL, parse_json=True)
    if response_simplify is None or response_intent is None:
        print(f"Failed to extract intent for query: '{query_preview}...'")
        return None

    simple_query = response_simplify.get('simple_query', '')
    missing_intent = response_simplify.get('missing_intent', [])
    comprehensiveness_intents = response_intent.get('comprehensiveness')
    insight_intents = response_intent.get('insight')
    # The model output is untrusted: the lists are concatenated below, so a
    # missing key or non-list value must be rejected instead of crashing.
    if not (
        isinstance(missing_intent, list)
        and isinstance(comprehensiveness_intents, list)
        and isinstance(insight_intents, list)
    ):
        print(f"Malformed intent response for query: '{query_preview}...'")
        return None
    rubric_intents = comprehensiveness_intents + insight_intents

    # Conversation history for the expansion call. It stays empty when no
    # initial tree was generated, because there is no earlier exchange to replay.
    conversation = []
    if simple_query == '' and missing_intent == []:
        # Nothing could be simplified: start from an empty tree.
        original_tree = {
          "start_node_ids": [],
          "nodes": {}
        }
        original_tree_str = json.dumps(original_tree, ensure_ascii=False, indent=2)
    else:
        original_tree_prompt = build_prompt_1(simple_query, missing_intent)
        original_tree_message = [{"role": "user", "content": original_tree_prompt}]
        original_tree = await call_model_with_retry(original_tree_message, MODEL, parse_json=True)
        if original_tree is None:
            print(f"Failed to build the initial tree for query: '{query_preview}...'")
            return None
        original_tree_str = json.dumps(original_tree, ensure_ascii=False, indent=2)
        conversation = [
            {"role": "user", "content": original_tree_prompt},
            {"role": "assistant", "content": original_tree_str},
        ]

    # --- Second Call ---
    finegrained_tree_prompt = build_prompt_2(original_tree_str, rubric_intents)
    conversation.append({"role": "user", "content": finegrained_tree_prompt})
    finegrained_tree = await call_model_with_retry(conversation, MODEL, parse_json=True)
    if finegrained_tree is None:
        print(f"Failed to build the fine-grained tree for query: '{query_preview}...'")
        return None

    # --- Assemble the output record ---
    return {
        "id": sample["id"],
        "original_query": sample["prompt"],
        "simple_query": simple_query,
        "missing_intent": missing_intent + rubric_intents,
        "original_tree": original_tree,
        "finegrained_tree": finegrained_tree,
    }

async def main():
    """Read the input samples, process them one by one, and write the results.

    Samples are read from ``INPUT_FILE`` (JSONL; blank lines are ignored) and
    processed sequentially with ``process_sample``. Each successful result is
    written to ``OUTPUT_FILE`` as soon as it is available, so progress
    survives an interrupted run: the first success truncates the file and
    later ones are appended. A sample that returns ``None`` or raises is
    skipped and counted as failed.
    """
    output_path = Path(OUTPUT_FILE)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    print(f"Reading samples from {INPUT_FILE}...")
    try:
        with open(INPUT_FILE, 'r', encoding='utf-8') as f:
            # Skip blank lines (e.g. a trailing newline) instead of aborting.
            samples = [json.loads(line) for line in f if line.strip()]
    except FileNotFoundError:
        print(f"Error: Input file not found at {INPUT_FILE}")
        return
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from {INPUT_FILE}: {e}")
        return

    print(f"Found {len(samples)} samples to process.")

    successful_results = []
    for sample in tqdm(samples):
        try:
            result = await process_sample(sample)
        except Exception as e:
            # Top-level boundary: one bad sample must not abort the whole batch.
            print(f"Unexpected error while processing a sample: {e}. Skipping sample.")
            traceback.print_exc()
            continue
        if result is None:
            continue
        successful_results.append(result)
        # Write only the new record; rewriting every earlier result on each
        # iteration made the total I/O quadratic in the number of samples.
        write_mode = 'w' if len(successful_results) == 1 else 'a'
        with open(OUTPUT_FILE, write_mode, encoding='utf-8') as f:
            f.write(json.dumps(result, ensure_ascii=False) + '\n')

    print("Processing complete.")
    print(f"Total samples: {len(samples)}")
    print(f"Successfully processed: {len(successful_results)}")
    print(f"Failed or skipped: {len(samples) - len(successful_results)}")



# Script entry point: run the async pipeline only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())

