"""
Orchestra Core - SubAgentRunner (SubAgent 运行器)

负责运行 SubAgent 的标准执行循环:
1. 初始化 SubAgent
2. 与环境交互执行任务
3. 收集执行结果

关键设计:
- 提供标准的 agent-environment 交互循环
- 支持步数限制和超时控制
- 收集完整的执行轨迹
"""
from __future__ import annotations

import asyncio
import inspect
from typing import Optional

from orchestra_core.interfaces import (
    Environment,
    LevelResult,
    StepRecord,
)
from orchestra_core.sub_agent import SubAgent


class SubAgentRunner:
    """Drive a SubAgent through the standard agent-environment loop.

    The runner resets the agent and the environment, then alternates
    ``agent.step`` and ``env.step`` until the environment reports ``done``,
    a step times out, or ``info.max_steps`` iterations have been executed.
    Every step is recorded and returned as part of a ``LevelResult``.
    """

    def __init__(self, step_timeout: Optional[float] = None):
        """Create a runner.

        Args:
            step_timeout: Maximum time in seconds allowed for a single
                ``agent.step`` call. ``None`` (or any other falsy value,
                such as ``0``) disables the limit.
        """
        self.step_timeout = step_timeout

    @staticmethod
    async def _resolve(value):
        """Return ``value``, awaiting it first when it is awaitable."""
        if inspect.isawaitable(value):
            return await value
        return value

    @staticmethod
    def _unpack_step_result(step_result):
        """Normalize the value returned by ``agent.step``.

        Args:
            step_result: A list/tuple of ``(action, raw_response)`` or
                ``(action, raw_response, raw_input)``.

        Returns:
            A ``(action, raw_response, raw_input)`` triple; ``raw_input`` is
            ``None`` when the agent returned only two values.

        Raises:
            TypeError: If ``step_result`` is not a list or tuple.
            ValueError: If it does not contain exactly two or three items.
        """
        if not isinstance(step_result, (list, tuple)):
            raise TypeError(f"agent.step returned unsupported type: {type(step_result)}")
        if len(step_result) == 3:
            action, raw_response, raw_input = step_result
            return action, raw_response, raw_input
        if len(step_result) == 2:
            action, raw_response = step_result
            return action, raw_response, None
        raise ValueError(f"agent.step returned {len(step_result)} values")

    async def run(self, agent: SubAgent, env: Environment) -> LevelResult:
        """Run ``agent`` against ``env`` and collect the execution trace.

        Both ``env.reset`` and ``env.step`` may be either synchronous or
        asynchronous; their return values are awaited only when awaitable.
        ``agent.step`` must return an awaitable.

        Args:
            agent: The SubAgent instance to execute.
            env: The environment the agent interacts with.

        Returns:
            LevelResult: Accumulated reward, step count, the per-step trace
            and the usage figures reported by ``agent.llm.get_usage_summary()``.

        Raises:
            RuntimeError: Wraps any ``Exception`` raised during the run; the
                original exception is attached as ``__cause__``.
        """
        try:
            # The agent is reset with the environment's basic info before the
            # environment itself is reset.
            info = env.get_basic_info()
            agent.reset(info)

            obs = await self._resolve(env.reset())

            history = []
            total_reward = 0.0
            max_steps = info.max_steps

            # Steps are reported to the agent as 1-based.
            for current_step in range(1, max_steps + 1):
                try:
                    pending = agent.step(
                        observation=obs,
                        history=history,
                        current_step=current_step,
                        max_steps=max_steps,
                    )
                    if self.step_timeout:
                        pending = asyncio.wait_for(pending, timeout=self.step_timeout)
                    step_result = await pending
                except asyncio.TimeoutError:
                    # A timed-out step ends the run. The synthetic record is
                    # flagged done=True, so the returned LevelResult.done is
                    # True in this case as well.
                    history.append(
                        StepRecord(
                            observation=obs,
                            action={"action": "error", "params": {"error": "step_timeout"}},
                            reward=0.0,
                            raw_response="step timeout",
                            done=True,
                            info={"error": "step_timeout"},
                            raw_input=None,
                        )
                    )
                    break

                action, raw_response, raw_input = self._unpack_step_result(step_result)

                # env.step gets the same sync/async tolerance as env.reset.
                obs_next, reward, done, step_info = await self._resolve(env.step(action))

                history.append(
                    StepRecord(
                        observation=obs,
                        action=action,
                        reward=reward,
                        raw_response=raw_response,
                        done=done,
                        info=step_info,
                        raw_input=raw_input,
                    )
                )
                total_reward += reward
                obs = obs_next

                if done:
                    break

            usage_summary = agent.llm.get_usage_summary()
            return LevelResult(
                model=usage_summary.get("model", agent.sub_model),
                total_reward=total_reward,
                steps=len(history),
                done=history[-1].done if history else False,
                trace=history,
                cost=usage_summary.get("total_cost", 0.0),
                input_tokens=usage_summary.get("total_input_tokens", 0),
                output_tokens=usage_summary.get("total_output_tokens", 0),
            )

        except Exception as e:
            raise RuntimeError(f"SubAgent execution failed: {e}") from e
