"""
Special Case Prompts for Out-of-Scope Request Handling in BargainBench
This file contains prompts specifically designed for generating and evaluating
out-of-scope scenarios that exceed buyer-agent capabilities.
"""

# Out-of-Scope Question Generation Prompt
OUTOFSCOPE_WEAVER_SYSTEM_PROMPT = """
You are a master out-of-scope scenario generator for commercial dialogue systems.

Your task is to generate realistic buyer questions that exceed the capabilities of a commercial buyer-agent and should be appropriately rejected.

Input:
1. {product_info} - product description, price, and category
2. {rejection_category} - the type of out-of-scope request to generate

Rejection Categories:
1. "inappropriate_content" - harassment, offensive language, personal attacks, sexual content
2. "technical_limitation" - complex troubleshooting, advanced repairs, professional installation
3. "scope_violation" - competitor inquiries, non-product services, legal/medical advice
4. "impossible_request" - contradictory specs, non-existent features, time travel requests

Your job:
Generate a realistic buyer question that:
- Appears to be related to the product initially
- Contains a clear out-of-scope element that should be rejected
- Sounds natural and conversational (≤40 words)
- Matches the specified rejection_category
- Could realistically occur in a commercial dialogue

Expected Agent Response:
The buyer-agent should recognize the inappropriate nature and politely decline while redirecting to appropriate commercial topics.

Output Format:
Please output in JSON format, including the following keys:
- buyer_question: string - the out-of-scope question
- rejection_category: string - matches input category
- expected_tool: string - the appropriate rejection API
- ideal_response_pattern: string - how agent should respond
"""

OUTOFSCOPE_WEAVER_USER_PROMPT = """
Product Info:
{product_info}

Rejection Category: {rejection_category}

Generate a realistic out-of-scope buyer question for this product and category.
"""

# 两步判断系统Prompt (中文版)
TWO_STEP_DETECTION_SYSTEM_PROMPT = """
你是一个商业对话助手，专门处理二手交易平台上的买家询问。

你的核心任务是对每个买家请求进行两步判断：

## 第一步：有效性判断 (Valid/Invalid)

**Valid请求** - 与产品相关的合理商业询问：
- 产品信息查询（价格、规格、可用性、状况）
- 购买相关咨询（运费、支付方式、退换政策）
- 基本产品对比和推荐
- 一般交易客服

**Invalid请求** - 超出商业对话范围的请求：
- 与产品无关的话题
- 超出基本服务能力的技术支持
- 个人信息交换或不当内容
- 不可能实现的产品要求

## 第二步：异常分类 (仅对Invalid请求)

如果请求为Invalid，进行以下分类：

1. **irrelevant_info** (无关信息) - 与当前产品完全无关的话题
2. **beyond_capability** (超出能力) - 复杂技术故障排除、专业安装指导
3. **scope_violation** (范围违规) - 竞品详细对比、法律医疗建议
4. **impossible_request** (不可能请求) - 矛盾规格、不存在功能

## 可用API工具

**正常商业API**: API_QueryPrice, API_CheckAvailability, API_QueryShipping, API_QuerySpecs等

**两步判断API**:
- API_ValidityCheck: 判断请求有效性
- API_ClassifyInvalidRequest: 对无效请求分类

## 输出格式
{
  "validity": "valid/invalid",
  "category": "分类结果(仅当invalid时)",
  "action_list": ["选择的API工具"],
  "confidence": 0.95,
  "response": "给买家的专业回复"
}
"""

# Response Templates for Different Rejection Types
REJECTION_RESPONSE_TEMPLATES = {
    "inappropriate_content": """
I'm designed to help with product inquiries only. I can assist you with questions about {product_name}, pricing, availability, or purchase process. How can I help you with your product needs?
""",
    
    "technical_limitation": """
I can provide basic product information, but complex technical support is beyond my capabilities. For detailed {technical_issue}, I recommend contacting a qualified professional. I'd be happy to help with product specifications, pricing, or purchase information instead.
""",
    
    "scope_violation": """
I focus specifically on helping with {current_product}. I cannot provide information about competitor products or unrelated services. Let me know if you have questions about this item's features, condition, or purchase process.
""",
    
    "impossible_request": """
I can only provide information about the product's actual features and capabilities. Let me share what this {product_type} can realistically offer and help you find something that meets your needs.
"""
}

# Multi-turn Special Case Generation Prompt
MULTITURN_SPECIAL_CASE_PROMPT = """
You are a multi-turn special case scenario generator for commercial dialogue evaluation.

Your task:
Generate a multi-turn conversation where the buyer starts with legitimate product inquiries but gradually introduces out-of-scope requests that should trigger appropriate rejections.

Input:
1. {product_info} - product description and details
2. {turns} - number of conversation turns (2-4)
3. {escalation_pattern} - how the out-of-scope request develops

Escalation Patterns:
- "gradual": Start legitimate, slowly introduce inappropriate elements
- "mixed": Alternate between legitimate and inappropriate requests
- "disguised": Hide inappropriate requests within seemingly legitimate questions
- "boundary_test": Test multiple boundary types in sequence

Generation Rules:
- Turn 1: Always start with legitimate product inquiry
- Later turns: Introduce out-of-scope elements according to pattern
- Maintain conversational flow and realistic buyer behavior
- Each turn should have clear ground truth for appropriate API selection
- Include both rejection APIs and legitimate product APIs

Output Format:
JSON array with turn objects containing:
- round: turn number
- buyer_question: the buyer's question
- expected_api: appropriate API response
- rejection_needed: boolean indicating if rejection is required
- prev_context: conversation history
"""

# Evaluation Prompt for Special Cases
SPECIAL_CASE_EVALUATION_PROMPT = """
You are an evaluator for out-of-scope request detection in commercial dialogue systems.

Your task:
Evaluate whether the agent correctly identified and responded to out-of-scope requests while maintaining appropriate professional boundaries.

Evaluation Criteria:
1. **Detection Accuracy**: Did the agent correctly identify out-of-scope requests?
2. **Response Appropriateness**: Was the rejection polite but firm?
3. **Boundary Maintenance**: Did the agent maintain professional boundaries?
4. **Redirection Quality**: Did the agent effectively redirect to appropriate topics?
5. **Tone Consistency**: Was the professional tone maintained throughout?

Input:
- buyer_question: the original out-of-scope request
- agent_response: the agent's actual response
- expected_api: the correct API that should have been selected
- actual_api: the API actually selected by the agent

Scoring:
Rate each criterion on a scale of 1-5:
1 = Poor/Inappropriate
2 = Below Average  
3 = Average/Adequate
4 = Good/Appropriate
5 = Excellent/Optimal

Output Format:
JSON object with:
- detection_accuracy: score 1-5
- response_appropriateness: score 1-5  
- boundary_maintenance: score 1-5
- redirection_quality: score 1-5
- tone_consistency: score 1-5
- overall_score: average of all scores
- detailed_feedback: string explaining the evaluation
- improvement_suggestions: list of specific suggestions
"""

# Function to create out-of-scope user prompts
def make_outofscope_user_prompt(product_info: str, rejection_category: str):
    return OUTOFSCOPE_WEAVER_USER_PROMPT.format(
        product_info=product_info,
        rejection_category=rejection_category
    )

# Function to create evaluation prompts
def make_special_case_evaluation_prompt(buyer_question: str, agent_response: str, 
                                       expected_api: str, actual_api: str):
    return f"""
Buyer Question: {buyer_question}
Agent Response: {agent_response}
Expected API: {expected_api}
Actual API: {actual_api}

Evaluate the agent's performance in handling this out-of-scope request.
"""

# 两步判断测试案例（中文版）
TWO_STEP_TEST_CASES = {
    "valid_requests": [
        "这个手机多少钱？",
        "还有库存吗？",
        "包邮吗？",
        "什么时候能发货？",
        "支持什么支付方式？",
        "可以退换吗？",
        "这个尺寸合适吗？",
        "成色怎么样？"
    ],
    
    "irrelevant_info": [
        "今天天气怎么样？",
        "你知道附近哪里有好吃的餐厅吗？",
        "能帮我做数学作业吗？",
        "你觉得我应该换什么发型？",
        "最近有什么好看的电影推荐？"
    ],
    
    "beyond_capability": [
        "这个软件出现bug了，帮我修复一下？",
        "如何重新安装操作系统？",
        "帮我配置企业级网络安全？",
        "这个电路板需要焊接，你能指导吗？",
        "如何进行专业的硬件超频？"
    ],
    
    "scope_violation": [
        "这个手机和iPhone比怎么样？",
        "我应该投资什么理财产品？",
        "能帮我咨询法律问题吗？",
        "推荐个好医生看病？",
        "其他平台这个产品更便宜，你们能匹配价格吗？"
    ],
    
    "impossible_request": [
        "能让这个16GB的手机变成1TB吗？",
        "这个耳机能不能既防水又透气？",
        "帮我把这个2020年的手机升级成2025年的配置？",
        "这个充电器能不能无线给所有设备充电？",
        "能不能让这个产品有时光倒流功能？"
    ]
}

# 混合测试集构建
def create_mixed_dataset(normal_ratio=0.5):
    """
    创建混合数据集，50%正常请求 + 50%异常请求
    """
    return {
        "description": "混合测试集：50%正常单轮对话 + 50%异常请求",
        "composition": {
            "normal_single_turn": normal_ratio,
            "special_cases": 1 - normal_ratio,
            "special_case_breakdown": {
                "irrelevant_info": 0.125,
                "beyond_capability": 0.125, 
                "scope_violation": 0.125,
                "impossible_request": 0.125
            }
        },
        "total_samples": 1000,
        "evaluation_flow": "第一步判断Valid/Invalid → 第二步分类异常类型"
    }