import json
import os
import random

def load_original_policy(policy_path=None):
    """Load the original retail policy document.

    Args:
        policy_path: Optional path to the policy markdown file. When omitted,
            the original hard-coded tau-bench retail ``wiki.md`` location is
            used, so existing zero-argument callers keep working.

    Returns:
        The full text of the policy file, decoded as UTF-8.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    if policy_path is None:
        # Default kept identical to the path the function always used.
        policy_path = "/code/jiateng-sandbox/taubench_application/tau-bench/tau_bench/envs/retail/wiki.md"
    with open(policy_path, 'r', encoding='utf-8') as f:
        return f.read()

def load_data_files(data_dir=None):
    """Load orders, products, and users data.

    Args:
        data_dir: Optional directory containing ``orders.json``,
            ``products.json`` and ``users.json``. When omitted, the original
            hard-coded tau-bench retail data directory is used, so existing
            zero-argument callers keep working.

    Returns:
        A tuple ``(orders, products, users)`` holding the parsed JSON content
        of the three files, in that order.

    Raises:
        OSError: If one of the files cannot be opened.
        json.JSONDecodeError: If one of the files is not valid JSON.
    """
    if data_dir is None:
        # Default kept identical to the directory the function always used.
        data_dir = "/code/jiateng-sandbox/taubench_application/tau-bench/tau_bench/envs/retail/data"

    def _read_json(file_name):
        # Parse a single UTF-8 encoded JSON file located in data_dir.
        with open(os.path.join(data_dir, file_name), 'r', encoding='utf-8') as f:
            return json.load(f)

    orders = _read_json("orders.json")
    products = _read_json("products.json")
    users = _read_json("users.json")

    return orders, products, users

def load_specifications(specs_path=None):
    """Load and parse specifications from Output.txt.

    The file is scanned line by line. A line starting with one of the section
    headers ("Fact Illustration:", "Behavior Specification:",
    "Workflow Specification (Simple)", "Workflow Specification (Complex)")
    switches the active section. Within a section:

    * a line starting with ``"Content":`` begins a new item,
    * a line starting with ``"Complexity Level":`` adds an integer field to
      the item currently being built,
    * a line starting with ``"Valid Scope":`` completes the item and stores it
      in the list of the active section.

    Args:
        specs_path: Optional path of the specification file. When omitted, the
            original hard-coded ``Output.txt`` location is used, so existing
            zero-argument callers keep working.

    Returns:
        A tuple ``(fact_illustrations, behavior_specifications,
        workflow_simple, workflow_complex)`` of lists of dicts. Each dict has
        the keys "Content" and "Valid Scope", plus "Complexity Level" when
        that line was seen between the item's "Content" and "Valid Scope"
        lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError, SyntaxError: If a "Valid Scope" value is not a plain
            Python literal, or a "Complexity Level" value is not an integer.
    """
    # Function-scope import: the module's top-level imports do not include ast.
    import ast

    if specs_path is None:
        # Default kept identical to the path the function always used.
        specs_path = "/code/jiateng-sandbox/taubench_application/CPT_creation/Output.txt"
    with open(specs_path, 'r', encoding='utf-8') as f:
        content = f.read()

    fact_illustrations = []
    behavior_specifications = []
    workflow_simple = []
    workflow_complex = []

    # Header prefix -> list that collects the items of that section.
    section_targets = (
        ("Fact Illustration:", fact_illustrations),
        ("Behavior Specification:", behavior_specifications),
        ("Workflow Specification (Simple)", workflow_simple),
        ("Workflow Specification (Complex)", workflow_complex),
    )

    current_target = None  # no section header seen yet
    current_item = {}

    for raw_line in content.split('\n'):
        line = raw_line.strip()

        header_target = next(
            (target for header, target in section_targets if line.startswith(header)),
            None,
        )
        if header_target is not None:
            current_target = header_target
            continue

        if line.startswith('"Content":'):
            # Drop the key prefix, then any trailing quote/comma characters.
            content_text = line.split('"Content": "')[1].rstrip('",')
            current_item = {"Content": content_text}
        elif line.startswith('"Valid Scope":'):
            scope_text = line.split('"Valid Scope": ')[1]
            # SECURITY: this used to be eval(), which would execute arbitrary
            # code embedded in the file. literal_eval only accepts plain
            # literals (lists, strings, numbers, ...) and rejects anything else.
            current_item["Valid Scope"] = ast.literal_eval(scope_text.rstrip(','))

            # "Valid Scope" closes the item; items seen before any section
            # header are discarded.
            if current_target is not None:
                current_target.append(current_item.copy())

            current_item = {}
        elif line.startswith('"Complexity Level":'):
            # NOTE(review): only retained when this line sits between the
            # item's "Content" and "Valid Scope" lines — confirm file layout.
            complexity = int(line.split('"Complexity Level": ')[1].rstrip(','))
            current_item["Complexity Level"] = complexity

    return fact_illustrations, behavior_specifications, workflow_simple, workflow_complex

def generate_fact_illustration_questions(fact_illustrations, policy_token):
    """Build question/answer training records for fact illustrations.

    Each entry of ``fact_illustrations`` is a dict whose "Content" string is
    checked against a fixed set of marker phrases. A matching phrase selects a
    hand-written list of question/answer pairs; any other content produces a
    single generic pair built from the content itself.

    Args:
        fact_illustrations: Iterable of dicts with a "Content" key.
        policy_token: Text inserted into every question to name the policy.

    Returns:
        A list of ``{"text": "Question: ...\\n\\nAnswer: ..."}`` dicts, in the
        order the facts (and their pairs) were processed.
    """
    records = []

    for illustration in fact_illustrations:
        fact_text = illustration["Content"]

        # Choose the (question, answer) pairs that belong to this fact.
        if "EST and 24 hour based" in fact_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', what time format is used in the database?",
                    "All times in the database are EST and 24 hour based.",
                ),
                (
                    f"Based on the '{policy_token}', how should time values be interpreted?",
                    "All times are in EST timezone and use 24 hour format.",
                ),
                (
                    f"Based on the '{policy_token}', what does \"02:30:00\" mean in the system?",
                    "\"02:30:00\" means 2:30 AM EST.",
                ),
                (
                    f"Based on the '{policy_token}', what timezone are database times stored in?",
                    "All times in the database are stored in EST (Eastern Standard Time).",
                ),
            ]
        elif "user has a profile" in fact_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', what information is stored in a user profile?",
                    "Each user has a profile containing email, default address, user id, and payment methods.",
                ),
                (
                    f"Based on the '{policy_token}', what types of payment methods are supported?",
                    "Payment methods can be gift card, paypal account, or credit card.",
                ),
                (
                    f"Based on the '{policy_token}', what are the components of a user profile?",
                    "User profiles include email, default address, user id, and payment methods.",
                ),
                (
                    f"Based on the '{policy_token}', what payment options are available to users?",
                    "Users can pay with gift cards, paypal accounts, or credit cards.",
                ),
            ]
        elif "50 types of products" in fact_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', how many product types does the retail store have?",
                    "The retail store has 50 types of products.",
                ),
                (
                    f"Based on the '{policy_token}', how are product variants organized?",
                    "For each product type, there are variant items with different options.",
                ),
                (
                    f"Based on the '{policy_token}', what is an example of product items with different options?",
                    "For a 't shirt' product, there could be items with options like 'color blue size M' and 'color red size L'.",
                ),
            ]
        elif "unique product id" in fact_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', how do product and item IDs relate to each other?",
                    "Each product has a unique product id, and each item has a unique item id. They have no relations and should not be confused.",
                ),
                (
                    f"Based on the '{policy_token}', what is the relationship between product IDs and item IDs?",
                    "Product IDs and item IDs are unique and unrelated - they should not be confused with each other.",
                ),
            ]
        elif "order can be in status" in fact_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', what are the possible order statuses?",
                    "Orders can be in status 'pending', 'processed', 'delivered', or 'cancelled'.",
                ),
                (
                    f"Based on the '{policy_token}', which order statuses allow actions to be taken?",
                    "You can only take action on pending or delivered orders.",
                ),
                (
                    f"Based on the '{policy_token}', when can an agent take action on an order?",
                    "Generally, actions can only be taken on pending or delivered orders.",
                ),
                (
                    f"Based on the '{policy_token}', what order statuses exist in the system?",
                    "The possible order statuses are: pending, processed, delivered, and cancelled.",
                ),
            ]
        else:
            # Unrecognised fact: the lower-cased content doubles as the question.
            qa_pairs = [
                (f"Based on the '{policy_token}', {fact_text.lower()}", fact_text),
            ]

        records.extend(
            {"text": f"Question: {question}\n\nAnswer: {answer}"}
            for question, answer in qa_pairs
        )

    return records

def generate_comprehensive_behavior_questions(behavior_specifications, policy_token):
    """Build question/answer training records for behavior specifications.

    Each entry of ``behavior_specifications`` is a dict whose "Content" string
    is checked against a fixed set of marker phrases. A matching phrase
    selects eight hand-written question/answer pairs; any other content
    produces one generic pair that quotes the first 50 characters of the
    content in the question.

    Args:
        behavior_specifications: Iterable of dicts with a "Content" key.
        policy_token: Text inserted into every question to name the policy.

    Returns:
        A list of ``{"text": "Question: ...\\n\\nAnswer: ..."}`` dicts, in the
        order the specifications (and their pairs) were processed.
    """
    records = []

    for behavior in behavior_specifications:
        rule_text = behavior["Content"]

        # Choose the (question, answer) pairs that belong to this rule.
        if "authenticate the user identity" in rule_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', how must user identity be authenticated?",
                    "User identity must be authenticated by locating their user id via email, or via name + zip code.",
                ),
                (
                    f"Based on the '{policy_token}', what is required even when the user provides their user id?",
                    "Authentication must be done even when the user already provides the user id.",
                ),
                (
                    f"Based on the '{policy_token}', what authentication methods are available?",
                    "Authentication can be done via email, or via name + zip code combination.",
                ),
                (
                    f"Based on the '{policy_token}', when must user authentication occur?",
                    "User authentication must happen at the beginning of the conversation.",
                ),
                (
                    f"Based on the '{policy_token}', can you skip authentication if the user provides their user id?",
                    "No, authentication must be done even when the user already provides the user id.",
                ),
                (
                    f"Based on the '{policy_token}', what are the two ways to authenticate a user?",
                    "Authentication can be done via email, or via name + zip code combination.",
                ),
                (
                    f"Based on the '{policy_token}', is email alone sufficient for authentication?",
                    "Yes, email alone is sufficient for authentication.",
                ),
                (
                    f"Based on the '{policy_token}', what combination of information can be used for authentication besides email?",
                    "Name + zip code combination can be used for authentication.",
                ),
            ]
        elif "one user per conversation" in rule_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', how many users can be helped per conversation?",
                    "You can only help one user per conversation, but can handle multiple requests from the same user.",
                ),
                (
                    f"Based on the '{policy_token}', what should happen if a user asks about another user?",
                    "You must deny any requests for tasks related to any other user.",
                ),
                (
                    f"Based on the '{policy_token}', can multiple users be served in one conversation?",
                    "No, only one user can be served per conversation.",
                ),
                (
                    f"Based on the '{policy_token}', what is the policy on handling multiple user requests?",
                    "One user per conversation, but multiple requests from that same user are allowed.",
                ),
                (
                    f"Based on the '{policy_token}', can you handle multiple requests from the same user?",
                    "Yes, you can handle multiple requests from the same user.",
                ),
                (
                    f"Based on the '{policy_token}', what must you deny?",
                    "You must deny any requests for tasks related to any other user.",
                ),
                (
                    f"Based on the '{policy_token}', if a user asks about their friend's order, what should you do?",
                    "You must deny the request as it relates to another user.",
                ),
                (
                    f"Based on the '{policy_token}', can you switch between helping different users in one conversation?",
                    "No, you cannot switch between helping different users in one conversation.",
                ),
            ]
        elif "explicit user confirmation" in rule_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', what must happen before taking consequential actions?",
                    "Before taking consequential actions that update the database, you must list the action detail and obtain explicit user confirmation (yes) to proceed.",
                ),
                (
                    f"Based on the '{policy_token}', when is explicit user confirmation required?",
                    "Explicit user confirmation is required before actions that update the database (cancel, modify, return, exchange).",
                ),
                (
                    f"Based on the '{policy_token}', what actions require user confirmation?",
                    "Cancel, modify, return, and exchange actions require user confirmation.",
                ),
                (
                    f"Based on the '{policy_token}', what must be done before updating the database?",
                    "You must list the action detail and obtain explicit user confirmation before updating the database.",
                ),
                (
                    f"Based on the '{policy_token}', what should you list before getting confirmation?",
                    "You must list the action detail before getting confirmation.",
                ),
                (
                    f"Based on the '{policy_token}', what specific word indicates user confirmation?",
                    "The specific word 'yes' indicates user confirmation.",
                ),
                (
                    f"Based on the '{policy_token}', can you proceed without user confirmation for database updates?",
                    "No, you cannot proceed without explicit user confirmation for database updates.",
                ),
                (
                    f"Based on the '{policy_token}', which operations are considered consequential actions?",
                    "Cancel, modify, return, and exchange operations are considered consequential actions.",
                ),
            ]
        elif "one tool call at a time" in rule_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', how many tool calls can be made at a time?",
                    "You should at most make one tool call at a time.",
                ),
                (
                    f"Based on the '{policy_token}', what is the rule about tool calls and user responses?",
                    "If you take a tool call, you should not respond to the user at the same time. If you respond to the user, you should not make a tool call.",
                ),
                (
                    f"Based on the '{policy_token}', can you make a tool call while responding to the user?",
                    "No, you cannot make a tool call while responding to the user.",
                ),
                (
                    f"Based on the '{policy_token}', what is the constraint on simultaneous tool calls and responses?",
                    "Tool calls and user responses cannot happen simultaneously - it's one or the other.",
                ),
                (
                    f"Based on the '{policy_token}', if you take a tool call, what should you not do?",
                    "If you take a tool call, you should not respond to the user at the same time.",
                ),
                (
                    f"Based on the '{policy_token}', if you respond to the user, what should you not do?",
                    "If you respond to the user, you should not make a tool call.",
                ),
                (
                    f"Based on the '{policy_token}', can you make multiple tool calls simultaneously?",
                    "No, you should make at most one tool call at a time.",
                ),
                (
                    f"Based on the '{policy_token}', what is the maximum number of tool calls per turn?",
                    "The maximum number of tool calls per turn is one.",
                ),
            ]
        elif "not make up any information" in rule_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', what should you not make up?",
                    "You should not make up any information, knowledge, or procedures not provided from the user or the tools.",
                ),
                (
                    f"Based on the '{policy_token}', what should you not give?",
                    "You should not give subjective recommendations or comments.",
                ),
                (
                    f"Based on the '{policy_token}', what is prohibited regarding information creation?",
                    "Making up information or knowledge or procedures not provided is prohibited.",
                ),
                (
                    f"Based on the '{policy_token}', can you provide subjective recommendations?",
                    "No, you should not give subjective recommendations.",
                ),
                (
                    f"Based on the '{policy_token}', what sources should information come from?",
                    "Information should come from the user or the tools.",
                ),
                (
                    f"Based on the '{policy_token}', are comments allowed?",
                    "No, you should not give subjective comments.",
                ),
                (
                    f"Based on the '{policy_token}', can you invent procedures not provided?",
                    "No, you cannot invent procedures not provided from the user or tools.",
                ),
                (
                    f"Based on the '{policy_token}', what type of knowledge should you avoid making up?",
                    "You should not make up any knowledge not provided from the user or tools.",
                ),
            ]
        elif "transfer the user to a human agent" in rule_text:
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', when should you transfer the user to a human agent?",
                    "You should transfer the user to a human agent if and only if the request cannot be handled within the scope of your actions.",
                ),
                (
                    f"Based on the '{policy_token}', what is the only condition for transferring to human agent?",
                    "The only condition is if the request cannot be handled within the scope of your actions.",
                ),
                (
                    f"Based on the '{policy_token}', can you transfer users for any reason?",
                    "No, you should only transfer if and only if the request cannot be handled within your scope.",
                ),
                (
                    f"Based on the '{policy_token}', what determines if a request can be handled?",
                    "Whether a request is within the scope of your actions determines if it can be handled.",
                ),
                (
                    f"Based on the '{policy_token}', if a request is within your scope, should you transfer?",
                    "No, if a request is within your scope, you should not transfer.",
                ),
                (
                    f"Based on the '{policy_token}', what phrase indicates when to transfer?",
                    "The phrase 'if and only if' indicates the specific condition for transfer.",
                ),
                (
                    f"Based on the '{policy_token}', should you transfer if you're unsure about policy?",
                    "No, you should only transfer if the request cannot be handled within your scope of actions.",
                ),
                (
                    f"Based on the '{policy_token}', what is the criteria for human agent transfer?",
                    "The criteria is that the request cannot be handled within the scope of your actions.",
                ),
            ]
        else:
            # Unrecognised rule: quote the first 50 characters in the question.
            qa_pairs = [
                (
                    f"Based on the '{policy_token}', what is the rule about {rule_text[:50]}...?",
                    rule_text,
                ),
            ]

        records.extend(
            {"text": f"Question: {question}\n\nAnswer: {answer}"}
            for question, answer in qa_pairs
        )

    return records

def generate_detailed_workflow_questions(workflow_specs, policy_token, is_complex=False):
    """Generate detailed if-else branch questions for workflow specifications.

    Each entry of ``workflow_specs`` is a dict whose "Content" string is
    checked against a fixed set of marker phrases. Which set is consulted
    depends on ``is_complex``. A matching phrase selects eight hand-written
    question/answer pairs. A spec that matches no phrase contributes nothing.

    Args:
        workflow_specs: Iterable of dicts with a "Content" key.
        policy_token: Text inserted into every question to name the policy.
        is_complex: When False (default) the simple-workflow phrases are
            used; when True the complex-workflow phrases are used.

    Returns:
        A list of ``{"text": "Question: ...\\n\\nAnswer: ..."}`` dicts, in the
        order the specifications (and their pairs) were processed.
    """
    cpt_data = []

    for spec in workflow_specs:
        content = spec["Content"]

        # Reset per spec. Without this, a spec matching no branch either
        # raised UnboundLocalError (first iteration) or silently re-emitted
        # the previous spec's question/answer pairs.
        questions = []
        answers = []

        # For simple workflow specifications
        if not is_complex:
            if "cancelled if its status is 'pending'" in content:
                questions = [
                    f"Based on the '{policy_token}', can you cancel an order if its status is 'pending'?",
                    f"Based on the '{policy_token}', can you cancel an order if its status is 'delivered'?",
                    f"Based on the '{policy_token}', can you cancel an order if its status is 'processed'?",
                    f"Based on the '{policy_token}', what should you check before cancelling an order?",
                    f"Based on the '{policy_token}', what is the prerequisite for order cancellation?",
                    f"Based on the '{policy_token}', if an order status is 'cancelled', can you cancel it again?",
                    f"Based on the '{policy_token}', what must you do before taking the cancellation action?",
                    f"Based on the '{policy_token}', is checking order status required before cancellation?"
                ]
                answers = [
                    "Yes, an order can be cancelled if its status is 'pending'.",
                    "No, an order cannot be cancelled if its status is 'delivered'.",
                    "No, an order cannot be cancelled if its status is 'processed'.",
                    "You should check its status before taking the action.",
                    "The order status must be 'pending'.",
                    "No, an order that is already 'cancelled' cannot be cancelled again.",
                    "You must check its status before taking the action.",
                    "Yes, checking order status is required before cancellation."
                ]

            elif "modified if its status is 'pending'" in content:
                questions = [
                    f"Based on the '{policy_token}', can you modify an order if its status is 'pending'?",
                    f"Based on the '{policy_token}', can you modify an order if its status is 'delivered'?",
                    f"Based on the '{policy_token}', can you modify an order if its status is 'processed'?",
                    f"Based on the '{policy_token}', what should you check before modifying an order?",
                    f"Based on the '{policy_token}', what is the prerequisite for order modification?",
                    f"Based on the '{policy_token}', if an order status is 'cancelled', can you modify it?",
                    f"Based on the '{policy_token}', what must you do before taking the modification action?",
                    f"Based on the '{policy_token}', is checking order status required before modification?"
                ]
                answers = [
                    "Yes, an order can be modified if its status is 'pending'.",
                    "No, an order cannot be modified if its status is 'delivered'.",
                    "No, an order cannot be modified if its status is 'processed'.",
                    "You should check its status before taking the action.",
                    "The order status must be 'pending'.",
                    "No, an order that is 'cancelled' cannot be modified.",
                    "You must check its status before taking the action.",
                    "Yes, checking order status is required before modification."
                ]

            elif "returned if its status is 'delivered'" in content:
                questions = [
                    f"Based on the '{policy_token}', can you return an order if its status is 'delivered'?",
                    f"Based on the '{policy_token}', can you return an order if its status is 'pending'?",
                    f"Based on the '{policy_token}', can you return an order if its status is 'processed'?",
                    f"Based on the '{policy_token}', what should you check before returning an order?",
                    f"Based on the '{policy_token}', what is the prerequisite for order return?",
                    f"Based on the '{policy_token}', if an order status is 'cancelled', can you return it?",
                    f"Based on the '{policy_token}', what must you do before taking the return action?",
                    f"Based on the '{policy_token}', is checking order status required before return?"
                ]
                answers = [
                    "Yes, an order can be returned if its status is 'delivered'.",
                    "No, an order cannot be returned if its status is 'pending'.",
                    "No, an order cannot be returned if its status is 'processed'.",
                    "You should check its status before taking the action.",
                    "The order status must be 'delivered'.",
                    "No, an order that is 'cancelled' cannot be returned.",
                    "You must check its status before taking the action.",
                    "Yes, checking order status is required before return."
                ]

            elif "exchanged if its status is 'delivered'" in content:
                questions = [
                    f"Based on the '{policy_token}', can you exchange an order if its status is 'delivered'?",
                    f"Based on the '{policy_token}', can you exchange an order if its status is 'pending'?",
                    f"Based on the '{policy_token}', can you exchange an order if its status is 'processed'?",
                    f"Based on the '{policy_token}', what should you check before exchanging an order?",
                    f"Based on the '{policy_token}', what is the prerequisite for order exchange?",
                    f"Based on the '{policy_token}', if an order status is 'cancelled', can you exchange it?",
                    f"Based on the '{policy_token}', what must you do before taking the exchange action?",
                    f"Based on the '{policy_token}', is checking order status required before exchange?"
                ]
                answers = [
                    "Yes, an order can be exchanged if its status is 'delivered'.",
                    "No, an order cannot be exchanged if its status is 'pending'.",
                    "No, an order cannot be exchanged if its status is 'processed'.",
                    "You should check its status before taking the action.",
                    "The order status must be 'delivered'.",
                    "No, an order that is 'cancelled' cannot be exchanged.",
                    "You must check its status before taking the action.",
                    "Yes, checking order status is required before exchange."
                ]

            elif "confirm the order id and the reason" in content:
                questions = [
                    f"Based on the '{policy_token}', what information must the user confirm for cancellation?",
                    f"Based on the '{policy_token}', what are the valid reasons for cancellation?",
                    f"Based on the '{policy_token}', can you cancel without the order id?",
                    f"Based on the '{policy_token}', can you cancel without a reason?",
                    f"Based on the '{policy_token}', is 'changed my mind' a valid cancellation reason?",
                    f"Based on the '{policy_token}', what are the two acceptable cancellation reasons?",
                    f"Based on the '{policy_token}', do you need both order id and reason for cancellation?",
                    f"Based on the '{policy_token}', can the user provide any reason for cancellation?"
                ]
                answers = [
                    "The user needs to confirm the order id and the reason for cancellation.",
                    "Valid reasons are 'no longer needed' or 'ordered by mistake'.",
                    "No, you cannot cancel without the order id.",
                    "No, you cannot cancel without a reason.",
                    "No, 'changed my mind' is not a valid cancellation reason.",
                    "The two acceptable reasons are 'no longer needed' or 'ordered by mistake'.",
                    "Yes, you need both order id and reason for cancellation.",
                    "No, the user can only provide 'no longer needed' or 'ordered by mistake' as reasons."
                ]

        # For complex workflow specifications
        else:
            if "refunded via the original payment method" in content:
                questions = [
                    f"Based on the '{policy_token}', what happens to the order status after user confirmation for cancellation?",
                    f"Based on the '{policy_token}', how is the refund processed for gift card payments?",
                    f"Based on the '{policy_token}', how long does refund take for non-gift card payments?",
                    f"Based on the '{policy_token}', what is the refund timeline for gift cards?",
                    f"Based on the '{policy_token}', what is the refund timeline for credit cards?",
                    f"Based on the '{policy_token}', which payment methods get immediate refunds?",
                    f"Based on the '{policy_token}', what is the range for non-gift card refund processing?",
                    f"Based on the '{policy_token}', does the refund method depend on the original payment method?"
                ]
                answers = [
                    "After user confirmation, the order status will be changed to 'cancelled'.",
                    "Gift card refunds are processed immediately.",
                    "Non-gift card payments take 5 to 7 business days for refund.",
                    "Gift card refunds are immediate.",
                    "Credit card refunds take 5 to 7 business days.",
                    "Only gift card payments get immediate refunds.",
                    "Non-gift card refunds take 5 to 7 business days.",
                    "Yes, the refund is processed via the original payment method."
                ]

            elif "single payment method different from the original" in content:
                questions = [
                    f"Based on the '{policy_token}', can the user choose multiple payment methods for modification?",
                    f"Based on the '{policy_token}', can the new payment method be the same as the original?",
                    f"Based on the '{policy_token}', what happens if the user wants to use a gift card with insufficient balance?",
                    f"Based on the '{policy_token}', what is required for gift card payment modifications?",
                    f"Based on the '{policy_token}', what happens to the order status after payment modification?",
                    f"Based on the '{policy_token}', how is the original payment method handled?",
                    f"Based on the '{policy_token}', when are gift card refunds processed for payment modifications?",
                    f"Based on the '{policy_token}', what is the refund timeline for original non-gift card payments?"
                ]
                answers = [
                    "No, the user can only choose a single payment method.",
                    "No, the payment method must be different from the original payment method.",
                    "The gift card must have enough balance to cover the total amount.",
                    "The gift card must have enough balance to cover the total amount.",
                    "After user confirmation, the order status will be kept 'pending'.",
                    "The original payment method will be refunded.",
                    "Original gift card payments are refunded immediately.",
                    "Original non-gift card payments are refunded in 5 to 7 business days."
                ]

            # The "modifed" spelling is the literal substring this branch has
            # always matched on; it is left untouched so matching is unchanged.
            elif "can only be called once" in content and "pending (items modifed)" in content:
                questions = [
                    f"Based on the '{policy_token}', how many times can the modify items action be called?",
                    f"Based on the '{policy_token}', what happens to the order status after item modification?",
                    f"Based on the '{policy_token}', can you modify or cancel the order after item modification?",
                    f"Based on the '{policy_token}', what should you confirm before taking this action?",
                    f"Based on the '{policy_token}', what should you remind the customer about items?",
                    f"Based on the '{policy_token}', can you change product types during modification?",
                    f"Based on the '{policy_token}', what payment method requirement exists for item modification?",
                    f"Based on the '{policy_token}', what happens if a gift card doesn't have enough balance?"
                ]
                answers = [
                    "The modify items action can only be called once.",
                    "The order status changes to 'pending (items modified)'.",
                    "No, the agent will not be able to modify or cancel the order anymore.",
                    "You should confirm all the details are right and be cautious before taking this action.",
                    "You should remind the customer to confirm they have provided all items to be modified.",
                    "No, there cannot be any change of product types, e.g. modify shirt to shoe.",
                    "The user must provide a payment method to pay or receive refund of the price difference.",
                    "If the user provides a gift card, it must have enough balance to cover the price difference."
                ]

        for q, a in zip(questions, answers):
            cpt_data.append({"text": f"Question: {q}\n\nAnswer: {a}"})

    return cpt_data

def sample_data_instances(orders, products, users, num_samples=100):
    """Draw a random subset of orders, products, and users.

    Each argument is a mapping; its ``items()`` are materialised into a list
    of ``(key, value)`` pairs and at most ``num_samples`` pairs are drawn
    without replacement using ``random.sample``.

    Args:
        orders: Mapping of order id to order data.
        products: Mapping of product id to product data.
        users: Mapping of user id to user data.
        num_samples: Maximum number of pairs to draw from each mapping.

    Returns:
        A ``(sampled_orders, sampled_products, sampled_users)`` tuple, each
        element being a list of ``(key, value)`` pairs.
    """
    order_pairs = list(orders.items())
    product_pairs = list(products.items())
    user_pairs = list(users.items())

    def _draw(pairs):
        # Cap the request so random.sample is never asked for more than exists.
        return random.sample(pairs, min(num_samples, len(pairs)))

    # Draws happen in the order orders -> products -> users, so a seeded RNG
    # produces the same selection regardless of how this is written.
    picked_orders = _draw(order_pairs)
    picked_products = _draw(product_pairs)
    picked_users = _draw(user_pairs)

    return picked_orders, picked_products, picked_users

def generate_return_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Build return-request scenarios from delivered orders.

    Walks the first ``num_samples`` ``(order_id, order_data)`` pairs and, for
    every order whose status is 'delivered' and that has at least one item,
    emits one scenario in which the user asks to return the first item.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompt.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        A list of ``{"text": ...}`` dicts, one per qualifying order.
    """
    results = []

    for order_id, order_data in sampled_orders[:num_samples]:
        line_items = order_data.get('items', [])
        order_status = order_data.get('status', 'delivered')

        # Nothing to return when the order has no items or is not delivered.
        if not line_items or order_status != 'delivered':
            continue

        user_id = order_data.get('user_id', 'unknown')
        returned_item = line_items[0]
        item_name = returned_item.get('name', 'Laptop')
        item_price = returned_item.get('price', 599.99)

        # Prompt half: the user's request plus the order facts.
        prompt = f"""Based on the '{policy_token}', here's a return scenario:

User {user_id} contacts you: "I want to return the {item_name} from order {order_id}. It doesn't work properly."

Order details:
- Status: {order_status}
- Item to return: {item_name} (${item_price:.2f})

You need to guide the user through the complete return process. What should you do?"""

        # Response half: the step-by-step handling the assistant should follow.
        response = f"""I should handle this return comprehensively:

1. **Order Status Verification**:
   - Check order {order_id} status: '{order_status}' ✓
   - Returns only allowed for 'delivered' orders ✓

2. **Required Information Collection**:
   - Order ID: {order_id} ✓ (provided)
   - Items to return: {item_name} ✓ (specified)
   - Payment method for refund: NEED TO ASK USER

3. **Ask for Missing Information**:
   "I can help you return the {item_name}. To process the refund, where would you like the ${item_price:.2f} refunded? It must go to either the original payment method or an existing gift card."

4. **After User Provides Refund Method**:
   - Verify it meets policy requirements
   - List action details for confirmation
   - Obtain explicit 'yes' confirmation

5. **After Confirmation**:
   - Order status → 'return requested'
   - User receives email with return instructions
   - Refund processed to specified method"""

        results.append({"text": f"Scenario: {prompt}\n\nAnswer: {response}"})

    return results

def generate_comprehensive_scenarios(workflow_specs, orders, products, users, policy_token, is_complex=False, num_samples=100):
    """Expand each workflow specification into scenario entries.

    For every spec the ``"Content"`` text is matched against a fixed list of
    policy phrases; the first phrase that matches selects the scenario
    generator to run. Specs that match nothing contribute no entries.

    Args:
        workflow_specs: Iterable of dicts, each with a ``"Content"`` string.
        orders, products, users: Source mappings handed to
            ``sample_data_instances``.
        policy_token: Policy marker forwarded to every generator.
        is_complex: When true, additionally enables the cancellation-refund
            timing generator.
        num_samples: Sample size forwarded to the sampler and the generators.

    Returns:
        A flat list with the entries produced for all specs, in spec order.
    """
    collected = []

    for spec in workflow_specs:
        text = spec["Content"]

        # A fresh sample is drawn for every spec (matched or not) so each
        # generator gets its own full-size sample.
        order_sample, product_sample, user_sample = sample_data_instances(orders, products, users, num_samples)

        # First match wins; the order of these checks is significant.
        # Cancellation rules
        if "cancelled if its status is 'pending'" in text:
            batch = generate_cancellation_scenarios(order_sample, user_sample, policy_token, num_samples)
        elif "confirm the order id and the reason" in text and "cancellation" in text:
            batch = generate_cancellation_confirmation_scenarios(order_sample, user_sample, policy_token, num_samples)
        # Modification rules
        elif "modified if its status is 'pending'" in text:
            batch = generate_modification_scenarios(order_sample, user_sample, policy_token, num_samples)
        elif "single payment method different from the original" in text:
            batch = generate_payment_modification_scenarios(order_sample, user_sample, policy_token, num_samples)
        # The phrase below is matched exactly as spelled here; do not "fix" it.
        elif "can only be called once" in text and "pending (items modifed)" in text:
            batch = generate_item_modification_scenarios(order_sample, product_sample, user_sample, policy_token, num_samples)
        # Return rules
        elif "returned if its status is 'delivered'" in text:
            batch = generate_return_scenarios(order_sample, user_sample, policy_token, num_samples)
        elif "refund must either go to the original payment method" in text:
            batch = generate_return_refund_scenarios(order_sample, user_sample, policy_token, num_samples)
        # Exchange rules
        elif "exchanged if its status is 'delivered'" in text:
            batch = generate_exchange_scenarios(order_sample, product_sample, user_sample, policy_token, num_samples)
        # Refund-timing rule, only considered for complex workflows
        elif is_complex and "refunded via the original payment method" in text and "cancelled" in text:
            batch = generate_complex_cancellation_refund_scenarios(order_sample, user_sample, policy_token, num_samples)
        else:
            batch = []

        collected.extend(batch)

    return collected

def generate_cancellation_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Build one cancellation scenario per sampled order.

    The scenario flavour depends on the order's status:

    * 'pending'                 -> valid cancellation with a stated reason
    * 'delivered' / 'processed' -> cancellation must be refused
    * anything else             -> user gave no reason and must be asked

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompt.
        num_samples: Upper bound on how many sampled orders are used.

    Returns:
        A list of ``{"text": ...}`` dicts, one per inspected order.
    """
    results = []

    for order_id, order_data in sampled_orders[:num_samples]:
        order_status = order_data.get('status', 'pending')
        user_id = order_data.get('user_id', 'unknown')

        if order_status == 'pending':
            # Cancellable order, reason supplied by the user.
            prompt = f"""Based on the '{policy_token}', here's a scenario:

User {user_id} contacts you saying: "Hi, I want to cancel my order {order_id}. I no longer need the items."

The order status is: {order_status}

As the assistant, what should you do?"""

            response = f"""I should follow these steps:
1. First, check the order status - the order {order_id} has status '{order_status}', which is 'pending', so cancellation is allowed.
2. The user has provided the order id ({order_id}) and a valid reason ('no longer needed').
3. I should list the action details and obtain explicit user confirmation before proceeding.
4. Since the order status is 'pending' and all requirements are met, I can proceed with the cancellation after user confirmation."""

        elif order_status in ('delivered', 'processed'):
            # Status rules out cancellation entirely.
            prompt = f"""Based on the '{policy_token}', here's a scenario:

User {user_id} contacts you saying: "I want to cancel order {order_id}. I ordered by mistake."

The order status is: {order_status}

As the assistant, what should you do?"""

            response = f"""I should follow these steps:
1. First, check the order status - the order {order_id} has status '{order_status}'.
2. According to the policy, an order can only be cancelled if its status is 'pending'.
3. Since this order status is '{order_status}', I cannot cancel it.
4. I should inform the user that cancellation is not possible for orders with status '{order_status}' and suggest appropriate alternatives (return/exchange if delivered)."""

        else:
            # Missing-reason case.
            # NOTE(review): this branch is reached only for statuses other
            # than pending/delivered/processed, yet the text states the status
            # is 'pending' -- confirm that the mismatch is intended.
            prompt = f"""Based on the '{policy_token}', here's a scenario:

User {user_id} contacts you saying: "I want to cancel order {order_id}."

The order status is: pending

As the assistant, what should you do?"""

            response = f"""I should follow these steps:
1. Check the order status - it's 'pending', so cancellation is technically allowed.
2. However, the user has not provided a reason for cancellation.
3. According to the policy, the user needs to confirm both the order id and the reason for cancellation.
4. I should ask the user to provide a valid reason ('no longer needed' or 'ordered by mistake') before proceeding."""

        results.append({"text": f"Scenario: {prompt}\n\nAnswer: {response}"})

    return results

def generate_cancellation_confirmation_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Build step-by-step cancellation-confirmation scenarios.

    One scenario is emitted for each of the first ``num_samples`` orders. The
    order total is the sum of the ``'price'`` of its items (missing prices
    count as 0). The scenario text always describes the order as 'pending'.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompt.
        num_samples: Upper bound on how many sampled orders are used.

    Returns:
        A list of ``{"text": ...}`` dicts, one per inspected order.
    """
    results = []

    for order_id, order_data in sampled_orders[:num_samples]:
        user_id = order_data.get('user_id', 'unknown')
        prices = [entry.get('price', 0) for entry in order_data.get('items', [])]
        total_amount = sum(prices)

        prompt = f"""Based on the '{policy_token}', here's a scenario:

User {user_id} wants to cancel order {order_id} (status: pending, total: ${total_amount:.2f}) with reason "no longer needed".

You need to guide the user through the cancellation process. What should you do step by step?"""

        response = f"""I should follow these steps:
1. Verify the order status is 'pending' - ✓ confirmed
2. Confirm the user has provided both required pieces of information:
   - Order ID: {order_id} ✓
   - Valid reason: "no longer needed" ✓
3. List the action details: "I will cancel order {order_id} with total ${total_amount:.2f}"
4. Obtain explicit user confirmation by asking: "Do you confirm you want to proceed with this cancellation? Please respond with 'yes' to confirm."
5. Only proceed with the cancellation after receiving explicit 'yes' confirmation from the user.
6. After confirmation, the order status will be changed to 'cancelled' and refund will be processed."""

        results.append({"text": f"Scenario: {prompt}\n\nAnswer: {response}"})

    return results

def generate_modification_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Generate comprehensive modification scenarios.

    The scenarios emitted for the general "order can be modified while
    pending" rule are exactly the payment-method-change scenarios, so this
    function delegates to ``generate_payment_modification_scenarios`` rather
    than keeping a second copy of the same templates that could drift apart.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Forwarded unchanged to the delegate.
        policy_token: Policy marker interpolated into the scenario prompts.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        The list of ``{"text": ...}`` dicts produced by
        ``generate_payment_modification_scenarios`` for the same arguments.
    """
    return generate_payment_modification_scenarios(sampled_orders, sampled_users, policy_token, num_samples)

def generate_payment_modification_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Build payment-method-change scenarios from pending orders.

    For each of the first ``num_samples`` orders that is 'pending' and has a
    non-empty payment history, a "switch to another credit card" scenario is
    emitted. For orders whose position in the sample is a multiple of three, a
    "switch to a $15 gift card" scenario is emitted just before it; its answer
    depends on whether the order total exceeds $15.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompts.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        A list of ``{"text": ...}`` dicts.
    """
    results = []

    for position, (order_id, order_data) in enumerate(sampled_orders[:num_samples]):
        user_id = order_data.get('user_id', 'unknown')
        payment_history = order_data.get('payment_history', [])
        order_status = order_data.get('status', 'pending')
        total_amount = sum(entry.get('price', 0) for entry in order_data.get('items', []))

        # Only pending orders with a recorded payment can have it changed.
        if not payment_history or order_status != 'pending':
            continue

        original_payment = payment_history[0].get('payment_method_id', 'credit_card_123')

        # Gift-card variant: position counts every sampled order, including
        # the ones skipped above.
        if position % 3 == 0:
            gift_prompt = f"""Based on the '{policy_token}', here's a scenario:

User {user_id} contacts you: "I want to change payment for order {order_id} to my gift card (gift_card_789) which has $15 balance."

Order details:
- Status: {order_status}
- Total: ${total_amount:.2f}
- Current payment: {original_payment}

As the assistant, what should you do?"""

            if total_amount > 15:
                gift_response = f"""I should handle this as follows:
1. Check order status: '{order_status}' - ✓ modification allowed
2. Check gift card requirements:
   - User wants to use gift_card_789 with $15 balance
   - Order total is ${total_amount:.2f}
   - Gift card balance ($15) is insufficient to cover total (${total_amount:.2f})
3. I must inform the user that the gift card must have enough balance to cover the total amount
4. I cannot proceed with this payment method change and should suggest alternatives (different payment method or adding funds to gift card)"""
            else:
                gift_response = f"""I should handle this as follows:
1. Check order status: '{order_status}' - ✓ modification allowed
2. Check gift card requirements:
   - Gift card balance ($15) is sufficient for order total (${total_amount:.2f}) ✓
3. Process the modification with user confirmation
4. After confirmation: original payment refunded, gift card charged"""

            results.append({"text": f"Scenario: {gift_prompt}\n\nAnswer: {gift_response}"})

        # Main variant: a valid change to a different credit card.
        prompt = f"""Based on the '{policy_token}', here's a scenario:

User {user_id} contacts you: "I want to change the payment method for order {order_id} from {original_payment} to a different credit card (credit_card_456)."

Order details:
- Status: {order_status}
- Total: ${total_amount:.2f}
- Current payment: {original_payment}

As the assistant, how should you handle this request?"""

        response = f"""I should handle this as follows:
1. Check order status: '{order_status}' - ✓ modification allowed for pending orders
2. Verify payment method requirements:
   - User wants to change from {original_payment} to credit_card_456 ✓
   - New payment method is different from original ✓
   - User can only choose a single payment method ✓
3. Process the modification:
   - List action details: "Change payment method for order {order_id} from {original_payment} to credit_card_456"
   - Obtain explicit user confirmation
4. After confirmation:
   - Order status remains 'pending'
   - Original payment method {original_payment} will be refunded (timeline depends on payment type)
   - New payment method credit_card_456 will be charged"""

        results.append({"text": f"Scenario: {prompt}\n\nAnswer: {response}"})

    return results

def generate_item_modification_scenarios(sampled_orders, sampled_products, sampled_users, policy_token, num_samples=100):
    """Generate comprehensive item modification scenarios.

    For each of the first ``num_samples`` orders that is 'pending' and has at
    least one item, one scenario is emitted in which the user asks to swap the
    first item for another variant of the same product. When the item's
    ``product_id`` is found in ``sampled_products`` and that product has
    variants, the first variant supplies the new price and options; otherwise
    a default of "original price + $5.00" and "color red size L" is used.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_products: Sequence of ``(product_id, product_data)`` pairs (a
            mapping is accepted as well).
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompt.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        A list of ``{"text": ...}`` dicts, one per qualifying order.
    """
    scenarios = []

    # sampled_products arrives as a list of (product_id, product_data) pairs,
    # so a membership test on the list itself can never match a product id.
    # Build the id -> data lookup once, outside the loop.
    product_lookup = dict(sampled_products) if sampled_products else {}

    for order_id, order_data in sampled_orders[:num_samples]:
        user_id = order_data.get('user_id', 'unknown')
        items = order_data.get('items', [])
        order_status = order_data.get('status', 'pending')

        # Item modification only applies to pending orders that have items.
        if not items or order_status != 'pending':
            continue

        original_item = items[0]
        item_name = original_item.get('name', 'T-Shirt')
        product_id = original_item.get('product_id', 'unknown')
        original_price = original_item.get('price', 25.00)

        # Defaults used when the product (or its variants) is not available.
        new_price = original_price + 5.00
        new_options = "color red size L"

        product_data = product_lookup.get(product_id)
        variants = product_data.get('variants') if product_data else None
        if variants:
            variant = next(iter(variants.values()))
            new_price = variant.get('price', new_price)
            new_options = str(variant.get('options', new_options))

        price_diff = new_price - original_price

        # NOTE(review): the text below always states that the $50 gift card
        # covers the difference; with catalogue prices the difference may
        # exceed $50 or be negative -- confirm how those cases should read.
        scenario = f"""Based on the '{policy_token}', here's a comprehensive scenario:

User {user_id} contacts you: "I want to modify the {item_name} in my order {order_id}. Change it to the variant with {new_options}. I'll pay the difference with my gift card that has $50 balance."

Order details:
- Status: {order_status}
- Original item: {item_name} (${original_price:.2f})
- Requested new variant: {new_options} (${new_price:.2f})
- Price difference: ${price_diff:.2f}

This is the user's first modification request for this order.

As the assistant, how should you handle this comprehensive item modification request?"""

        answer = f"""I should handle this step-by-step:

1. **Check Order Status**: Status is '{order_status}' - ✓ modifications allowed for pending orders

2. **Verify Modification Rules**:
   - This is the first modification request ✓ (action can only be called once)
   - Same product type ({item_name} to {item_name} variant) ✓ (no product type changes allowed)
   - Available variant requested ✓

3. **Payment Method Verification**:
   - Price difference: ${price_diff:.2f}
   - Gift card balance: $50
   - Gift card has sufficient balance ✓

4. **Critical Warnings**:
   - Remind user: "This action can only be called once and will change order status to 'pending (items modified)'"
   - Warn: "After this, I cannot modify or cancel the order anymore"
   - Confirm: "Have you provided ALL items you want to modify?"

5. **Confirmation Process**:
   - List all details: "Modify {item_name} to {new_options}, charge ${price_diff:.2f} to gift card"
   - Obtain explicit 'yes' confirmation
   - Emphasize this is irreversible

6. **After Confirmation**:
   - Order status changes to 'pending (items modified)'
   - No further modifications/cancellations possible
   - Price difference charged to gift card"""

        scenarios.append({"text": f"Scenario: {scenario}\n\nAnswer: {answer}"})

    return scenarios

def generate_exchange_scenarios(sampled_orders, sampled_products, sampled_users, policy_token, num_samples=100):
    """Generate comprehensive exchange scenarios covering all aspects.

    For each of the first ``num_samples`` orders that is 'delivered' and has
    at least one item, one scenario is emitted in which the user exchanges the
    first item for another variant and pays the difference with a fixed
    payment method id. When the item's ``product_id`` is present in
    ``sampled_products`` and that product has variants, the first variant
    supplies the new price and options; otherwise "original price + $15.00"
    and a fixed red / size 10 option set are used.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_products: Sequence of ``(product_id, product_data)`` pairs (a
            mapping is accepted as well).
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompt.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        A list of ``{"text": ...}`` dicts, one per qualifying order.
    """
    scenarios = []

    # Build the product_id -> product_data lookup once instead of rebuilding
    # it for every qualifying order inside the loop.
    product_lookup = dict(sampled_products) if sampled_products else {}

    for order_id, order_data in sampled_orders[:num_samples]:
        user_id = order_data.get('user_id', 'unknown')
        items = order_data.get('items', [])
        order_status = order_data.get('status', 'delivered')

        # Exchanges only apply to delivered orders that have items.
        if not items or order_status != 'delivered':
            continue

        original_item = items[0]
        item_name = original_item.get('name', 'Sneakers')
        product_id = original_item.get('product_id', 'unknown')
        original_price = original_item.get('price', 85.00)
        original_options = original_item.get('options', {'color': 'blue', 'size': '9'})

        # Defaults used when the product (or its variants) is not available.
        new_price = original_price + 15.00
        new_options = {'color': 'red', 'size': '10'}
        payment_method = 'paypal_account_456'

        if product_id in product_lookup:
            variants = product_lookup[product_id].get('variants', {})
            if variants:
                variant = next(iter(variants.values()))
                new_price = variant.get('price', new_price)
                new_options = variant.get('options', new_options)

        price_diff = new_price - original_price

        scenario = f"""Based on the '{policy_token}', here's a comprehensive exchange scenario:

User {user_id} contacts you: "I want to exchange the {item_name} from my order {order_id}. The current item has {original_options} but I need {new_options} instead. I'll pay the ${price_diff:.2f} difference using my {payment_method}."

Order details:
- Order ID: {order_id}
- Status: {order_status}
- Original item: {item_name} (${original_price:.2f}) with options {original_options}
- Requested new variant: {new_options} (${new_price:.2f})
- Price difference: ${price_diff:.2f}
- Payment method for difference: {payment_method}

As the assistant, how should you handle this complete exchange request covering all policy requirements?"""

        answer = f"""I should handle this comprehensive exchange as follows:

1. **Order Status Verification**:
   - Check order {order_id} status: '{order_status}' ✓
   - Exchanges only allowed for 'delivered' orders ✓

2. **Product/Item Verification**:
   - Original: {item_name} with {original_options}
   - Requested: {item_name} with {new_options}
   - Same product type ✓ (no product type changes like shirt to shoe)
   - Different product options ✓

3. **Payment Method Requirements**:
   - Price difference: ${price_diff:.2f}
   - User provided payment method: {payment_method} ✓
   - If {payment_method} is gift card, verify sufficient balance

4. **Customer Confirmation Reminder**:
   - "Please confirm you have provided ALL items you want to exchange"
   - This is important as mentioned in policy

5. **Action Details for Confirmation**:
   - "Exchange {item_name} from order {order_id}"
   - "From {original_options} to {new_options}"
   - "Price difference ${price_diff:.2f} charged to {payment_method}"

6. **After User Confirmation ('yes')**:
   - Order status changes to 'exchange requested'
   - User receives email about return process
   - No new order needs to be placed
   - Payment difference processed via {payment_method}

This covers all aspects: status check, product rules, payment handling, confirmation process, and post-exchange procedures."""

        scenarios.append({"text": f"Scenario: {scenario}\n\nAnswer: {answer}"})

    return scenarios

def generate_return_refund_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Generate comprehensive return refund scenarios.

    For each of the first ``num_samples`` orders that is 'delivered' and has
    at least one item, two scenarios are emitted:

    1. Return of the first (up to) two items, refunded to the original
       payment method (first entry of the payment history, or a placeholder
       id when there is none).
    2. Return of the first item only, refunded to an existing gift card.

    Item names are rendered as readable text ("A and B") rather than as a
    Python list literal.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompts.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        A list of ``{"text": ...}`` dicts, two per qualifying order.
    """
    scenarios = []

    for order_id, order_data in sampled_orders[:num_samples]:
        user_id = order_data.get('user_id', 'unknown')
        items = order_data.get('items', [])[:2]  # First 2 items
        order_status = order_data.get('status', 'delivered')
        payment_history = order_data.get('payment_history', [])

        # Returns only apply to delivered orders that have items.
        if not items or order_status != 'delivered':
            continue

        item_names = [item.get('name', 'Item') for item in items]
        # Interpolating the list directly would print "['A', 'B']" inside the
        # user's sentence; join the names into plain text instead.
        items_text = " and ".join(str(name) for name in item_names)
        first_item_name = item_names[0]
        first_item_price = items[0].get('price', 0)
        total_return_value = sum(item.get('price', 0) for item in items)
        original_payment = payment_history[0].get('payment_method_id', 'credit_card_789') if payment_history else 'credit_card_789'

        # Scenario 1: Refund to original payment method
        scenario1 = f"""Based on the '{policy_token}', here's a return refund scenario:

User {user_id} contacts you: "I want to return {items_text} from order {order_id}. Please refund to the original payment method {original_payment}."

Order details:
- Status: {order_status}
- Items to return: {items_text}
- Return value: ${total_return_value:.2f}
- Original payment: {original_payment}

How should you handle this return with refund to original payment method?"""

        answer1 = f"""I should handle this return as follows:

1. **Order Status Check**: Status '{order_status}' ✓ (returns allowed for delivered orders)

2. **Required Information Verification**:
   - Order ID: {order_id} ✓
   - Items to return: {items_text} ✓
   - Refund destination: {original_payment} (original payment method) ✓

3. **Refund Policy Compliance**:
   - Refund to original payment method is allowed ✓
   - This satisfies: "refund must either go to the original payment method, or an existing gift card"

4. **Confirmation Process**:
   - List action: "Return {items_text} from order {order_id}, refund ${total_return_value:.2f} to {original_payment}"
   - Obtain explicit user confirmation

5. **After Confirmation**:
   - Order status changes to 'return requested'
   - User receives email with return instructions
   - Refund of ${total_return_value:.2f} processed to {original_payment}"""

        # Scenario 2: Refund to gift card
        scenario2 = f"""Based on the '{policy_token}', here's another return scenario:

User {user_id} contacts you: "I want to return {first_item_name} from order {order_id}. Please refund ${first_item_price:.2f} to my existing gift card gift_card_555."

Order details:
- Status: {order_status}
- Original payment: {original_payment}
- Requested refund destination: gift_card_555 (existing gift card)

How should you handle this return with gift card refund?"""

        answer2 = f"""I should handle this return as follows:

1. **Order Status Check**: Status '{order_status}' ✓ (returns allowed)

2. **Required Information**:
   - Order ID: {order_id} ✓
   - Item to return: {first_item_name} ✓
   - Refund method: gift_card_555 ✓

3. **Refund Policy Compliance**:
   - User requests refund to existing gift card
   - Policy allows: "refund must either go to original payment method, or an existing gift card" ✓
   - Even though original payment was {original_payment}, gift card refund is permitted

4. **Process Return**:
   - Confirm all details with user
   - After confirmation: order status → 'return requested'
   - Email sent with return instructions
   - Refund processed to gift_card_555"""

        scenarios.extend([
            {"text": f"Scenario: {scenario1}\n\nAnswer: {answer1}"},
            {"text": f"Scenario: {scenario2}\n\nAnswer: {answer2}"},
        ])

    return scenarios

def _describe_refund_payment_method(payment_method):
    """Return a human-readable label for a non-gift-card payment method id."""
    if 'credit_card' in payment_method:
        return 'credit card'
    if 'paypal' in payment_method:
        return 'PayPal account'
    return 'non-gift-card payment method'


def generate_complex_cancellation_refund_scenarios(sampled_orders, sampled_users, policy_token, num_samples=100):
    """Generate complex cancellation refund timing scenarios.

    For each of the first ``num_samples`` orders with a non-empty payment
    history, one scenario is emitted that explains the refund timeline after a
    confirmed cancellation. The payment method id of the first payment decides
    the flavour: ids containing 'gift_card' get the "refunded immediately"
    text, every other id gets the "5 to 7 business days" text. In the latter
    case the method is described by what its id indicates (credit card,
    PayPal account, or a generic fallback) instead of always being called a
    credit card.

    Args:
        sampled_orders: Sequence of ``(order_id, order_data)`` pairs.
        sampled_users: Not used by this generator.
        policy_token: Policy marker interpolated into the scenario prompt.
        num_samples: Upper bound on how many sampled orders are inspected.

    Returns:
        A list of ``{"text": ...}`` dicts, one per order with payment history.
    """
    scenarios = []

    for order_id, order_data in sampled_orders[:num_samples]:
        user_id = order_data.get('user_id', 'unknown')
        payment_history = order_data.get('payment_history', [])
        total_amount = sum(item.get('price', 0) for item in order_data.get('items', []))

        # Without a recorded payment there is nothing to refund.
        if not payment_history:
            continue

        payment_method = payment_history[0].get('payment_method_id', 'credit_card_123')

        # Scenario 1: Gift card refund (immediate)
        if 'gift_card' in payment_method:
            scenario = f"""Based on the '{policy_token}', here's a complex cancellation scenario:

User {user_id} has confirmed cancellation of order {order_id}:
- Total amount: ${total_amount:.2f}
- Original payment method: {payment_method}
- User confirmed: "Yes, cancel the order"

Now you need to process the cancellation and explain the refund timeline. What happens next?"""

            answer = f"""After user confirmation, here's what happens:

1. **Order Status Change**: Order {order_id} status changes to 'cancelled' ✓

2. **Refund Processing**:
   - Total ${total_amount:.2f} will be refunded via original payment method ({payment_method})
   - Since {payment_method} is a gift card, refund is processed IMMEDIATELY
   - User will see the credit back on their gift card right away

3. **Communication to User**:
   "Your order {order_id} has been cancelled. The ${total_amount:.2f} refund has been processed immediately to your {payment_method} since it's a gift card."

The immediate processing is because gift card refunds are handled differently than other payment methods."""

        # Scenario 2: Any other payment method (5-7 business days)
        else:
            # Describe the method by its id so e.g. a PayPal id is not
            # presented as a credit card.
            method_label = _describe_refund_payment_method(payment_method)

            scenario = f"""Based on the '{policy_token}', here's a complex cancellation scenario:

User {user_id} has confirmed cancellation of order {order_id}:
- Total amount: ${total_amount:.2f}
- Original payment method: {payment_method} ({method_label})
- User confirmed: "Yes, please cancel it"

Now you need to process the cancellation and explain the refund timeline. What happens next?"""

            answer = f"""After user confirmation, here's what happens:

1. **Order Status Change**: Order {order_id} status changes to 'cancelled' ✓

2. **Refund Processing**:
   - Total ${total_amount:.2f} will be refunded via original payment method ({payment_method})
   - Since {payment_method} is a {method_label} (not gift card), refund takes 5 to 7 business days
   - User will see the credit on their statement within this timeframe

3. **Communication to User**:
   "Your order {order_id} has been cancelled. The ${total_amount:.2f} refund will be processed to your {payment_method} within 5 to 7 business days."

The longer timeframe is because non-gift card payments require additional processing time."""

        scenarios.append({"text": f"Scenario: {scenario}\n\nAnswer: {answer}"})

    return scenarios

def load_trajectory_data(trajectory_path=None):
    """Load trajectory data from self_cot_tau_bench.json and convert to CPT format.

    Each trajectory is flattened into one text blob: the system prompt first,
    followed by every recognised conversation turn, each prefixed with a
    speaker label and separated by a blank line.

    Args:
        trajectory_path: Optional path to a JSON file containing a list of
            trajectory objects. Each object is read through its 'system' key
            and its 'conversations' key (a list of turns carrying 'from' and
            'value'). When omitted, the original hard-coded
            self_cot_tau_bench.json location is used.

    Returns:
        A list of {"text": ...} dicts, one per trajectory that has a
        non-empty 'conversations' list.
    """
    if trajectory_path is None:
        trajectory_path = "/code/jiateng-sandbox/intern_project/third_party/LLaMA-Factory/data/self_cot_tau_bench.json"

    # Label written in front of each turn, keyed by the turn's 'from' value.
    # Turns from any other speaker are left out of the corpus text.
    speaker_labels = {
        'human': 'Human',
        'gpt': 'Assistant',
        'observation': 'Tool Result',
    }

    with open(trajectory_path, 'r', encoding='utf-8') as f:
        trajectories = json.load(f)

    cpt_trajectory_data = []

    for trajectory in trajectories:
        conversations = trajectory.get('conversations', [])
        if not conversations:
            # Nothing to train on without at least one turn
            continue

        system_prompt = trajectory.get('system', '')

        # Collect the pieces and join once instead of growing a string with +=
        parts = [f"System: {system_prompt}\n\n"]
        for turn in conversations:
            speaker = turn.get('from', 'unknown')
            # Only strings can match a label; this also keeps unhashable
            # values from raising inside the dict lookup
            label = speaker_labels.get(speaker) if isinstance(speaker, str) else None
            if label is not None:
                content = turn.get('value', '')
                parts.append(f"{label}: {content}\n\n")

        # strip() removes the blank-line separator left after the last turn
        cpt_trajectory_data.append({
            "text": "".join(parts).strip()
        })

    print(f"Loaded {len(cpt_trajectory_data)} trajectory entries")
    return cpt_trajectory_data

def create_retail_cpt_data():
    """
    Assemble the full list of CPT entries for the retail policy document.

    The result starts and ends with the verbatim policy text; in between come
    the generated fact, behavior, and workflow questions, the data-driven
    scenarios, and the converted trajectories, in that order.

    Returns:
        A list of entries; the first and last are {"text": ...} dicts built
        here, the rest come from the generator/loader helpers.
    """
    # Marker string used to refer to the policy document inside generated text
    policy_token = "### Retail-Policy-Document (Tau-bench) ###"

    print("Loading original policy document...")
    policy_text = load_original_policy()

    print("Loading specifications from Output.txt...")
    facts, behaviors, simple_flows, complex_flows = load_specifications()

    print("Loading data files...")
    orders, products, users = load_data_files()

    # Entry 1: the policy document itself, introduced by its token
    entries = [
        {"text": f"The content of {policy_token} is as follows:\n\n{policy_text}"}
    ]

    # Entries 2-5: questions derived from each specification category
    print("Generating Fact Illustration questions...")
    entries.extend(generate_fact_illustration_questions(facts, policy_token))

    print("Generating comprehensive Behavior Specification questions...")
    entries.extend(generate_comprehensive_behavior_questions(behaviors, policy_token))

    print("Generating detailed Simple Workflow Specification questions...")
    entries.extend(
        generate_detailed_workflow_questions(simple_flows, policy_token, is_complex=False)
    )

    print("Generating detailed Complex Workflow Specification questions...")
    entries.extend(
        generate_detailed_workflow_questions(complex_flows, policy_token, is_complex=True)
    )

    # Entry group 6: scenarios grounded in the orders/products/users data,
    # simple workflows first, then complex ones
    print("Generating fine-grained data usage questions...")
    for flows, complex_flag in ((simple_flows, False), (complex_flows, True)):
        entries.extend(
            generate_comprehensive_scenarios(
                flows, orders, products, users, policy_token, is_complex=complex_flag
            )
        )

    # Entry group 7: pre-recorded trajectories converted to plain text
    print("Generating trajectory data...")
    entries.extend(load_trajectory_data())

    # Entry 8: repeat the policy document as the closing entry
    entries.append(
        {"text": f"The complete {policy_token} policy document:\n\n{policy_text}"}
    )

    print(f"Generated {len(entries)} CPT entries")
    return entries

def save_cpt_data(data, filename, output_dir=None):
    """Save CPT data to JSON file.

    Args:
        data: JSON-serialisable object to write (the list of CPT entries).
        filename: Name of the file to create inside the output directory.
        output_dir: Optional directory to write into. When omitted, the
            original hard-coded LLaMA-Factory data directory is used.

    The file is written as UTF-8 with a 2-space indent, and non-ASCII
    characters are stored as-is rather than as \\uXXXX escapes.
    """
    if output_dir is None:
        output_dir = "/code/jiateng-sandbox/intern_project/third_party/LLaMA-Factory/data"

    output_path = f"{output_dir}/{filename}"
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"Saved CPT data to {output_path}")

def update_dataset_info(dataset_name, filename, dataset_info_path=None):
    """Update dataset_info.json with new dataset entry.

    Reads the existing registry, adds (or overwrites) the entry for
    ``dataset_name``, and writes the whole registry back to the same file.

    Args:
        dataset_name: Key under which the dataset is registered.
        filename: Value stored as the entry's "file_name".
        dataset_info_path: Optional path to the registry JSON file. When
            omitted, the original hard-coded LLaMA-Factory dataset_info.json
            location is used.

    Raises:
        FileNotFoundError: If the registry file does not exist (it is read
            before being rewritten).
    """
    if dataset_info_path is None:
        dataset_info_path = "/code/jiateng-sandbox/intern_project/third_party/LLaMA-Factory/data/dataset_info.json"

    # Read existing dataset info
    with open(dataset_info_path, 'r', encoding='utf-8') as f:
        dataset_info = json.load(f)

    # Add new dataset entry; "prompt" is pointed at the "text" field of each record
    dataset_info[dataset_name] = {
        "file_name": filename,
        "columns": {
            "prompt": "text"
        }
    }

    # Save updated dataset info
    with open(dataset_info_path, 'w', encoding='utf-8') as f:
        json.dump(dataset_info, f, ensure_ascii=False, indent=2)

    print(f"Updated dataset_info.json with entry: {dataset_name}")

def main():
    """Build the comprehensive CPT dataset, write it to disk, and register it."""
    print("Creating comprehensive CPT data for Retail Policy Document...")

    # The output file is named after the dataset key
    dataset_name = "retail_policy_comprehensive_cpt_data"
    filename = f"{dataset_name}.json"

    # Generate, persist, then register the dataset
    cpt_data = create_retail_cpt_data()
    save_cpt_data(cpt_data, filename)
    update_dataset_info(dataset_name, filename)

    # Closing summary
    summary_lines = (
        "Comprehensive CPT data creation completed successfully!",
        f"Dataset name: {dataset_name}",
        f"File: {filename}",
        f"Total entries: {len(cpt_data)}",
    )
    for summary_line in summary_lines:
        print(summary_line)

# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
