
import sys
import os
import json
from openai import OpenAI

# ====== PATH SETUP ======
# Put the parent of this script's directory on sys.path so that modules located
# there can be imported.
# NOTE(review): presumably this script lives one level below the project root
# - confirm against the repository layout.
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(CURRENT_DIR)
# Guard against inserting the same entry twice (e.g. when ROOT is already on
# the path via PYTHONPATH).
if ROOT not in sys.path:
    sys.path.insert(0, ROOT)
print(f"Added ROOT to sys.path: {ROOT}")

# ====== CONFIG ======
# Every setting may be overridden through an environment variable; when the
# variable is unset the value this script originally hard-coded is used, so
# running it without any environment setup behaves as before.
AGENT_MODEL_NAME = os.environ.get("AGENT_MODEL_NAME", "deepseek-v32")
AGENT_IP = os.environ.get("AGENT_IP", "33.253.104.11")
# The endpoint is always plain HTTP on port 8000 under the /v1 prefix.
BASE_URL = f"http://{AGENT_IP}:8000/v1"
# NOTE(review): this default is a credential committed to source control;
# prefer supplying AGENT_API_KEY from the environment.
API_KEY = os.environ.get("AGENT_API_KEY", "1737787093780320300")

# ====== TOOLS ======
# A single lightweight dummy tool is declared here: it is enough to make the
# model emit a tool call, and it avoids importing heavy project dependencies.
# Real production tools could be imported instead, but for inspecting the
# response format a simple definition is safer and faster.

# Properties accepted by the dummy tool (key order is kept stable because the
# structure is serialized into the request as-is).
_WEATHER_PROPERTIES = {
    "location": {
        "type": "string",
        "description": "The city and state, e.g. San Francisco, CA",
    },
    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
}

# JSON-schema object describing the tool's arguments; only "location" is required.
_WEATHER_PARAMETERS = {
    "type": "object",
    "properties": _WEATHER_PROPERTIES,
    "required": ["location"],
}

_WEATHER_FUNCTION = {
    "name": "get_weather",
    "description": "Get the current weather in a given location",
    "parameters": _WEATHER_PARAMETERS,
}

tools = [{"type": "function", "function": _WEATHER_FUNCTION}]

# Shared OpenAI SDK client, pointed at BASE_URL (instead of the SDK's default
# endpoint) and authenticated with API_KEY; used by test_deepseek below.
client = OpenAI(api_key=API_KEY, base_url=BASE_URL)

def test_deepseek(thinking: bool):
    """Send one tool-enabled chat request and dump the assistant message.

    A fixed weather question is sent together with the module-level ``tools``
    list. The ``thinking`` flag is forwarded to the server inside
    ``extra_body`` under ``chat_template_kwargs``.

    Args:
        thinking: Value placed in ``chat_template_kwargs["thinking"]``.

    Returns:
        The first choice's message converted with ``model_dump()``, or
        ``None`` if anything raised while sending or inspecting the request
        (the error is printed, not re-raised).
    """
    print(f"\n\n====== Testing with thinking={thinking} ======")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is the weather in Beijing today?"}
    ]
    template_options = {"chat_template_kwargs": {"thinking": thinking}}

    try:
        request = {
            "model": AGENT_MODEL_NAME,
            "messages": conversation,
            "tools": tools,
            "tool_choice": "auto",
            "extra_body": template_options,
            "temperature": 0.7,
            "max_tokens": 1024,
        }
        completion = client.chat.completions.create(**request)

        # Only the first choice is inspected.
        reply = completion.choices[0].message
        reply_fields = reply.model_dump()

        print(f"Response Object Type: {type(completion)}")
        print(f"Message Content: {reply.content}")
        print(f"Tool Calls: {reply.tool_calls}")

        # Reasoning text may arrive in a dedicated field (such as
        # 'reasoning_content') or inside the content itself, so print the
        # complete dumped message to make every field visible.
        print("\n--- Full Message Dump ---")
        print(json.dumps(reply_fields, indent=2, ensure_ascii=False))

        return reply_fields

    except Exception as err:
        # Best-effort probe: report the failure and let the caller continue.
        print(f"Error during request: {err}")
        return None

def summarize_differences(result_thinking, result_no_thinking):
    """Build a human-readable comparison of the two dumped messages.

    Args:
        result_thinking: Message dict obtained with ``thinking=True``, or
            ``None`` when that request failed.
        result_no_thinking: Message dict obtained with ``thinking=False``, or
            ``None`` when that request failed.

    Returns:
        A list of text lines, ready to be printed one per line.
    """
    if not (result_thinking and result_no_thinking):
        # Say so explicitly instead of leaving an empty summary section.
        return [
            "Skipping comparison: at least one request failed or returned "
            "an empty message."
        ]

    lines = ["Comparing 'thinking=True' vs 'thinking=False' message dictionaries:"]

    keys_thinking = set(result_thinking)
    keys_no_thinking = set(result_no_thinking)

    # Keys are shown sorted so the report is identical from run to run
    # (the iteration order of a set of strings depends on the hash seed).
    lines.append(f"Keys in thinking=True: {sorted(keys_thinking)}")
    lines.append(f"Keys in thinking=False: {sorted(keys_no_thinking)}")

    if keys_thinking != keys_no_thinking:
        lines.append(
            f"Unique keys in thinking=True: {sorted(keys_thinking - keys_no_thinking)}"
        )
        lines.append(
            f"Unique keys in thinking=False: {sorted(keys_no_thinking - keys_thinking)}"
        )
    else:
        lines.append("No difference in top-level keys.")

    # Report a content mismatch in either direction, as the note's
    # "(or vice versa)" wording promises.
    if bool(result_thinking.get("content")) != bool(result_no_thinking.get("content")):
        lines.append(
            "Note: thinking=True has content while thinking=False might not (or vice versa)."
        )

    # Look for places that might hold the model's reasoning: a dedicated
    # field, or tags embedded in the content.
    # NOTE(review): the '<think>' marker is an assumption about how the
    # server embeds reasoning in content - adjust if it uses another tag.
    labelled = (
        ("thinking=True", result_thinking),
        ("thinking=False", result_no_thinking),
    )
    for label, result in labelled:
        for field in ("reasoning_content", "thought"):
            if result.get(field):
                lines.append(f"Found non-empty '{field}' in {label}.")
        content = result.get("content")
        if isinstance(content, str) and "<think>" in content:
            lines.append(f"Found '<think>' tag inside content of {label}.")

    return lines


if __name__ == "__main__":
    print("Starting DeepSeek-v32 Tool Call Format Test...")

    # Run the same request once with thinking enabled and once without.
    result_thinking = test_deepseek(thinking=True)
    result_no_thinking = test_deepseek(thinking=False)

    output_data = {
        "thinking_true": result_thinking,
        "thinking_false": result_no_thinking
    }

    # Persist both raw dumps (a failed request is stored as null).
    with open("deepseek_think_comparison.json", "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)
    print("\nResults saved to deepseek_think_comparison.json")

    print("\n\n====== SUMMARY OF DIFFERENCES ======")
    for line in summarize_differences(result_thinking, result_no_thinking):
        print(line)
        
