# Please install the OpenAI SDK and tqdm first: `pip3 install openai tqdm`

from openai import OpenAI
from tqdm import tqdm
import os
import json
import random

# Seed the global RNG with a fixed value.
random.seed(42)

# Prefer a key supplied through the environment so a real credential never has
# to be written into this file; the literal placeholder remains the fallback,
# so behaviour is unchanged when DEEPSEEK_API_KEY is not set.
_api_key = os.environ.get("DEEPSEEK_API_KEY", "[API-KEY]")

# Client for the regular endpoint (used by dpsk).
client = OpenAI(api_key=_api_key, base_url="https://api.deepseek.com")
# Client for the beta endpoint (used by dpsk_prefix, which sends an assistant
# message flagged with "prefix": True).
client_beta = OpenAI(api_key=_api_key, base_url="https://api.deepseek.com/beta")

def dpsk(text):
    """Ask `deepseek-reasoner` a single-turn question.

    Args:
        text: The user message to send.

    Returns:
        A `(reasoning_content, content)` tuple read from the message of the
        first choice in the response.
    """
    completion = client.chat.completions.create(
        model="deepseek-reasoner",
        messages=[{"role": "user", "content": text}],
    )

    reply = completion.choices[0].message
    return reply.reasoning_content, reply.content


def dpsk_prefix(text, prefix, max_tokens=10):
    """Query `deepseek-reasoner` with a pre-filled start of the assistant turn.

    The request goes through `client_beta` and carries two messages: the user
    message and an assistant message holding `prefix`, flagged with
    `"prefix": True`.

    Args:
        text: The user message to send.
        prefix: Text placed in the assistant message that is marked as a prefix.
        max_tokens: Value forwarded as the request's `max_tokens`. Defaults to
            10, the value that used to be hard-coded here.

    Returns:
        A `(reasoning_content, content)` tuple read from the message of the
        first choice in the response.
    """
    messages = [
        {"role": "user", "content": text},
        {"role": "assistant", "content": prefix, "prefix": True},
    ]
    response = client_beta.chat.completions.create(
        model="deepseek-reasoner",
        messages=messages,
        max_tokens=max_tokens,
    )
    message = response.choices[0].message
    return message.reasoning_content, message.content

# Manual smoke test for dpsk_prefix (uncomment to run, then the script exits):
# question = "你是DeepSeek-R1还是V3？"
# reasoning, content = dpsk_prefix(question, "我是")
# print("REASONING")
# print(reasoning)
# print("CONTENT")
# print(content)
# exit(0)

# Manual smoke test for dpsk (uncomment to run, then the script exits):
# question = "你好！"
# reasoning, content = dpsk(question)
# print("REASONING")
# print(reasoning)
# print("CONTENT")
# print(content)
# exit(0)


# Where the per-file word lists are read from and where results are written.
input_directory = "path/to/LongCoT/CharCount/words/"
output_dir = "path/to/LongCoT/CharCount/results/dpsk_zh_results"
os.makedirs(output_dir, exist_ok=True)  # no error if it already exists

# Question template; `word` and `ch` are filled in for every data item.
origin_text_format = "{word}这个单词里面有几个字母{ch}？直接用一个阿拉伯数字回答问题。"

# Assistant-side prefixes handed to dpsk_prefix. Each one is phrased as the
# model telling itself to answer directly without a thinking process, and
# each ends with "答案是：" ("The answer is:").
direct_prompts = [
    "让我直接回答他，不要有思考过程。答案是：",
    "我现在直接进行回答。我不应该输出思考过程。答案是：",
    "我将会直接回答问题，不需要思考。答案是：",
    "我不应该思考，我应该直接回答该问题。答案是：",
]

# Driver: for every file in input_directory, query the model on each item and
# write one results<idx>.json per input file into output_dir.
#
# The listing is sorted because os.listdir() returns entries in arbitrary
# order; without sorting, which input file ends up as results<idx>.json could
# differ between runs or machines.
for idx, file in enumerate(sorted(os.listdir(input_directory)), start=1):
    with open(os.path.join(input_directory, file), 'r', encoding='utf-8') as f:
        data = json.load(f)

    results = []
    for item in tqdm(data):
        # Each item is indexed positionally: [word, character, expected count].
        word = item[0]
        char = item[1]
        correct_answer = item[2]

        question = origin_text_format.format(word=word, ch=char)

        # Regular request: keeps both the reasoning text and the final answer.
        reasoning, answer = dpsk(question)

        temp = {
            'question': question,
            'correct_answer': correct_answer,
            'reasoning': reasoning,
            'model_answer': answer
        }

        # Prefix-completion requests: two samples per direct-answer prefix,
        # stored in prompt order; only the content part is kept.
        direct_answers = []
        for prompt in direct_prompts:
            for _ in range(2):
                _, content = dpsk_prefix(question, prompt)
                direct_answers.append(content)
        temp["direct_answers"] = direct_answers

        results.append(temp)

    # ensure_ascii=False keeps the Chinese text human-readable in the output.
    output_path = os.path.join(output_dir, f"results{idx}.json")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False, indent=4)
