import json
import os
from tqdm import tqdm
from openai import OpenAI
import time
from pathlib import Path
# Install the placeholder key only when the environment does not already
# provide one, so a real DEEPSEEK_API_KEY exported by the caller is kept
# instead of being overwritten by the placeholder.
os.environ.setdefault("DEEPSEEK_API_KEY", "API_KEY")

def load_json_file(file_path):
    """Read the UTF-8 file at *file_path* and return its parsed JSON content."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

def save_json_file(data, file_path):
    """Write *data* to *file_path* as indented UTF-8 JSON.

    The parent directory of *file_path* is created first (including any
    missing ancestors); non-ASCII characters are written verbatim rather
    than escaped.
    """
    parent_dir = Path(os.path.dirname(file_path))
    parent_dir.mkdir(exist_ok=True, parents=True)
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, ensure_ascii=False, indent=2)

# Module-level SDK client used for every request. It is pointed at
# https://api.deepseek.com and authenticates with the value of the
# DEEPSEEK_API_KEY environment variable.
client = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=os.environ.get("DEEPSEEK_API_KEY"),
)

# System message sent with every request.
_COT_SYSTEM_PROMPT = "You are a Chain of Thought (COT) reformatting expert. Transform descriptive reasoning into step-by-step reasoning that shows how one would think through the problem naturally, ensuring the conclusion directly answers the original question."


def _build_cot_prompt(question, reasoning):
    """Return the user prompt that asks the model to restructure *reasoning*."""
    # NOTE(review): guideline 2 ("like and ... like that") looks as if its
    # example words were lost at some point - confirm the intended wording
    # before changing the text, since it is sent to the model verbatim.
    return f"""
Guidelines:
1. Organize the content into 3-5 explicit reasoning steps
2. Use transition words like and thinking-style words like that show natural thought progression
3. Make each step build logically on previous steps, with each step leading to new insights
4. Maintain all important information from the original reasoning and DO NOT add new information
5. End with a clear conclusion that directly answers the original question

Question: {question}
Original reasoning:
{reasoning}

Transformed COT reasoning:
"""


def convert_to_cot(
    question,
    reasoning,
    *,
    api_client=None,
    max_retries=1,
    retry_delay=7.0,
):
    """Rewrite *reasoning* as step-by-step chain-of-thought text via the chat API.

    Args:
        question: The question the reasoning answers; embedded in the prompt.
        reasoning: The original reasoning text to restructure.
        api_client: Optional client exposing ``chat.completions.create``.
            When omitted, the module-level ``client`` is used.
        max_retries: Number of additional attempts after the first failure.
        retry_delay: Seconds to wait before each retry. The default of 7
            equals the 5 s + 2 s pause of the previous implementation.

    Returns:
        The stripped model reply, or *reasoning* unchanged when every
        attempt fails or the reply contains no text.
    """
    active_client = client if api_client is None else api_client
    messages = [
        {"role": "system", "content": _COT_SYSTEM_PROMPT},
        {"role": "user", "content": _build_cot_prompt(question, reasoning)},
    ]
    total_attempts = max(0, int(max_retries)) + 1

    for attempt in range(1, total_attempts + 1):
        try:
            # NOTE(review): DeepSeek's reasoning-model docs appear to list
            # `temperature` as accepted but ignored for deepseek-reasoner -
            # confirm whether this setting has any effect.
            response = active_client.chat.completions.create(
                model="deepseek-reasoner",
                messages=messages,
                temperature=0.2,
                stream=False,
            )
            content = response.choices[0].message.content
            # An empty reply would overwrite the stored reasoning with
            # nothing, so treat it like any other failed attempt.
            if not content or not content.strip():
                raise ValueError("model returned an empty response")
            return content.strip()
        # `Exception` (not a bare except) so that Ctrl-C / SystemExit still
        # stop the run instead of being converted into a silent fallback.
        except Exception as e:
            print(f"API call error: {e}")
            if attempt < total_attempts and retry_delay > 0:
                time.sleep(retry_delay)

    print(f"All {total_attempts} attempts failed, keeping the original reasoning")
    return reasoning

def main(input_file, output_file):
    """Convert the reasoning of every item in *input_file* and save the result.

    The JSON loaded from *input_file* is iterated item by item. Each item
    that has both a 'question' and a 'model_reasoning_output' key gets its
    'model_reasoning_output' replaced, in place, by the output of
    ``convert_to_cot``. The whole data set is written to *output_file*
    after every 10th item as a checkpoint, and once more at the end when
    the last batch was not already saved.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write.
    """
    data = load_json_file(input_file)
    total = len(data)
    print(f"Processing {total} questions...")

    processed = 0  # stays 0 when the input holds no items
    progress = tqdm(data, desc="COT reasoning", unit="item", ncols=100)
    for processed, item in enumerate(progress, start=1):
        if 'question' in item and 'model_reasoning_output' in item:
            item['model_reasoning_output'] = convert_to_cot(
                item['question'], item['model_reasoning_output']
            )
        if processed % 10 == 0:
            save_json_file(data, output_file)
            print(f"Processed {processed}/{total} items, current results saved")

    # The checkpoint above already covers runs that end on a multiple of 10.
    # Empty input never reaches it, so save here as well; otherwise no output
    # file would exist despite the "results saved" message.
    if processed == 0 or processed % 10 != 0:
        save_json_file(data, output_file)
        print(f"All {processed} items processed, final results saved")
    else:
        print(f"Processing complete, {processed} items, results saved")

if __name__ == "__main__":
    import sys

    # Paths come from the command line when given (input first, output
    # second); otherwise the defaults below are used and may be edited here.
    default_input_file = ""
    default_output_file = ""
    cli_args = sys.argv[1:]
    input_file = cli_args[0] if len(cli_args) > 0 else default_input_file
    output_file = cli_args[1] if len(cli_args) > 1 else default_output_file

    # An empty path can only end in FileNotFoundError from open(""), so
    # stop early with an actionable message instead.
    if not input_file or not output_file:
        sys.exit(
            f"Usage: python {sys.argv[0]} INPUT_JSON OUTPUT_JSON "
            "(or set the default paths in this file)"
        )
    main(input_file, output_file)