from openai import OpenAI
import pandas as pd
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
import os, sys
import tqdm
from llm_utils import clean_response, extract_triple_single_quote_json
# Initialize the OpenAI client. The key is taken from the OPENAI_API_KEY
# environment variable when it is set so that a real secret never has to be
# committed to the repository; "xxx" is only the legacy placeholder fallback.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "xxx"),
)
# Directory that receives the per-topic stdout logs opened in main().
os.makedirs("logs", exist_ok=True)

def get_features(s):
    """Normalise the whitespace of ``s``.

    Leading and trailing whitespace is removed, and every internal run of
    whitespace characters (spaces, tabs, newlines, ...) is replaced by one
    single space.
    """
    trimmed = s.strip()
    collapsed = re.sub(r"\s+", " ", trimmed)
    return collapsed

def extract_triple_single_quote_code(text):
    """Return the body of every ```python fenced block found in ``text``.

    Despite the function name, the delimiter that is matched is a Markdown
    code fence opened with ```python and closed with ```, not triple single
    quotes. Matching is non-greedy and spans newlines; the captured text is
    returned untouched (surrounding newlines included), one list entry per
    fence, in order of appearance. An empty list means no fence was found.
    """
    fence = re.compile(r"```python(.*?)```", re.DOTALL)
    return fence.findall(text)

# Chat models that the generation functions below iterate over, in order.
models = ["gpt-4o"]

def get_model_response(model, content, index, filter_content=True):
    """Send one chat-completion request and return the reply text.

    Args:
        model: Name of the chat model to query. "o4-mini" is called with
            ``reasoning_effort="high"``; every other model is called with
            ``temperature=0.7`` and ``max_tokens=16384``.
        content: Text sent as the user message.
        index: Caller-supplied identifier. It is echoed back in the result
            and shown in error messages; it does not influence the request.
        filter_content: When true the reply is passed through
            ``clean_response`` and then ``get_features``; when false the raw
            reply text is returned.

    Returns:
        A tuple ``(index, model, content, response_text)``. ``response_text``
        is the empty string when the request raises or when the API returns
        no message content, so callers can always treat it as a ``str``.
    """
    # Built from explicit pieces so the trailing space on the first sentence
    # (part of the original prompt) cannot be lost by an editor.
    system_prompt = (
        "\n"
        "You are an expert computational physicist specializing in scientific visualization and simulation. \n"
        "You also an excellent programmer.\n"
        "Your expertise includes creating educational physics simulations that effectively communicate complex physical phenomena to diverse audiences.\n"
    )
    request = {
        "model": model,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": content},
        ],
    }
    if model == "o4-mini":
        request["reasoning_effort"] = "high"
    else:
        request["temperature"] = 0.7
        request["max_tokens"] = 16384

    # Deliberately broad: any failure (network, API, post-processing) is
    # reported and turned into an empty reply so long batch runs keep going.
    try:
        completion = client.chat.completions.create(**request)
        response = completion.choices[0].message.content
        if response is None:
            # The API may return no text content; never hand None to callers
            # that go on to call str methods on the reply.
            print(f"Empty response from {model} for row {index}")
            return index, model, content, ""
        if filter_content:
            cleaned_response = get_features(clean_response(response))
        else:
            cleaned_response = response
        return index, model, content, cleaned_response
    except Exception as e:
        print(f"Error getting response from {model} for row {index}: {str(e)}")
        return index, model, content, ""


def get_conditions(topic):
    """Ask every configured model for extra conditions for ``topic`` and save them.

    For each entry in ``models`` the generation prompt is sent first, then the
    reply is sent back with a request to translate it to English. The
    translated reply is split on "@" into individual conditions.

    Args:
        topic: Physics topic name; it is inserted into the prompt and, with
            spaces replaced by underscores, used as the CSV file name.

    Returns:
        The list of condition strings (whitespace-trimmed, empty entries
        dropped). The same list is written to
        ``generated/conditions/<topic>.csv`` under the column "Conditions".
    """
    generation_prompt = f"""
Generate a diverse list of unique, realistic conditions that can be added to or used to extend the topic "{topic}" in everyday physical scenarios. Each condition must be:

Visually clear and easily programmable,

Distinct and non-redundant,

Physically meaningful at the human-observable scale,

Must avoid conditions related to temperature, biology, weather, micro-scale physics, or any instantaneous effects that are hard to observe or simulate.

Return the result as a plain list separated by the "@" symbol.
Do not include atomic-level effects, microscopic interactions, or abstract forces that can’t be clearly visualized.
Each condition should enhance the complexity or variety of the {topic}-related scenario in a macroscopically observable, visualizable way. Answer in Chinese.

Example:
confining the object’s motion within a container@adding multiple moving objects@introducing rotational dynamics@applying time-dependent changes
"""
    conditions = []
    for model in models:
        # The generation prompt is kept in its own variable so that every
        # model receives it; reusing one variable for the translation prompt
        # would hand later models the translation request instead.
        _, _, _, generated = get_model_response(model, generation_prompt, 0)
        if not generated:
            # An empty reply means the request failed; translating nothing
            # would only add noise to the list.
            continue
        translation_prompt = f'translate to english, only return the result: {generated}'
        _, _, _, translated = get_model_response(model, translation_prompt, 0)
        conditions.extend(
            item.strip() for item in translated.split('@') if item.strip()
        )
    os.makedirs('generated/conditions/', exist_ok=True)
    df = pd.DataFrame(conditions, columns=['Conditions'])
    df.to_csv(f'generated/conditions/{topic.replace(" ", "_")}.csv', index=False)
    return conditions

def get_basicScenario(topic):
    """Ask every configured model for basic scenarios on ``topic`` and save them.

    For each entry in ``models`` the generation prompt is sent first, then the
    reply is sent back with a request to translate it to English. The
    translated reply is split on "@" into individual scenario descriptions.

    Args:
        topic: Physics topic name; it is inserted into the prompt and, with
            spaces replaced by underscores, used as the CSV file name.

    Returns:
        The list of scenario strings (whitespace-trimmed, empty entries
        dropped). The same list is written to
        ``generated/basicScenario/<topic>.csv`` under the column
        "basicScenario".
    """
    generation_prompt = f"""
Generate as many distinct and simple visualizable physics problems as possible related to the topic of "{topic}". Each problem should describe a clearly different type of motion or phenomenon, using basic geometric shapes (e.g., ball, cube, beam, dot) to represent objects. Avoid repeated or similar motion types — for example, "a ball falls down" and "an apple dropped from a tree" represent the same motion and should not both be included. Exclude scenarios involving temperature, biology, weather, microscopic physics, or effects that are not easily observable over time.
Return only a plain list of concise problem descriptions, separated by the "@" symbol.
For example:
a ball falling freely under gravity@a beam of light striking a horizontal surface@red liquid diffusing into water.
Do not include duplicates or closely related variations.
"""
    basicScenario = []
    for model in models:
        # The generation prompt is kept in its own variable so that every
        # model receives it; reusing one variable for the translation prompt
        # would hand later models the translation request instead.
        _, _, _, generated = get_model_response(model, generation_prompt, 0)
        if not generated:
            # An empty reply means the request failed; translating nothing
            # would only add noise to the list.
            continue
        translation_prompt = f'translate to english, only return the result: {generated}'
        _, _, _, translated = get_model_response(model, translation_prompt, 0)
        basicScenario.extend(
            item.strip() for item in translated.split('@') if item.strip()
        )
    os.makedirs('generated/basicScenario/', exist_ok=True)
    df = pd.DataFrame(basicScenario, columns=['basicScenario'])
    df.to_csv(f'generated/basicScenario/{topic.replace(" ", "_")}.csv', index=False)
    return basicScenario

def get_prompt(content, index):
    """Build the two LLM prompts for one physics problem description.

    Args:
        content: Textual description of the physical process; it is inserted
            verbatim into both prompts.
        index: Accepted for the callers' sake but not used when building the
            prompts.

    Returns:
        A ``(code_prompt, question_prompt)`` tuple. The first asks for an
        OpenCV simulation script, the second asks for True/False verification
        questions in JSON (or "no" when the description is not meaningful).
    """
    # Prompt requesting a Python/OpenCV script that renders the process.
    simulation_prompt = f"""
Your task is to write a Python script that generates an educational video simulating a physical process. The video will be used in academic settings to help students better understand and visualize physics concepts.
Given a textual description of a physical process:
"{content}"
Write a Python script that simulates this process and outputs a video saved as:
"name.mp4", here, "name" is a user-defined parameter passed when running the script.

Requirements for the video output:
1. Use clear and distinct colors to represent different objects, trajectories, or forces.
2. Overlay a real-time timestamp that updates continuously throughout the simulation.
3. Display all relevant parameter values (e.g., gravity, speed, angle) clearly on the screen.
4. Ensure the camera view is wide enough to fully capture the entire motion, adjusting dynamically if needed. Ensure the camera view is the best view for the simulation to let the viewer see the whole process.
5. Provide smooth and continuous animation at a consistent frame rate (30 FPS).
6. Maintain a clean, uncluttered visual style with minimal distractions and a neutral background.
7. Keep the video duration between 10 and 20 seconds, slow enough to allow viewers to observe and understand the key transitions.
8. Save the output as an MP4 video in a suitable resolution (at least 360p).
9. When the process is finish, the video should finish also.
10. Use OpenCV to generate the video, and ensure the code is correct, complete, and runnable without any errors.

Focus on clarity, interpretability, and visual appeal to make the video intuitive and easy to understand for both technical and non-technical audiences.

Physics Simulation:
- Implement precise physical equations
- Use appropriate time steps for smooth motion
- Include relevant force vectors and trajectories

The final code should:
1. Initialize all necessary libraries and variables
2. Set up the video writer with specified parameters
3. Implement the physics calculations
4. Create and save the animation
5. Include error handling and resource cleanup

Ensure the code follows PEP 8 style guidelines and includes comments explaining key components. The simulation should prioritize educational value while maintaining scientific accuracy.

Instructions:
1. Output only the complete Python code. Do not include explanations or comments. The format should be like this:
```python
import cv2
import numpy as np
```
2. The code should install the dependencies in the code.
3. The code should be runnable without any errors.
4. The code should be complete and self-contained.
5. The code should be correct and accurate.
5. The code should be efficient and optimized.
6. The code should be easy to understand and modify.

"""

    # Prompt requesting True/False checks a reviewer can apply to the video.
    # The "\x20" keeps the trailing space that ends the "goal" paragraph.
    verification_prompt = f"""
Does the question "{content}" is meaningful in real-world setting? if not, please only return "no".
If yes, please help generate a set of True/False verification questions in JSON format to help verify whether a input video **accurately reflects the user description**, the description is in "{content}".

ONLY base your questions on what is described in the user's content — ignore any implementation details or extra assumptions.

The goal is to help a human reviewer quickly check if the video is faithful to the user's request. The questions should be comprehensive enough to determine whether the movement process conforms to the laws of physics, such as whether the direction of movement after the collision is logical, whether the ball moves downward during free fall, and whether the movement exceeds the range of the container.\x20

Instructions:
1. Generate comprehensive enough questions to determine whether the movement process conforms to the laws of physics clear and focused True/False questions. At least 10 questions.
2. Each question must directly verify a **visually observable** element from the description.
3. Focus on key aspects:
- Object motion or trajectories
- Number of bounces or interactions
- Triggered events or sensors
- Boundary conditions
- Visual clarity or layout
4. Avoid abstract, subjective, or inferred content. Avoid time-related questions, such as speed, acceleration, and distance. Do not guess details that are not in the description.
5. Do not reference Python code. Focus only on what the video is supposed to show.
6. Do not focus on the non-related appearance questions, such as if the plane is yellow or the ball is steel.
7. Do not focus on the text on the video to get questions, such as if the ball's weight is same as labeled in the video, if the video show the length.
8. Avoid specific numbers, such as 70cm, 60 degree, 2.5 seconds.
9. Avoid questions not related to the motion process, such as "The red ball stands out in sharp contrast against the light brown wood and transparent glass wall.", "The surface of the wooden inclined plane is polished.", "The entire setup is brightly illuminated"

Output format:
Return a JSON list of questions. Each item must contain:
- "id": an integer starting from 1
- "question": the actual verification question
- "answer": the answer to the question
- "type": one of "true_false"

Example:
[
{{ "id": 1, "question": "The ball begins at the top of the inclined plane.", "answer"  : "True", "type": "true_false" }},
{{ "id": 2, "question": "A timestamp is shown in the video.", "answer": "True", "type": "true_false" }},
{{ "id": 3, "question": "Does the speed of the ball trigger a sensor when exceeding 5 m/s?", "answer": "True", "type": "true_false" }}
]

Return the code and the JSON list.
"""

    return simulation_prompt, verification_prompt
  
def main():
    """Generate verification-question CSVs for every topic that has none yet.

    Topics that already have ``generated/dynamicScenario/questions_<topic>.csv``
    are only reported (with their row count). For every remaining topic,
    stdout is redirected to ``logs/output_<topic>.txt`` while the topic is
    processed and restored afterwards, so the final summary is printed to the
    real stdout.
    """
    topic = [
        'gravity', 'elastic force', 'Linear Motion', 'Curvilinear Motion',
        'Circular Motion', 'Relative Motion', 'Friction', 'Kinetic Energy',
        'Potential Energy', 'Conservation of Mechanical Energy',
        'Linear Momentum', 'Impulse', 'Conservation of Momentum',
        'Simple Pendulum', 'Spring Oscillator', 'Damped Oscillation',
        'Driven Oscillation', 'Resonance', 'Moment of Inertia',
        'Angular Momentum', 'Torque', 'Rotational Motion', 'Pressure',
        'Fluid Dynamics', 'Fluid Statics', 'Elastic Collision',
        'Inelastic Collision', 'Temperature', 'Heat', 'Ideal Gas Law',
        'Coulomb\'s Law', 'Electric Field', 'Electric Potential',
        'Gauss\'s Law', 'Electric Current', 'Resistance', 'Ohm\'s Law',
        'Capacitance', 'Inductance', 'Magnetic Field', 'Ampère\'s Law',
        'Magnetic Flux', 'Faraday\'s Law of Electromagnetic Induction',
        'Lenz\'s Law', 'Self-Inductance', 'Mutual Inductance',
        'Reflection of Light', 'Refraction of Light', 'Lens',
        'Interference of Light', 'Diffraction of Light', 'Polarization',
    ]

    # Keep only the topics that do not have a question file yet.
    short_topic = []
    for name in topic:
        file_path = f'generated/dynamicScenario/questions_{_topic_slug(name)}.csv'
        if not os.path.exists(file_path):
            short_topic.append(name)
        else:
            num_rows = len(pd.read_csv(file_path))
            print(f'topic:{name}, questions: {num_rows}')

    # to_csv does not create missing directories, so make sure the output
    # directory exists before the first question is written.
    os.makedirs('generated/dynamicScenario/', exist_ok=True)

    topic = short_topic
    total_questions = [0] * len(topic)
    console = sys.stdout
    for i, name in enumerate(tqdm.tqdm(topic)):
        log_path = f"logs/output_{_topic_slug(name)}.txt"
        # utf-8 so that model output containing non-ASCII text can be logged.
        with open(log_path, "w", encoding="utf-8") as log_file:
            sys.stdout = log_file
            try:
                total_questions[i] = _generate_topic_questions(i, name)
            finally:
                # Always hand stdout back, even if generation fails, so the
                # log file can be closed safely.
                sys.stdout = console

    for name, count in zip(topic, total_questions):
        print(f'topic: {name}, questions: {count}')
    print(f'total questions: {sum(total_questions)}')


def _topic_slug(name):
    """Return the topic name as used in file names (spaces -> underscores)."""
    return name.replace(" ", "_")


def _read_first_column(path):
    """Return the non-empty values of the first CSV column as a list of str."""
    return pd.read_csv(path).iloc[:, 0].dropna().astype(str).tolist()


def _append_question_row(file_path, row):
    """Append one (question, level, vision_questions) row to the topic CSV."""
    df = pd.DataFrame([row], columns=['Questions', 'Level', 'vision_questions'])
    # Write the header only when the file is being created.
    write_header = not os.path.exists(file_path)
    # quoting=1 is csv.QUOTE_ALL.
    df.to_csv(file_path, mode='a', header=write_header, index=False, quoting=1)


def _generate_topic_questions(topic_index, topic_name):
    """Generate and save questions for one topic; return how many were saved."""
    slug = _topic_slug(topic_name)
    conditions_path = f'generated/conditions/{slug}.csv'
    scenario_path = f'generated/basicScenario/{slug}.csv'
    condition_exist = os.path.exists(conditions_path)
    question_exist = os.path.exists(scenario_path)

    # Both inputs are loaded as plain lists of strings. Joining a DataFrame
    # directly would join its column names, not the stored conditions.
    if condition_exist:
        conditions = _read_first_column(conditions_path)
    else:
        conditions = get_conditions(topic_name)
    if question_exist:
        basicScenario = _read_first_column(scenario_path)
    else:
        basicScenario = get_basicScenario(topic_name)

    # A topic whose inputs were only just generated is skipped for this run
    # and picked up by the next one.
    # NOTE(review): presumably so the generated lists can be reviewed before
    # they are used - confirm.
    if not condition_exist or not question_exist:
        return 0

    all_conditions = ','.join(conditions)
    output_path = f'generated/dynamicScenario/questions_{slug}.csv'
    saved = 0
    for model in models:
        for question_index, scenario in enumerate(tqdm.tqdm(basicScenario)):
            for cond_num in range(3):
                # Number of problems requested per call; it also seeds the
                # running index embedded in the video path below.
                # NOTE(review): the original lowered this to 5 when
                # cond_num > 2, which range(3) never reaches - confirm
                # whether ">= 2" was intended.
                question_num = 10
                content = (
                    f'Generate {question_num} unique, logically consistent, and visually descriptive physics problems based on {scenario}. '
                    'Use real-world, Earth-based scenarios that occur within a closed container or controlled setup—avoid space or abstract environments. '
                    f'Each problem must: 1. Incorporate {cond_num + 1} distinct, non-overlapping conditions selected from {all_conditions}, with specific numeric parameter values. '
                    '2. Be described as a self-contained paragraph featuring a unique physical process that lasts more than 10 seconds. '
                    '3. Be fully visualizable using basic colored shapes and lines to represent key elements (e.g., ball, inclined plane, liquid, pulley). '
                    '4. Provide precise physical settings, including object mass, slope angle, container dimensions, surface materials, and environmental conditions. '
                    '5. Avoid any motion contradiction such as “an object slowing down after release under gravity without resistance.” '
                    '6. Avoid repeating motion patterns or setup logic across the two problems. '
                    '7.Do not reference video rendering, animation, or simulation. '
                    '8. Only return the result, do not include any other information. '
                    'Return the results in this format: "-1-First problem. -2-Second problem. -3-Third problem. -4-..."'
                )

                _, _, _, response = get_model_response(model, content, topic_index)
                # Split on the "-N-" markers; fragments of five characters
                # or fewer are treated as noise and dropped.
                pieces = re.split(r"-\d+-", response)
                sub_question_list = [piece.strip() for piece in pieces if len(piece) > 5]

                for sub_question in sub_question_list:
                    os.makedirs(f'video/{slug}', exist_ok=True)
                    video_path = f'video/{slug}/{question_num}'
                    _, question_prompt = get_prompt(sub_question, video_path)

                    # Retry until the reply contains extractable JSON.
                    questions = []
                    for _ in range(20):
                        _, _, _, reply = get_model_response(model, question_prompt, topic_index)
                        questions = extract_triple_single_quote_json(reply)
                        if questions:
                            break

                    if questions:
                        _append_question_row(
                            output_path,
                            [sub_question, cond_num + 1, questions[0]],
                        )
                        # Count every saved row so the summary is accurate.
                        saved += 1
                        question_num += 1
                    else:
                        print(f'no questions for topic:{topic_name}, model: {model},  basic question: {scenario}, cond_num: {cond_num}, sub question: {sub_question}')
            print(f'topic: {topic_index}, basic {question_index} finished')
    print(f'topic: {topic_name} finished')
    return saved

# Run the generation pipeline only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main() 