import json
import subprocess
import traceback
import inspect
import os
import tempfile

from experts.base_expert import BaseExpert


class Evaluator(BaseExpert):
    """Expert that requests test inputs from the model and smoke-tests code.

    ``forward`` asks the model for one JSON test sample for a problem.
    ``evaluate`` runs generated code against test samples in a separate
    Python interpreter (``self.gurobi_python_path``) and reports the first
    failure as plain-text feedback.

    NOTE(review): ``ROLE_DESCRIPTION`` and ``FORWARD_TASK`` are presumably
    consumed by ``BaseExpert`` to build ``self.forward_chain``; confirm there.
    """

    ROLE_DESCRIPTION = '''You are an evaluator.'''
    FORWARD_TASK = '''You will be responsible for generating test samples for verifying the correctness of a program.

You will be given an operations research optimization problem and its function signature, and you are responsible for generating an input example for testing the function.
The test data you generate must be reasonable, solvable, and realistic.
Output JSON directly without any other information!

Input:
problem: A candy store mixes regular candy and sour candy to prepare two products, regular mix and sour surprise mix. Each kilogram of the regular mix contains 0.8 kg of regular candy and 0.2 kg of sour candy. The profit per kilogram of the regular mix is $3. Each kilogram of the sour surprise mix contains 0.1 kg of regular candy and 0.9 kg of sour candy. The profit per kilogram of the sour surprise mix is $5. The candy store has 80 kg of regular candy and 60 kg of sour candy available. How many kilograms of each type of candy mix should be created to maximize profits?
code:
def prob_29(regular_mix, sour_surprise_mix, constraint1, constraint2):
    """
    Args:
        regular_mix: a float, the amount of regular mix candy created
        sour_surprise_mix: a float, the amount of sour surprise mix candy created
        constraint1: an integer, the limit of available regular candy
        constraint2: an integer, the limit of available sour candy
    Returns:
        obj: a float, the maximum profit achieved
    """
    obj = 1e9
    # To be implemented
    return obj

Output:
{{
    "input": {{
        "regular_mix": 94.2,
        "sour_surprise_mix": 45.7,
        "constraint1": 80,
        "constraint2": 60
    }}
}}

Input:
problem: {problem_description}
code:
{code_example}

Output:
'''

    # Interpreter used to run generated code when the caller supplies none.
    DEFAULT_GUROBI_PYTHON = "/dccstor/nl2opt/miniforge3/envs/nl2opt_optim/bin/python"

    # Wall-clock limits (seconds) for the import check and for each sample run.
    COMPILE_TIMEOUT_SECONDS = 30
    RUNTIME_TIMEOUT_SECONDS = 60

    # The generated code is written to "<work dir>/<_MODULE_NAME>.py" so the
    # driver scripts can import it under a known, valid identifier.
    _MODULE_NAME = "generated_code"

    # Driver that only imports the generated module. Filled with str.format;
    # "!r" turns the directory into a correctly escaped Python string literal.
    _COMPILE_SCRIPT = '''
import sys
sys.path.insert(0, {work_dir!r})
import {module}
print("COMPILATION_SUCCESS")
'''

    # Driver that calls the first callable named "prob_*" with one sample.
    # The sample travels as JSON text and is decoded in the child with
    # json.loads: pasting JSON as a Python literal breaks on true/false/null.
    _RUNTIME_SCRIPT = '''
import sys
import json
sys.path.insert(0, {work_dir!r})
import {module} as generated_code

# Find the function
func = None
for name in dir(generated_code):
    if name.startswith('prob_') and callable(getattr(generated_code, name)):
        func = getattr(generated_code, name)
        break

if func is None:
    print("FUNCTION_NOT_FOUND")
    sys.exit(1)

# Test the function
sample_input = json.loads({payload!r})
try:
    result = func(**sample_input)
    print("RUNTIME_SUCCESS:" + str(result))
except Exception as e:
    print("RUNTIME_ERROR:" + str(e))
    sys.exit(1)
'''

    def __init__(self, model, gurobi_python_path=None):
        """Create the evaluator.

        Args:
            model: passed through to ``BaseExpert.__init__``.
            gurobi_python_path: path of the Python interpreter used to run
                generated code; defaults to ``DEFAULT_GUROBI_PYTHON``.
        """
        super().__init__(
            name='Evaluator',
            description='An special expert that generates the test data and test correctness.',
            model=model
        )
        # Gurobi environment path
        self.gurobi_python_path = gurobi_python_path or self.DEFAULT_GUROBI_PYTHON

    @staticmethod
    def _clean_model_output(text):
        """Strip whitespace, stray single quotes and markdown fences around JSON."""
        text = text.strip().strip("'").strip()
        if text.startswith("```"):
            text = text[3:]
            if text.endswith("```"):
                text = text[:-3]
            # Drop the optional language tag of a "```json" opening fence.
            if text[:4].lower() == "json":
                text = text[4:]
        return text.strip()

    def forward(self, problem):
        """Ask the model for one test sample and return it as parsed JSON.

        Args:
            problem: mapping with the keys ``'description'`` and
                ``'code_example'``, which fill the prompt placeholders.

        Returns:
            The decoded JSON value produced by the model.

        Raises:
            json.JSONDecodeError: if the cleaned model output is not valid JSON.
        """
        answer = self.forward_chain.predict(
            problem_description=problem['description'],
            code_example=problem['code_example'],
        )
        return json.loads(self._clean_model_output(answer))

    def evaluate(self, samples, generated_code_content):
        """
        Evaluate generated code using Gurobi environment

        The code is first imported in a child interpreter (compilation check),
        then its ``prob_*`` function is called once per sample. Evaluation
        stops at the first failure.

        Args:
            samples: list of test samples, each a mapping whose ``'input'``
                entry holds the keyword arguments for the function;
                ``None`` is treated as an empty list
            generated_code_content: string content of generated code

        Returns:
            feedback: string feedback or None if successful
        """
        # Everything lives in one private directory, so the generated module,
        # the driver scripts and any __pycache__ are removed on every exit
        # path, including the early returns below.
        with tempfile.TemporaryDirectory() as work_dir:
            code_path = os.path.join(work_dir, self._MODULE_NAME + '.py')
            with open(code_path, 'w', encoding='utf-8') as code_file:
                code_file.write(generated_code_content)

            feedback = self._check_compilation(work_dir)
            if feedback is not None:
                return feedback

            for i, sample in enumerate(samples or ()):
                feedback = self._check_sample(work_dir, i, sample)
                if feedback is not None:
                    return feedback

        return None  # Success

    def _run_script(self, work_dir, script_name, script_source, timeout):
        """Write a driver script into ``work_dir`` and run it in the child interpreter."""
        script_path = os.path.join(work_dir, script_name)
        with open(script_path, 'w', encoding='utf-8') as script_file:
            script_file.write(script_source)
        return subprocess.run(
            [self.gurobi_python_path, script_path],
            capture_output=True,
            text=True,
            timeout=timeout,
        )

    def _check_compilation(self, work_dir):
        """Import the generated module in the child; return feedback or None."""
        script = self._COMPILE_SCRIPT.format(
            work_dir=work_dir, module=self._MODULE_NAME
        )
        try:
            result = self._run_script(
                work_dir, '_compile_check.py', script,
                self.COMPILE_TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired:
            return 'Code compilation timeout!\n'
        except Exception as e:
            # E.g. the interpreter at gurobi_python_path does not exist.
            return 'Error during compilation test!\n' + str(e) + '\n'

        if result.returncode != 0 or "COMPILATION_SUCCESS" not in result.stdout:
            return ('There is grammar error in generated code!\n'
                    + result.stderr + '\n')
        return None

    def _check_sample(self, work_dir, index, sample):
        """Run the generated function on one sample; return feedback or None."""
        try:
            script = self._RUNTIME_SCRIPT.format(
                work_dir=work_dir,
                module=self._MODULE_NAME,
                payload=json.dumps(sample['input']),
            )
            result = self._run_script(
                work_dir, '_runtime_check.py', script,
                self.RUNTIME_TIMEOUT_SECONDS,
            )
        except subprocess.TimeoutExpired:
            return f'Runtime timeout in sample {index}!\n'
        except Exception as e:
            # Covers malformed samples (missing 'input', values that cannot
            # be JSON-encoded) as well as failures to launch the child.
            return f'Error during runtime test {index}!\n' + str(e) + '\n'

        # The driver exits with status 1 after printing either marker, so the
        # markers must be inspected before the generic exit-code check;
        # otherwise the specific message is lost and empty stderr is reported.
        if "FUNCTION_NOT_FOUND" in result.stdout:
            return 'Function not found in generated code!\n'

        if "RUNTIME_ERROR:" in result.stdout:
            error_msg = result.stdout.split("RUNTIME_ERROR:", 1)[1].strip()
            return f'Runtime error in sample {index}: {error_msg}\n'

        if result.returncode != 0:
            return f'Runtime error in sample {index}!\n' + result.stderr + '\n'

        return None