import time
import traceback
from contextlib import redirect_stdout, redirect_stderr
from io import StringIO
from multiprocessing import Process, Manager
import sys
from queue import Queue
from threading import Thread

from flask import Flask, request, jsonify
import logging
import json

# Logging setup: INFO and above, rendered as "<timestamp> [<LEVEL>] <message>".
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


def _describe_error(exc):
    """Return a readable message for an exception raised by submitted code.

    ``str(SystemExit(3))`` is just ``'3'``, which says nothing in a report, so
    exits are labelled explicitly; every other exception keeps its ``str()``.
    """
    if isinstance(exc, SystemExit):
        return f'SystemExit: {exc.code!r}'
    return str(exc)


def run_code_in_process(code_str, test_list, shared_dict, timeout=5, run_tests_separately=True):
    """Execute ``code_str`` and then the test snippets, publishing the outcome to ``shared_dict``.

    Args:
        code_str: Python source that is exec'd first to define the names under test.
        test_list: Python source snippets (typically ``assert`` statements) exec'd
            in the namespace populated by ``code_str``.
        shared_dict: Mapping that receives the result via ``update()``. In
            separate mode it is refreshed after every test so partial progress
            is visible to whoever holds the mapping.
        timeout: Not used inside this function; accepted for signature
            compatibility with the caller.
        run_tests_separately: When true each snippet is exec'd on its own and
            judged individually; otherwise all snippets are joined with
            newlines and pass or fail as a single unit.

    Returns:
        None. The result (``passed_tests``, ``failed_tests``, ``errors``,
        ``execution_status``, ``summary``) is written into ``shared_dict``.

    NOTE(review): the code is exec'd with the full ``__builtins__``; nothing in
    this function restricts what the submitted code may do.
    """
    result = {
        'passed_tests': [],
        'failed_tests': [],
        'errors': [],
        'execution_status': {
            'timed_out': False,
            'timeout_during': 'none',
            'execution_time': 0
        }
    }

    start_time = time.time()

    # The buffers only keep submitted output off the real streams; their
    # contents are never read back.
    with StringIO() as stdout_buffer, StringIO() as stderr_buffer:
        try:
            # First, execute the code to define the function/class.
            local_namespace = {'__builtins__': __builtins__}
            try:
                with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
                    exec(code_str, local_namespace)
            except (Exception, SystemExit) as e:
                # SystemExit is not an Exception subclass; without catching it
                # an `exit()` in the submission would end this function before
                # anything is published.
                result['execution_status'].update({
                    'timed_out': False,
                    'timeout_during': 'code',
                    'execution_time': time.time() - start_time
                })
                result['errors'].append({
                    'type': 'code_execution_error',
                    'message': _describe_error(e),
                    'traceback': traceback.format_exc()
                })
                # Every test counts as failed when the code itself cannot run.
                # Copy so the result never aliases the caller's list.
                result['failed_tests'] = list(test_list)
                result['summary'] = {
                    'total_tests': len(test_list),
                    'passed_tests': 0,
                    'failed_tests': len(test_list),
                    'success_rate': 0.0,
                    'execution_time': result['execution_status']['execution_time'],
                    'timed_out': False,
                    'timeout_during': 'code'
                }
                shared_dict.update(result)
                return

            if run_tests_separately:
                for test in test_list:
                    try:
                        with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
                            exec(test, local_namespace)
                    except AssertionError:
                        result['failed_tests'].append({
                            'test': test,
                            'error': 'Assertion Error'
                        })
                    except (Exception, SystemExit) as e:
                        result['failed_tests'].append({
                            'test': test,
                            'error': _describe_error(e)
                        })
                    else:
                        result['passed_tests'].append(test)
                    # Publish after each test so progress survives if this
                    # function never reaches its end.
                    shared_dict.update(result)
            else:
                combined_tests = '\n'.join(test_list)
                try:
                    with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
                        exec(combined_tests, local_namespace)
                except AssertionError:
                    result['failed_tests'].extend(
                        {'test': test, 'error': 'Assertion Error'} for test in test_list
                    )
                except (Exception, SystemExit) as e:
                    message = _describe_error(e)
                    result['failed_tests'].extend(
                        {'test': test, 'error': message} for test in test_list
                    )
                else:
                    result['passed_tests'].extend(test_list)

        except Exception as e:
            result['errors'].append({
                'type': 'execution_error',
                'message': str(e),
                'traceback': traceback.format_exc()
            })

        finally:
            result['execution_status']['execution_time'] = time.time() - start_time

    # Summary statistics; tests that were never reached count as failed.
    total_tests = len(test_list)
    passed_tests = len(result['passed_tests'])
    failed_tests = len(result['failed_tests'])
    remaining_tests = total_tests - (passed_tests + failed_tests)

    result['summary'] = {
        'total_tests': total_tests,
        'passed_tests': passed_tests,
        'failed_tests': failed_tests + remaining_tests,
        'success_rate': passed_tests / total_tests if total_tests > 0 else 0.0,
        'execution_time': result['execution_status']['execution_time'],
        'timed_out': result['execution_status']['timed_out'],
        'timeout_during': result['execution_status']['timeout_during']
    }

    shared_dict.update(result)

class InputFeeder:
    """Replacement for the built-in ``input`` that serves lines from a fixed string."""

    def __init__(self, inputs):
        lines = inputs.split('\n')
        # A trailing newline terminates the last line; it is not an extra
        # empty line, so drop the empty tail that split() leaves behind.
        if len(lines) > 1 and lines[-1] == '':
            lines.pop()
        self.inputs = lines
        self.index = 0

    def readline(self):
        """Return the next line (without a newline), or '' once all lines are consumed."""
        if self.index < len(self.inputs):
            line = self.inputs[self.index]
            self.index += 1
            return line
        return ''

    def __call__(self, prompt=''):
        """Behave like ``input(prompt)``: return the next line or raise ``EOFError``.

        The prompt is accepted so ``input("...")`` works, but it is discarded
        rather than echoed so it cannot pollute the captured program output.
        Raising at end of input lets read-until-EOF loops terminate.
        """
        if self.index >= len(self.inputs):
            raise EOFError('EOF when reading a line')
        return self.readline()


def _run_program_with_io(code_str, inputs):
    """Exec ``code_str`` once with ``input()`` fed from ``inputs``; return its captured stdout.

    Raises:
        RuntimeError: if the program exits with a non-zero/non-None status.
        Exception: anything else the program raises is propagated unchanged.
    """
    feeder = InputFeeder(inputs)
    namespace = {
        '__builtins__': __builtins__,
        # Run as a script so `if __name__ == '__main__':` blocks execute.
        '__name__': '__main__',
        'input': feeder,
        'raw_input': feeder
    }
    with StringIO() as stdout_buffer, StringIO() as stderr_buffer:
        try:
            with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
                exec(code_str, namespace)
        except SystemExit as exc:
            # exit()/sys.exit() with a success status is a normal way to end
            # a program; the output printed so far is still judged.
            if exc.code not in (None, 0):
                raise RuntimeError(f'Program exited with status {exc.code!r}') from exc
        return stdout_buffer.getvalue()


def run_code_with_io_in_process(code_str, test_list, shared_dict, timeout=5, run_tests_separately=True):
    """Run ``code_str`` against input/output test cases and publish the outcome to ``shared_dict``.

    Args:
        code_str: Python source of the program under test.
        test_list: Dicts with ``'input'`` (text served to ``input()``) and
            ``'output'`` (expected stdout) keys.
        shared_dict: Mapping that receives the final result via ``update()``.
        timeout: Not used inside this function; accepted for signature
            compatibility with the caller.
        run_tests_separately: When true the program is exec'd once per test
            case and whitespace-stripped outputs are compared. Otherwise it is
            exec'd once with all inputs joined by newlines, and the output is
            split into consecutive line groups sized by each expected output.

    Returns:
        None. The result (``passed_tests``, ``failed_tests``, ``errors``,
        ``execution_status``, ``summary``) is written into ``shared_dict``.

    NOTE(review): only ``input``/``raw_input`` are replaced; programs that read
    ``sys.stdin`` directly do not see the test input.
    """
    result = {
        'passed_tests': [],
        'failed_tests': [],
        'errors': [],
        'execution_status': {
            'timed_out': False,
            'timeout_during': 'none',
            'execution_time': 0
        }
    }

    start_time = time.time()

    try:
        if run_tests_separately:
            for test_case in test_list:
                try:
                    actual_output = _run_program_with_io(code_str, test_case['input'])
                    record = {
                        'input': test_case['input'],
                        'expected_output': test_case['output'],
                        'actual_output': actual_output
                    }
                    if actual_output.strip() == test_case['output'].strip():
                        result['passed_tests'].append(record)
                    else:
                        record['error'] = 'Output mismatch'
                        result['failed_tests'].append(record)
                except Exception as e:
                    result['failed_tests'].append({
                        'input': test_case['input'],
                        'expected_output': test_case['output'],
                        'error': str(e)
                    })
        else:
            # Run all tests in one execution by concatenating inputs.
            all_inputs = '\n'.join(t['input'] for t in test_list)

            try:
                actual_output = _run_program_with_io(code_str, all_inputs)
                actual_outputs = actual_output.strip().split('\n')

                # Walk the output with a running line offset: each test owns
                # as many lines as its expected output has.
                offset = 0
                for test_case in test_list:
                    expected = test_case['output'].strip()
                    line_count = len(expected.split('\n'))
                    actual_test_output = '\n'.join(actual_outputs[offset:offset + line_count])
                    offset += line_count

                    record = {
                        'input': test_case['input'],
                        'expected_output': test_case['output'],
                        'actual_output': actual_test_output
                    }
                    if actual_test_output == expected:
                        result['passed_tests'].append(record)
                    else:
                        record['error'] = 'Output mismatch'
                        result['failed_tests'].append(record)

            except Exception as e:
                result['errors'].append({
                    'type': 'execution_error',
                    'message': str(e),
                    'traceback': traceback.format_exc()
                })
                # Mark all tests as failed
                for test_case in test_list:
                    result['failed_tests'].append({
                        'input': test_case['input'],
                        'expected_output': test_case['output'],
                        'error': str(e)
                    })

    except Exception as e:
        result['errors'].append({
            'type': 'execution_error',
            'message': str(e),
            'traceback': traceback.format_exc()
        })

    finally:
        result['execution_status']['execution_time'] = time.time() - start_time

        # Summary statistics
        total_tests = len(test_list)
        passed_tests = len(result['passed_tests'])
        failed_tests = len(result['failed_tests'])

        result['summary'] = {
            'total_tests': total_tests,
            'passed_tests': passed_tests,
            'failed_tests': failed_tests,
            'success_rate': passed_tests / total_tests if total_tests > 0 else 0.0,
            'execution_time': result['execution_status']['execution_time'],
            'timed_out': result['execution_status']['timed_out'],
            'timeout_during': result['execution_status']['timeout_during']
        }

    shared_dict.update(result)

def execute_code_with_io_safely(code_str, test_list, timeout=5, run_tests_separately=True):
    """
    Execute Python code with input/output in a controlled environment and return the results.
    Uses a separate process with timeout.

    Args:
        code_str (str): Python code to execute
        test_list (list): List of dictionaries containing input/output pairs
                         Each dict should have 'input' and 'output' keys
        timeout (int, optional): Maximum execution time in seconds. Defaults to 5.
        run_tests_separately (bool, optional): Whether to run each test in isolation. Defaults to True.

    Returns:
        dict: Results containing passed/failed tests, errors, execution status
        and a summary. If the worker is still alive after ``timeout`` seconds
        it is terminated, a ``timeout_error`` is recorded and every test the
        worker had not reported is listed as failed with error ``'Timeout'``.
        If the worker ends without publishing anything, an ``execution_error``
        naming its exit code is recorded and all tests count as failed.
    """
    with Manager() as manager:
        shared_dict = manager.dict()
        p = Process(target=run_code_with_io_in_process,
                    args=(code_str, test_list, shared_dict, timeout, run_tests_separately))
        p.start()
        p.join(timeout)

        timed_out = p.is_alive()
        if timed_out:
            p.terminate()
            p.join()

        # Copy the data out of the proxy before the manager shuts down at the
        # end of this `with` block.
        result = dict(shared_dict) if shared_dict else None
        exit_code = p.exitcode

    if result is None:
        # The worker published nothing: start from an "everything failed" result.
        result = {
            'passed_tests': [],
            'failed_tests': [],
            'errors': [],
            'execution_status': {
                'timed_out': False,
                'timeout_during': 'none',
                'execution_time': 0
            },
            'summary': {
                'total_tests': len(test_list),
                'passed_tests': 0,
                'failed_tests': len(test_list),
                'success_rate': 0.0,
                'execution_time': 0,
                'timed_out': False,
                'timeout_during': 'none'
            }
        }
        if not timed_out:
            # Without this entry the caller would see every test failed and
            # no explanation at all.
            result['errors'].append({
                'type': 'execution_error',
                'message': f'Worker process exited without reporting results (exit code {exit_code})'
            })

    if not timed_out:
        return result

    result['errors'].append({
        'type': 'timeout_error',
        'message': f'Code execution timed out after {timeout} seconds'
    })

    result['execution_status'].update({
        'timed_out': True,
        'timeout_during': 'execution',
        'execution_time': timeout
    })

    # The worker appends outcomes in test order, so whatever lies beyond the
    # number of reported outcomes was never judged.
    reported = len(result['passed_tests']) + len(result['failed_tests'])
    for test_case in test_list[reported:]:
        result['failed_tests'].append({
            'input': test_case['input'],
            'expected_output': test_case['output'],
            'error': 'Timeout'
        })

    # Recompute the summary so its counts agree with the lists above.
    total_tests = len(test_list)
    passed_tests = len(result['passed_tests'])
    result['summary'] = {
        'total_tests': total_tests,
        'passed_tests': passed_tests,
        'failed_tests': len(result['failed_tests']),
        'success_rate': passed_tests / total_tests if total_tests > 0 else 0.0,
        'execution_time': timeout,
        'timed_out': True,
        'timeout_during': 'execution'
    }

    return result

def execute_tests_safely(code_str, test_list, timeout=5, run_tests_separately=True):
    """
    Execute Python code with tests in a controlled environment and return the results.
    Uses a separate process with timeout.

    Args:
        code_str (str): Python code that defines the names under test.
        test_list (list): Test snippets (Python source) run against that code.
        timeout (int, optional): Seconds to wait for the worker before
            terminating it. Defaults to 5.
        run_tests_separately (bool, optional): Forwarded to the worker; whether
            each test is executed on its own. Defaults to True.

    Returns:
        dict: ``passed_tests``, ``failed_tests``, ``errors``,
        ``execution_status`` and ``summary``. On timeout a ``timeout_error``
        is recorded and the summary is rebuilt from whatever the worker had
        published. If the worker ends without a finished result, an
        ``execution_error`` naming its exit code is recorded. In both cases
        tests with no recorded outcome are counted as failed in the summary.
    """
    with Manager() as manager:
        shared_dict = manager.dict()
        p = Process(target=run_code_in_process,
                    args=(code_str, test_list, shared_dict, timeout, run_tests_separately))
        p.start()
        p.join(timeout)

        timed_out = p.is_alive()
        if timed_out:
            p.terminate()
            p.join()

        # Copy the data out of the proxy before the manager shuts down at the
        # end of this `with` block.
        result = dict(shared_dict) if shared_dict else None
        exit_code = p.exitcode

    if result is None:
        # Nothing was published. The summary is filled in below so that it
        # counts every test as failed instead of reporting zero tests.
        result = {
            'passed_tests': [],
            'failed_tests': [],
            'errors': [],
            'execution_status': {
                'timed_out': False,
                'timeout_during': 'none',
                'execution_time': 0
            }
        }

    if timed_out:
        result['errors'].append({
            'type': 'timeout_error',
            'message': f'Code execution timed out after {timeout} seconds'
        })
        result['execution_status'].update({
            'timed_out': True,
            'timeout_during': 'execution',
            'execution_time': timeout
        })
    elif 'summary' not in result:
        # The worker writes 'summary' as its last step; its absence means the
        # worker ended early, which the caller should be told about.
        result['errors'].append({
            'type': 'execution_error',
            'message': f'Worker process exited before finishing (exit code {exit_code})'
        })

    if timed_out or 'summary' not in result:
        total_tests = len(test_list)
        passed_tests = len(result.get('passed_tests', []))
        failed_tests = len(result.get('failed_tests', []))
        remaining_tests = total_tests - (passed_tests + failed_tests)

        result['summary'] = {
            'total_tests': total_tests,
            'passed_tests': passed_tests,
            'failed_tests': failed_tests + remaining_tests,  # Count untested as failed
            'success_rate': passed_tests / total_tests if total_tests > 0 else 0.0,
            'execution_time': result['execution_status']['execution_time'],
            'timed_out': result['execution_status']['timed_out'],
            'timeout_during': result['execution_status']['timeout_during']
        }

    return result

# Flask application object; the route handlers below register themselves on it.
app = Flask(__name__)

@app.route('/execute', methods=['POST'])
def execute_code():
    """POST /execute: run submitted code against a list of test snippets.

    Expects a JSON object with ``code`` (str), ``test_list`` (non-empty list)
    and optionally ``run_tests_separately`` (default True) and ``timeout``
    (positive number of seconds, default 5).

    Returns:
        The result dict from ``execute_tests_safely`` as JSON, or a JSON
        ``{'error': ...}`` body with status 400 when the request is invalid.
    """
    if not request.is_json:
        return jsonify({'error': 'Content-Type must be application/json'}), 400

    # silent=True yields None for an unparsable body instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'Request body must be a JSON object'}), 400

    code = data.get('code')
    test_list = data.get('test_list', [])
    run_tests_separately = data.get('run_tests_separately', True)
    timeout = data.get('timeout', 5)

    if not code:
        return jsonify({'error': 'No code provided'}), 400

    if not isinstance(code, str):
        return jsonify({'error': 'code must be a string'}), 400

    if not test_list:
        return jsonify({'error': 'No tests provided'}), 400

    if not isinstance(test_list, list):
        return jsonify({'error': 'test_list must be a list'}), 400

    # The timeout is passed to Process.join(): None would wait forever and a
    # non-number would raise, so reject anything but a positive number.
    if isinstance(timeout, bool) or not isinstance(timeout, (int, float)) or timeout <= 0:
        return jsonify({'error': 'timeout must be a positive number'}), 400

    result = execute_tests_safely(code, test_list, timeout=timeout, run_tests_separately=run_tests_separately)
    return jsonify(result)

@app.route('/ioexecute', methods=['POST'])
def execute_code_with_io():
    """POST /ioexecute: run a submitted program against input/output test cases.

    Expects a JSON object with ``code``, ``test_list`` (non-empty list of
    dicts that each have ``input`` and ``output`` keys) and optionally
    ``run_tests_separately`` (default True) and ``timeout`` (positive number
    of seconds, default 5).

    Returns:
        The result dict from ``execute_code_with_io_safely`` as JSON, or a
        JSON ``{'error': ...}`` body with status 400 when the request is
        invalid. Validation failures and execution outcomes are logged.
    """
    logger.info("Received request from %s", request.remote_addr)

    if not request.is_json:
        logger.error("Invalid Content-Type: %s", request.content_type)
        return jsonify({'error': 'Content-Type must be application/json'}), 400

    # Flask reports a malformed body by raising BadRequest, not
    # json.JSONDecodeError; silent=True turns that into None so it can be
    # answered with a JSON error body here.
    data = request.get_json(silent=True)
    if data is None:
        logger.error("JSON decode error: request body could not be parsed")
        return jsonify({'error': 'Invalid JSON format'}), 400
    if not isinstance(data, dict):
        logger.error("Request body is not a JSON object: %s", type(data))
        return jsonify({'error': 'Request body must be a JSON object'}), 400
    logger.info("Request payload size: %s bytes", len(str(data)))

    code = data.get('code')
    test_list = data.get('test_list', [])
    run_tests_separately = data.get('run_tests_separately', True)
    timeout = data.get('timeout', 5)

    if not code:
        logger.error("No code provided in request")
        return jsonify({'error': 'No code provided'}), 400

    if not test_list:
        logger.error("No test cases provided in request")
        return jsonify({'error': 'No test cases provided'}), 400

    if not isinstance(test_list, list):
        logger.error("test_list is not a list: %s", type(test_list))
        return jsonify({'error': 'test_list must be a list'}), 400

    # Validate test list format
    for i, test in enumerate(test_list):
        if not isinstance(test, dict):
            logger.error("Test case %s is not a dictionary: %s", i, type(test))
            return jsonify({'error': f'Test case {i} must be a dictionary'}), 400
        if 'input' not in test or 'output' not in test:
            logger.error("Test case %s missing required keys: %s", i, test.keys())
            return jsonify({'error': f'Test case {i} must have both "input" and "output" keys'}), 400

    # The timeout is passed to Process.join(): None would wait forever and a
    # non-number would raise, so reject anything but a positive number.
    if isinstance(timeout, bool) or not isinstance(timeout, (int, float)) or timeout <= 0:
        logger.error("Invalid timeout: %r", timeout)
        return jsonify({'error': 'timeout must be a positive number'}), 400

    logger.info("Validation passed. Processing %s test cases with timeout=%ss", len(test_list), timeout)

    result = execute_code_with_io_safely(code, test_list, timeout=timeout, run_tests_separately=run_tests_separately)

    if result.get('errors'):
        logger.error("Execution errors: %s", result['errors'])

    summary = result.get('summary', {})
    logger.info(
        "Execution completed: %s/%s tests passed, time=%.2fs, timed_out=%s",
        summary.get('passed_tests', 0),
        summary.get('total_tests', 0),
        summary.get('execution_time', 0),
        summary.get('timed_out', False),
    )

    return jsonify(result)

# When run as a script, serve on all interfaces (0.0.0.0), port 1337, with Flask debug mode off.
# NOTE(review): this service exec()s request-supplied code; confirm it is only reachable from trusted networks.
if __name__ == '__main__':
    app.run(host='0.0.0.0', port=1337, debug=False)
