import threading
import queue
import time
import traceback # For detailed error logging

class CodeTester:
    def compile_code(self, code_str, main_function_name=None):
        """
        Compiles a Python code string and extracts a specified function or the first callable.

        Args:
            code_str (str): The Python code as a string.
            main_function_name (str, optional): The specific name of the function to extract.
                                                If None, finds the first callable. Defaults to None.

        Returns:
            tuple: (callable_function, error_message_or_None)
        """
        try:
            local_vars = {}
            # Execute the code string in a dictionary to capture defined objects
            exec(code_str, {}, local_vars)

            if main_function_name is not None:
                # Try to get the function by the specified name
                func = local_vars.get(main_function_name)
                if callable(func):
                    return func, None
                else:
                    return None, f"Main function '{main_function_name}' not found or not callable in the provided code."
            else:
                # If no name specified, find the first callable object
                for name, obj in local_vars.items():
                    if callable(obj):
                        return obj, None
                return None, "No callable function found in the provided code."
        except Exception as e:
            # Capture compilation/syntax errors
            return None, f"Compilation Error: {str(e)}\n{traceback.format_exc()}"

    def _worker_process(self, func_code, main_func_name, test_code, timeout, result_queue):
        """
        Worker process (using a thread here) to compile and test the function,
        capturing the output.

        Args:
            func_code (str): Code string for the main function and helpers.
            main_func_name (str): Name of the main function in func_code.
            test_code (str): Code string for the test function.
                             The test function MUST return a dict: {'passed': bool, 'output': Any}
            timeout (float): Maximum execution time in seconds.
            result_queue (queue.Queue): Queue to put the results dictionary into.
        """
        func = None
        test_func = None
        main_output = None # Variable to store the output from the main function

        try:
            # Compile the main function code
            func, func_error = self.compile_code(func_code, main_func_name)
            if func is None:
                result_queue.put({
                    'success': False,
                    'reason': 'main_func_compile_error',
                    'message': func_error,
                    'output': None # No output if compilation failed
                })
                return

            # Compile the test function code (expecting the first callable)
            # Ensure test_code includes necessary imports if not present in func_code
            test_func, test_error = self.compile_code(test_code)
            if test_func is None:
                result_queue.put({
                    'success': False,
                    'reason': 'test_func_compile_error',
                    'message': test_error,
                    'output': None # No output if test compilation failed
                })
                return

            # --- Threading setup to run the test with a timeout ---
            result_container = [] # To get results back from the thread
            event = threading.Event() # To signal completion or timeout

            def worker():
                nonlocal main_output # Allow modification of outer scope variable
                try:
                    # --- Execute the test function ---
                    # It should return {'passed': bool, 'output': Any}
                    test_result_data = test_func(func)

                    # --- Validate the structure returned by test_func ---
                    if isinstance(test_result_data, dict) and \
                       'passed' in test_result_data and \
                       'output' in test_result_data and \
                       isinstance(test_result_data['passed'], bool):

                        test_passed = test_result_data['passed']
                        main_output = test_result_data['output'] # Capture the output

                        if test_passed:
                            result_container.append({
                                'success': True,
                                'reason': None,
                                'message': None,
                                'output': main_output # Include output on success
                            })
                        else:
                            result_container.append({
                                'success': False,
                                'reason': 'test_failed',
                                'message': 'Test function returned False',
                                'output': main_output # Include output on failure
                            })
                    else:
                         # Handle case where test_func returned unexpected format
                         result_container.append({
                             'success': False,
                             'reason': 'test_func_invalid_return',
                             'message': f'Test function did not return the expected dictionary format. Got: {type(test_result_data)}',
                             'output': None
                         })

                except Exception as e:
                    # Capture exceptions during test execution
                    result_container.append({
                        'success': False,
                        'reason': 'test_execution_exception',
                        'message': f"Exception during test execution: {str(e)}\n{traceback.format_exc()}",
                        'output': main_output # Include output if available before exception
                    })
                finally:
                    # Signal that the worker has finished or an error occurred
                    event.set()

            # Start the worker thread
            thread = threading.Thread(target=worker)
            thread.daemon = True # Allow program to exit even if thread is running
            thread.start()

            # Wait for the thread to finish or timeout
            event_occurred = event.wait(timeout)

            # --- Process the results ---
            if not event_occurred:
                # Timeout occurred
                result_queue.put({
                    'success': False,
                    'reason': 'timeout',
                    'message': f'Test execution exceeded {timeout} seconds',
                    'output': None # No reliable output on timeout
                })
            else:
                # Thread finished within timeout
                if result_container:
                    result_queue.put(result_container[0]) # Put the captured result in the queue
                else:
                    # Should not happen if event was set, but handle defensively
                    result_queue.put({
                        'success': False,
                        'reason': 'unknown_worker_error',
                        'message': 'Worker finished but no result was captured.',
                        'output': None
                    })

        except Exception as e:
            # Catch errors in the _worker_process setup itself
            result_queue.put({
                'success': False,
                'reason': 'worker_process_setup_error',
                'message': f"Error setting up worker process: {str(e)}\n{traceback.format_exc()}",
                'output': None
            })

    def run_test(self, func_code, main_func_name, test_code, timeout=5.0):
        """
        Public method to run the test and get the result.

        Args:
            func_code (str): Code string for the main function.
            main_func_name (str): Name of the main function.
            test_code (str): Code string for the test function (must return dict).
            timeout (float, optional): Timeout in seconds. Defaults to 5.0.

        Returns:
            dict: The result dictionary from the worker process.
        """
        q = queue.Queue()
        # Using threading directly here as per original structure,
        # but for true isolation, multiprocessing would be better.
        # If using multiprocessing, ensure queue and results are pickleable.
        process_thread = threading.Thread(target=self._worker_process, args=(
            func_code, main_func_name, test_code, timeout, q
        ))
        process_thread.start()
        process_thread.join() # Wait for the worker setup/thread management to finish

        try:
            # Get the result from the queue
            result = q.get_nowait() # Should have a result by now
        except queue.Empty:
            result = {
                'success': False,
                'reason': 'queue_error',
                'message': 'Result queue was empty after worker finished.',
                'output': None
            }
        return result

# --- Example Usage ---
if __name__ == "__main__":
    # Demo script: runs three tests against a sample function and prints the
    # result dictionary returned by CodeTester.run_test for each.
    import json

    def _print_result(result):
        """Pretty-print a result dictionary as JSON."""
        print("Test Result:")
        # default=repr keeps the dump from crashing when a test returns an
        # output that JSON cannot encode (e.g. a set).
        print(json.dumps(result, indent=4, default=repr))

    # Main function code
    func_code_example = """
def tuple_intersection(list1, list2):
    # Find the intersection of elements in two tuple lists.
    def find_intersection(lst1, lst2):
        # Convert lists to sets for efficient intersection
        # Note: Tuples must be hashable.
        try:
            set1 = set(lst1)
            set2 = set(lst2)
            intersection = set1.intersection(set2)
            return intersection
        except TypeError as e:
            # Handle cases where items in lists are not hashable (e.g., lists within lists)
            print(f"Error: Items in lists must be hashable (like tuples). {e}")
            # Return an empty set or raise an error, depending on desired behavior
            return set()

    return find_intersection(list1, list2)
"""

    # Test function code (returns the required {'passed', 'output'} dictionary)
    test_code_example = """
import collections # Example: Import if needed by the test logic itself

def test_case(func_to_test):
    # Define test inputs
    input1 = [(3, 4), (5, 6), (9, 10), (4, 5)]
    input2 = [(5, 4), (3, 4), (6, 5), (9, 11)]
    # Define the expected output.
    # The function compares tuples exactly, so (5, 6)/(6, 5) and (4, 5)/(5, 4)
    # do not match; only (3, 4) appears in both lists.
    expected_output = {(3, 4)}

    # --- Call the function under test and capture its output ---
    actual_output = func_to_test(input1, input2)

    # --- Perform the comparison ---
    # Comparing sets ignores order inherently
    test_passed = (actual_output == expected_output)

    # --- Prepare the output for return ---
    # Sets are not directly JSON serializable if needed later. Convert to a sorted list.
    serializable_output = sorted(actual_output)

    # --- Return the result dictionary ---
    return {'passed': test_passed, 'output': serializable_output}
"""

    main_func_name_example = "tuple_intersection"

    # Instantiate the tester
    tester = CodeTester()

    # Run the test
    result = tester.run_test(func_code_example, main_func_name_example, test_code_example, timeout=2.0)

    # Print the full result dictionary
    _print_result(result)

    # Example of accessing the captured output. Every result dictionary has an
    # 'output' key; it is None when no output could be captured.
    if result.get('success'):
        print(f"\nTest Passed! Output was: {result.get('output')}")
    else:
        print(f"\nTest Failed. Output captured: {result.get('output')}")
        print(f"Reason: {result.get('reason')} - {result.get('message')}")

    # --- Example: Test that fails ---
    test_code_fail = """
def test_case(func_to_test):
    input1 = [(1, 1)]
    input2 = [(2, 2)]
    expected_output = {(1, 1)} # This will not match the actual output set()
    actual_output = func_to_test(input1, input2)
    test_passed = actual_output == expected_output
    serializable_output = sorted(actual_output)
    return {'passed': test_passed, 'output': serializable_output}
"""
    print("\n--- Running Failing Test ---")
    result_fail = tester.run_test(func_code_example, main_func_name_example, test_code_fail, timeout=2.0)
    _print_result(result_fail)
    print(f"\nTest Failed. Output captured: {result_fail.get('output')}")

    # --- Example: Test with code that causes an exception during test ---
    # Unhashable inputs would NOT work for this: tuple_intersection catches that
    # TypeError itself and returns an empty set. A call with a missing argument
    # raises at the call site, where the function cannot swallow it.
    test_code_exception = """
def test_case(func_to_test):
    input1 = [(3, 4)]
    actual_output = func_to_test(input1) # TypeError: missing argument 'list2'
    # The rest won't execute
    return {'passed': False, 'output': actual_output}
"""
    print("\n--- Running Test Causing Exception ---")
    result_exception = tester.run_test(func_code_example, main_func_name_example, test_code_exception, timeout=2.0)
    _print_result(result_exception)
