import json
import subprocess
import sys
from pathlib import Path

import pytest

# Define the paths relative to the test script's location
TEST_DIR = Path(__file__).parent
SCRIPT_PATH = TEST_DIR / "transform_log.py"
TEST_CASES_DIR = TEST_DIR / "test_cases"
EXPECTED_OUTPUTS_DIR = TEST_DIR / "expected_outputs"

# Find all .txt files in the test_cases directory.
# glob() yields entries in filesystem-dependent order, so sort them to keep
# the parametrized test order (and the generated test IDs) stable across
# runs and machines.
test_files = sorted(TEST_CASES_DIR.glob("*.txt"))
ids = [file.name for file in test_files]

@pytest.mark.parametrize("log_file_path", test_files, ids=ids)
def test_log_transformation(log_file_path):
    """
    Run transform_log.py on one log file and compare its output to the golden file.

    This test function runs for every .txt file in the test_cases directory.
    The script is invoked as a subprocess with the log file path as its only
    argument; its stdout is parsed as JSON and compared to the JSON file of
    the same base name in the expected_outputs directory.

    Args:
        log_file_path: Path to the input log file (one parametrized case).
    """
    # 1. Prepare paths
    log_filename = log_file_path.name
    # with_suffix() swaps only the final extension. A plain str.replace()
    # would also rewrite ".txt" occurring elsewhere in the name and would
    # miss differently-cased extensions such as ".TXT".
    expected_json_path = EXPECTED_OUTPUTS_DIR / log_file_path.with_suffix(".json").name

    assert SCRIPT_PATH.is_file(), "The transform_log.py script was not found."
    assert expected_json_path.is_file(), f"Golden file not found for {log_filename}"

    # 2. Run the transformation script
    # Use the interpreter that is running pytest so the script sees the same
    # environment; fall back to "python3" if the interpreter path is unknown.
    interpreter = sys.executable or "python3"
    result = subprocess.run(
        [interpreter, str(SCRIPT_PATH), str(log_file_path)],
        capture_output=True,
        text=True,
        check=False,
    )
    # Report failures explicitly: CalledProcessError's message does not show
    # the captured stderr, which is usually where the real cause is.
    if result.returncode != 0:
        pytest.fail(
            f"The script exited with status {result.returncode} for {log_filename}.\n"
            f"Stdout:\n{result.stdout}\n"
            f"Stderr:\n{result.stderr}"
        )

    # 3. Load the actual and expected JSON data
    try:
        actual_json = json.loads(result.stdout)
    except json.JSONDecodeError:
        pytest.fail(f"The script did not produce valid JSON for {log_filename}.\nOutput:\n{result.stdout}")

    expected_json = json.loads(expected_json_path.read_text(encoding='utf-8'))

    # The key-by-key comparison below relies on dict access, so reject any
    # other top-level JSON value with a clear message first.
    assert isinstance(expected_json, dict), (
        f"Golden file for {log_filename} must contain a JSON object"
    )
    assert isinstance(actual_json, dict), (
        f"The script must output a JSON object for {log_filename}, "
        f"got {type(actual_json).__name__}"
    )

    # 4. Compare the results
    # We compare key by key to provide better error messages if they fail
    for key in ("log_filename", "session_id", "epilogue"):
        assert actual_json.get(key) == expected_json.get(key), (
            f"Mismatch in '{key}' for {log_filename}"
        )

    # Compare conversations turn by turn. A missing or null "conversation" is
    # treated as empty here; the final full comparison still distinguishes
    # those cases from an explicit empty list.
    actual_turns = actual_json.get("conversation") or []
    expected_turns = expected_json.get("conversation") or []
    assert len(actual_turns) == len(expected_turns), (
        f"Expected {len(expected_turns)} conversation turns for {log_filename}, "
        f"got {len(actual_turns)}"
    )
    for turn_number, (actual_turn, expected_turn) in enumerate(
        zip(actual_turns, expected_turns), start=1
    ):
        assert actual_turn == expected_turn, f"Mismatch in turn {turn_number} for {log_filename}"

    # Final full comparison as a safeguard
    assert actual_json == expected_json
