import json
import os
import networkx as nx
import time
import fcntl
from datetime import datetime
from typing import Dict, Any, List, Optional

from scripts.utils.decode_graph import decode_graph_from_text
from scripts.generate_prompts import SIZE_PATTERNS


def generate_graph_metadata(graph: nx.Graph) -> Dict[str, Any]:
    """
    Summarise a graph's size, colouring, degree structure and basic properties.

    Node collections are reported as compact comma-separated strings of the
    sorted node labels instead of lists. A node without a "color" attribute
    is treated as "grey", and "grey" nodes are the ones counted as uncoloured.

    Parameters:
    - graph: NetworkX graph to describe

    Returns:
    - Dictionary of metadata (counts, colour and degree breakdowns,
      structural properties, colored leaves, extreme-degree nodes)
    """

    def as_csv(nodes) -> str:
        # Sorted labels joined by commas; an empty collection yields "".
        return ",".join(str(label) for label in sorted(nodes))

    def has_real_color(node) -> bool:
        return graph.nodes[node].get("color", "grey") != "grey"

    total_nodes = len(graph.nodes)
    total_edges = len(graph.edges)

    # Bucket nodes by colour; key order follows the first appearance of each colour.
    members_by_color = {}
    for node, attrs in graph.nodes(data=True):
        members_by_color.setdefault(attrs.get("color", "grey"), []).append(node)
    color_counts = {
        color: len(members) for color, members in members_by_color.items()
    }
    color_members = {
        color: as_csv(members) for color, members in members_by_color.items()
    }

    # Everything that is not grey counts as coloured
    non_grey_total = total_nodes - color_counts.get("grey", 0)

    degree_of = dict(graph.degree())
    leaf_colored = [
        node for node in graph.nodes if degree_of[node] == 1 and has_real_color(node)
    ]

    # Structural checks are delegated to NetworkX (evaluated in this order).
    structure = {
        "connected": nx.is_connected(graph),
        "acyclic": nx.is_forest(graph),
        "tree": nx.is_tree(graph),
        "has_leaves": any(deg == 1 for deg in degree_of.values()),
        "has_colored_leaves": bool(leaf_colored),
    }

    # Bucket nodes by degree; keys are strings so they survive a JSON round trip.
    members_by_degree = {}
    for node, deg in degree_of.items():
        members_by_degree.setdefault(str(deg), []).append(node)
    degree_counts = {key: len(members) for key, members in members_by_degree.items()}
    degree_members = {
        key: as_csv(members) for key, members in members_by_degree.items()
    }

    component_total = nx.number_connected_components(graph)
    leaf_colored_csv = as_csv(leaf_colored)

    # Extreme degrees and the nodes that attain them (0 / "" when there are no nodes)
    if degree_of:
        highest = max(degree_of.values())
        lowest = min(degree_of.values())
    else:
        highest = lowest = 0
    highest_csv = as_csv(node for node, deg in degree_of.items() if deg == highest)
    lowest_csv = as_csv(node for node, deg in degree_of.items() if deg == lowest)

    return {
        "node_count": total_nodes,
        "edge_count": total_edges,
        "colored_nodes_count": non_grey_total,
        "color_distribution": color_counts,
        "colored_nodes_by_color": color_members,  # String representation
        "properties": structure,
        "connected_components": component_total,
        "degree_distribution": degree_counts,
        "nodes_by_degree": degree_members,  # String representation
        "colored_leaves": leaf_colored_csv,  # String representation
        "max_degree": highest,
        "max_degree_nodes": highest_csv,  # String representation
        "min_degree": lowest,
        "min_degree_nodes": lowest_csv,  # String representation
    }


def initialize_specs_file(
    task_name: str,
    graph_type: str,
    base_seed: int,
    generator_function: str,
    generator_parameters: Dict[str, Any] = None,
    property_requirements: List[str] = None,
    transformation_parameters: Dict[str, Any] = None,
    description: str = None,
) -> Dict[str, Any]:
    """
    Build the skeleton of a fresh specs.json document for a benchmark.

    Nothing is written to disk; the caller decides where to store the result.

    Parameters:
    - task_name: Name of the benchmark task
    - graph_type: Type of graph used
    - base_seed: Seed used for generation
    - generator_function: Name of generator function
    - generator_parameters: Parameters passed to generator (empty dict if omitted)
    - property_requirements: Required properties for the task (empty list if omitted)
    - transformation_parameters: Parameters for the transformation (empty dict if omitted)
    - description: Human-readable description; a default sentence is built
      from task_name and graph_type when omitted or empty

    Returns:
    - Initial specs dictionary with "task_info", "generation_settings" and
      an empty "graphs_by_size" section
    """
    # Describes what the task is and when this document was created
    task_info = {
        "task_name": task_name,
        "graph_type": graph_type,
        "description": description or f"{task_name} task using {graph_type} graphs",
        "transformation_parameters": transformation_parameters or {},
        "property_requirements": property_requirements or [],
        "generation_timestamp": datetime.now().isoformat(),
    }

    # Describes how the graphs were produced
    generation_settings = {
        "base_seed": base_seed,
        "generator_function": generator_function,
        "generator_parameters": generator_parameters or {},
    }

    return {
        "task_info": task_info,
        "generation_settings": generation_settings,
        "graphs_by_size": {},
    }


def load_specs_with_lock(specs_path: str) -> Dict[str, Any]:
    """
    Read a JSON specs file while holding an exclusive, non-blocking file lock.

    A busy lock, an I/O error or malformed JSON triggers a retry after a
    short pause; once the attempts are used up the failure is printed and
    an empty dict is returned instead of raising.

    Parameters:
    - specs_path: Path to the specs file

    Returns:
    - Dict: Parsed specs, or an empty dict if the file is missing, blank,
      or could not be read after all retries
    """
    if not os.path.exists(specs_path):
        return {}

    max_retries = 5
    retry_delay = 0.5  # seconds

    for attempt_number in range(1, max_retries + 1):
        try:
            with open(specs_path, "r+", encoding="utf-8") as handle:
                # LOCK_NB makes a contended lock fail immediately (handled below)
                fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
                try:
                    text = handle.read()
                    # A blank file is treated as "no specs yet"
                    return json.loads(text) if text.strip() else {}
                finally:
                    fcntl.flock(handle, fcntl.LOCK_UN)
        except (json.JSONDecodeError, IOError) as e:
            if attempt_number == max_retries:
                print(f"Failed to load specs file after {max_retries} attempts: {e}")
                return {}
            print(
                f"Error loading specs file (attempt {attempt_number}): {e}. Retrying..."
            )
            time.sleep(retry_delay)

    return {}


def save_specs_with_lock(specs_path: str, specs: Dict[str, Any]) -> bool:
    """
    Write specs to a JSON file while holding an exclusive, non-blocking lock.

    The file is opened WITHOUT truncation and only emptied once the lock is
    held, so a writer that loses the race cannot wipe a file another process
    is still working on. The data is serialised before the file is touched
    and flushed before the lock is released.

    Parameters:
    - specs_path: Path to the specs file (parent directories are created)
    - specs: Dictionary to save

    Returns:
    - Bool: True if saved successfully, False if every attempt hit an
      I/O error (including a busy lock)

    Raises:
    - TypeError / ValueError from json if specs is not serialisable; in that
      case the existing file is left untouched
    """
    # dirname is "" for a bare filename, and os.makedirs("") would raise
    parent_dir = os.path.dirname(specs_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Serialise up front so a failure cannot leave a half-written file behind
    payload = json.dumps(specs, indent=2)

    def _open_or_create(path, flags):
        # "r+" alone fails on a missing file and "w" truncates before we hold
        # the lock; adding O_CREAT gives create-if-missing without truncation.
        return os.open(path, flags | os.O_CREAT, 0o666)

    max_retries = 5
    retry_delay = 0.5  # seconds

    for attempt in range(max_retries):
        try:
            with open(
                specs_path, "r+", encoding="utf-8", opener=_open_or_create
            ) as f:
                # Acquire an exclusive lock; raises immediately if it is busy
                fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)

                try:
                    # Only now is it safe to discard the previous content
                    f.seek(0)
                    f.truncate()
                    f.write(payload)
                    # Push the data out while the lock is still held
                    f.flush()
                    return True
                finally:
                    # Release the lock
                    fcntl.flock(f, fcntl.LOCK_UN)
        except IOError as e:
            if attempt < max_retries - 1:
                print(
                    f"Error saving specs file (attempt {attempt+1}): {e}. Retrying..."
                )
                time.sleep(retry_delay)
            else:
                print(f"Failed to save specs file after {max_retries} attempts: {e}")
                return False

    return False


def get_or_create_specs(
    benchmark_path: str,
    task_name: str,
    graph_type: str,
    base_seed: int,
    generator_function: str,
    generator_parameters: Dict[str, Any] = None,
    property_requirements: List[str] = None,
    transformation_parameters: Dict[str, Any] = None,
    description: str = None,
) -> Dict[str, Any]:
    """
    Return the benchmark's specs, creating and saving a fresh one if needed.

    The specs live in "specs.json" inside benchmark_path. When loading yields
    nothing (missing, blank or unreadable file), a new specs dictionary is
    initialised from the remaining arguments and written to that path.

    Parameters:
    - benchmark_path: Path to the benchmark directory (created if missing)
    - Additional parameters for initializing the specs

    Returns:
    - Specs dictionary (either loaded or newly created)
    """
    os.makedirs(benchmark_path, exist_ok=True)
    specs_path = os.path.join(benchmark_path, "specs.json")

    # Fast path: something usable is already on disk
    loaded = load_specs_with_lock(specs_path)
    if loaded:
        return loaded

    # Otherwise start from a clean skeleton and persist it right away
    fresh = initialize_specs_file(
        task_name=task_name,
        graph_type=graph_type,
        base_seed=base_seed,
        generator_function=generator_function,
        generator_parameters=generator_parameters,
        property_requirements=property_requirements,
        transformation_parameters=transformation_parameters,
        description=description,
    )
    save_specs_with_lock(specs_path, fresh)
    return fresh


def update_specs_with_pair(
    benchmark_path: str,
    specs: Dict[str, Any],
    num_nodes: int,
    pair_index: int,
    seed: int,
    input_graph: nx.Graph,
    output_graph: nx.Graph,
    file_paths: Dict[str, Dict[str, Dict[str, str]]],
) -> Dict[str, Any]:
    """
    Record metadata for one input-output pair in specs.json.

    The file is read, modified and rewritten while an exclusive (blocking)
    lock is held. The on-disk content wins over the passed-in specs; the
    passed-in dictionary is only used (and then mutated) when the file is
    empty or does not contain a usable JSON object. An existing pair with
    the same index is replaced, otherwise the pair is appended, and the pair
    list is kept sorted by index.

    Parameters:
    - benchmark_path: Path to the benchmark directory
    - specs: Current specs dictionary (fallback when the file has no content)
    - num_nodes: Number of nodes in the graphs
    - pair_index: Index of this input-output pair
    - seed: Seed used for this pair
    - input_graph: Input graph
    - output_graph: Output graph
    - file_paths: Dictionary with paths to encodings

    Returns:
    - Updated specs dictionary (what was written to disk)
    """
    specs_path = os.path.join(benchmark_path, "specs.json")

    # dirname is "" when benchmark_path is empty, and os.makedirs("") would raise
    specs_dir = os.path.dirname(specs_path)
    if specs_dir:
        os.makedirs(specs_dir, exist_ok=True)

    # Built before taking the lock so the critical section stays short
    pair_data = {
        "index": pair_index,
        "seed": seed,
        "input_graph": generate_graph_metadata(input_graph),
        "output_graph": generate_graph_metadata(output_graph),
        "files": file_paths,
    }
    size_key = str(num_nodes)

    def _open_or_create(path, flags):
        # Create the file if it is missing but never truncate it: a "w+"
        # fallback could wipe content written by a process that created the
        # file in the meantime.
        return os.open(path, flags | os.O_CREAT, 0o666)

    # Atomically read-modify-write the specs file under an exclusive lock
    with open(specs_path, "r+", encoding="utf-8", opener=_open_or_create) as f:
        fcntl.flock(f, fcntl.LOCK_EX)
        try:
            content = f.read().strip()
            try:
                file_specs = json.loads(content) if content else {}
            except json.JSONDecodeError:
                file_specs = {}
            if not isinstance(file_specs, dict):
                # Anything other than a JSON object is treated like a corrupt file
                file_specs = {}

            # Use existing specs as base if file was empty
            specs_dict = file_specs or specs

            size_entry = specs_dict.setdefault("graphs_by_size", {}).setdefault(
                size_key, {"pairs": []}
            )
            pairs = size_entry.setdefault("pairs", [])

            # Replace the pair with the same index, or append a new one
            for i, existing in enumerate(pairs):
                if existing.get("index") == pair_index:
                    pairs[i] = pair_data
                    break
            else:
                pairs.append(pair_data)

            # Keep sorted for readability
            pairs.sort(key=lambda entry: entry.get("index", 0))

            # Serialise first so a failure cannot leave a truncated file
            payload = json.dumps(specs_dict, indent=2)

            # Write back
            f.seek(0)
            f.truncate()
            f.write(payload)
            f.flush()
        finally:
            fcntl.flock(f, fcntl.LOCK_UN)

    return specs_dict


def save_specs(benchmark_path: str, specs: Dict[str, Any]) -> None:
    """
    Persist specs as "specs.json" inside the benchmark directory.

    Delegates to save_specs_with_lock; its success flag is not returned.

    Parameters:
    - benchmark_path: Path to the benchmark directory
    - specs: Specs dictionary to save
    """
    save_specs_with_lock(os.path.join(benchmark_path, "specs.json"), specs)


def collect_response_metadata(
    benchmark_path: str,
    response_path: str,
    encoding: str,
    ground_truth_path: str,
    is_correct: bool,
    prompt_metadata: Dict[str, Any],
    comparison_mode: str,
    parse_error: Optional[str] = None,
    token_usage: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Collect and save metadata about a model's response for later analysis.

    The record is stored in "response_specs.json" inside benchmark_path.
    A record with the same relative file path is replaced, otherwise the new
    record is appended.

    Parameters:
    - benchmark_path: Path to the benchmark directory
    - response_path: Path to the response file
    - encoding: Encoding type used (adjacency, incident)
    - ground_truth_path: Path to the ground truth file
    - is_correct: Whether the response correctly solved the task
    - prompt_metadata: Metadata about the prompt (system_prompt, size_pattern,
      n_pairs or num_pairs, question_type, target)
    - comparison_mode: Mode used for comparison (isomorphic or label_consistent)
    - parse_error: Error message if parsing failed, None otherwise
    - token_usage: Optional dictionary with token usage information

    Returns:
    - Dictionary with collected metadata
    """
    response_specs_path = os.path.join(benchmark_path, "response_specs.json")

    # Derive the model name from the response filename. The hyphen-aware
    # parser handles names such as "...-teacher-3-gpt-4.1-nano.txt"; the
    # legacy "text after the last underscore" rule is kept as a fallback for
    # filenames it cannot parse.
    filename = os.path.basename(response_path)
    parsed_name = extract_metadata_from_filename(filename)
    if parsed_name and parsed_name.get("model"):
        model_name = parsed_name["model"]
    else:
        model_name = filename.split("_")[-1].replace(".txt", "")

    # Parse the response and extract graph metadata if possible
    response_graph_metadata = {}
    ground_truth_graph_metadata = {}

    if not parse_error:
        try:
            # Parse response graph
            response_graph = decode_graph_from_text(
                response_path, encoding_type=encoding, response=True
            )
            response_graph_metadata = generate_graph_metadata(response_graph)

            # Parse ground truth graph
            ground_truth_graph = decode_graph_from_text(
                ground_truth_path, encoding_type=encoding, response=False
            )
            ground_truth_graph_metadata = generate_graph_metadata(ground_truth_graph)

        except (ValueError, FileNotFoundError, TypeError, nx.NetworkXException) as e:
            # Catch networkx errors when parsing graphs (e.g., null graph connectivity)
            parse_error = str(e)

    # The prompt filename parser reports the pair count as "num_pairs", the
    # response filename parser as "n_pairs"; accept either spelling.
    n_pairs = prompt_metadata.get("n_pairs")
    if n_pairs is None:
        n_pairs = prompt_metadata.get("num_pairs")

    # Build response metadata
    response_metadata = {
        "model": model_name,
        "file_path": os.path.relpath(response_path, start=benchmark_path),
        "ground_truth_path": os.path.relpath(ground_truth_path, start=benchmark_path),
        "encoding": encoding,
        "size_pattern": prompt_metadata.get("size_pattern"),
        "system_prompt": prompt_metadata.get("system_prompt"),
        "n_pairs": n_pairs,
        "question_type": prompt_metadata.get("question_type"),
        "target": prompt_metadata.get("target"),
        "comparison_mode": comparison_mode,
        "is_correct": is_correct,
        "timestamp": datetime.now().isoformat(),
        "parse_error": parse_error,
        "response_graph": response_graph_metadata,
        "ground_truth_graph": ground_truth_graph_metadata,
        "token_usage": token_usage,  # Add token usage data
    }

    # Load as late as possible: the graph decoding above can be slow, and the
    # shorter the gap between load and save, the smaller the window in which
    # another process's update could be overwritten.
    # NOTE: the load and the save take the lock separately, so this
    # read-modify-write is still not atomic across processes.
    response_specs = load_specs_with_lock(response_specs_path)
    if not response_specs:
        response_specs = {
            "responses": [],
            "last_updated": datetime.now().isoformat(),
        }
    # Tolerate an existing file that lacks the "responses" list
    responses = response_specs.setdefault("responses", [])

    # Update the existing record (file_path is the identifier) or add a new one
    for i, resp in enumerate(responses):
        if resp.get("file_path") == response_metadata["file_path"]:
            responses[i] = response_metadata
            break
    else:
        responses.append(response_metadata)

    # Update timestamp
    response_specs["last_updated"] = datetime.now().isoformat()

    # Save the updated specs
    save_specs_with_lock(response_specs_path, response_specs)

    return response_metadata


def extract_prompt_metadata(prompt_path):
    """Extract metadata from a prompt filename in the hyphen-delimited format.

    Expected formats:
    - New: {encoding}-{size_pattern}-{system_prompt}-{num_pairs}-{question_type}-{target}.txt
    - Old/Default: {encoding}-{size_pattern}-{system_prompt}-{num_pairs}.txt (defaults to full_output/output)

    The two formats are told apart by the last token: the new format always
    ends in "input" or "output", the old one in the numeric pair count. This
    keeps old-format names with long explicit size patterns (for example
    "adjacency-5-10-15-teacher-2.txt") from being mistaken for the new format.

    Size patterns can be:
    - Named: "scale_up_3", "mixed_3", etc. (underscores preserved within pattern names)
    - Explicit: "5-10-15", "10-10-10", etc.
    An unrecognised, non-numeric size pattern falls back to sizes [10, 10].

    Parameters:
    - prompt_path: Path (or bare filename) of the prompt file

    Returns:
    - Dictionary with encoding, size_pattern, system_prompt, num_pairs, question_type, target, and sizes
    - None if filename cannot be parsed or is invalid
    """
    basename = os.path.basename(prompt_path)
    try:
        if not basename.endswith(".txt"):
            print(f"⚠️ Skipping non-txt file: {basename}")
            return None

        # Remove .txt extension and split into hyphen-delimited tokens
        parts = basename[:-4].split("-")

        if len(parts) < 4:
            print(f"⚠️ Filename too short, skipping: {basename}")
            return None

        # Extract and validate encoding (first part)
        encoding = parts[0]
        if encoding not in ("adjacency", "incident", "expert"):
            print(f"⚠️ Invalid encoding '{encoding}', skipping: {basename}")
            return None

        # Work backwards from the end: the trailing tokens have fixed meaning,
        # whatever remains between encoding and system_prompt is the size pattern.
        if len(parts) >= 6 and parts[-1] in ("input", "output"):
            # Format: encoding-pattern-system-pairs-question-target
            target = parts[-1]
            question_type = parts[-2]
            num_pairs_str = parts[-3]
            system_prompt = parts[-4]
            size_pattern_parts = parts[1:-4]
        else:
            # Format: encoding-pattern-system-pairs (full_output default)
            question_type = "full_output"
            target = "output"
            num_pairs_str = parts[-1]
            system_prompt = parts[-2]
            size_pattern_parts = parts[1:-2]

        # num_pairs must be a number
        try:
            num_pairs = int(num_pairs_str)
        except ValueError:
            print(f"⚠️ Invalid num_pairs '{num_pairs_str}', skipping: {basename}")
            return None

        # Reconstruct size pattern
        size_pattern = "-".join(size_pattern_parts) if size_pattern_parts else "default"

        # Validate system prompt
        if system_prompt not in ("analyst", "programmer", "teacher", "none"):
            print(f"⚠️ Invalid system_prompt '{system_prompt}', skipping: {basename}")
            return None

        # Resolve the size pattern: exact named pattern first, then any single
        # hyphen-separated component that is a named pattern.
        sizes = None
        if size_pattern in SIZE_PATTERNS:
            sizes = SIZE_PATTERNS[size_pattern]
        else:
            for component in size_pattern.split("-"):
                if component in SIZE_PATTERNS:
                    sizes = SIZE_PATTERNS[component]
                    size_pattern = component  # Use the clean pattern name
                    break

        # If not found as named pattern, try to parse as explicit sizes
        if sizes is None:
            try:
                sizes = [int(s) for s in size_pattern.split("-")]
            except ValueError:
                # Not numeric either: treat as a malformed pattern
                print(
                    f"⚠️ Unknown size pattern '{size_pattern}', using default: {basename}"
                )
                sizes = [10, 10]  # Default fallback
            else:
                # Basic validation: should have at least 2 sizes, all positive
                if len(sizes) < 2 or any(s <= 0 for s in sizes):
                    print(
                        f"⚠️ Invalid explicit size pattern '{size_pattern}', skipping: {basename}"
                    )
                    return None

        # Validate question type (for new format)
        if question_type != "full_output":
            # Basic validation - no spaces, reasonable length
            if (
                " " in question_type
                or len(question_type) > 30
                or len(question_type) < 2
            ):
                print(
                    f"⚠️ Suspicious question_type '{question_type}', skipping: {basename}"
                )
                return None

        # Final validation: the sizes list covers the example pairs plus the
        # test graph, so num_pairs should be one less than its length
        expected_pairs = len(sizes) - 1 if sizes else 1
        if num_pairs != expected_pairs:
            print(
                f"⚠️ Mismatch: num_pairs={num_pairs} but size pattern suggests {expected_pairs}, skipping: {basename}"
            )
            return None

        return {
            "encoding": encoding,
            "size_pattern": size_pattern,
            "system_prompt": system_prompt,
            "question_type": question_type,
            "target": target,
            "num_pairs": num_pairs,
            "sizes": sizes,
        }

    except (ValueError, IndexError, OSError) as e:
        print(f"⚠️ Error parsing filename {basename}: {e}")
        return None


def extract_metadata_from_filename(filename: str) -> Optional[Dict[str, Any]]:
    """
    Enhanced filename parser for response files that handles model names with multiple hyphens.

    The challenge is that both our delimiters and model names use hyphens.
    Model names like 'gpt-4.1-nano' and 'gpt-4.1-mini' contain multiple hyphens.

    Strategy: locate a known model name near the end of the filename (longest
    names first), or guess the last 1-3 tokens if none matches, then interpret
    the tokens between the encoding and the model.

    Expected formats:
    - adjacency-scale_up_3-teacher-3-gpt-4.1-nano.txt (full_output)
    - adjacency-scale_up_3-teacher-3-node_count-output-gpt-4.1-nano.txt (question format)

    The number of example pairs is derived from the size pattern
    (len(sizes) - 1), not from the numeric token in the filename.

    Parameters:
    - filename: Response filename or path

    Returns:
    - Dictionary with encoding, size_pattern, system_prompt, model, question_type,
      target, num_pairs, n_pairs and sizes
    - None if filename cannot be parsed
    """
    basename = os.path.basename(filename) if "/" in filename else filename
    try:
        if not basename.endswith(".txt"):
            print(f"⚠️ Skipping non-txt file: {basename}")
            return None

        # Remove .txt extension and split into hyphen-delimited tokens
        parts = basename[:-4].split("-")

        if len(parts) < 4:
            print(f"⚠️ Filename too short, skipping: {basename}")
            return None

        # Extract and validate encoding (first part)
        encoding = parts[0]
        if encoding not in ("adjacency", "incident", "expert"):
            print(f"⚠️ Invalid encoding '{encoding}', skipping: {basename}")
            return None

        # Known model patterns (so we can strip the model name correctly and detect question_type)
        known_model_patterns = [
            "qwen-2.5-1.5b-instruct",
            "qwen-3-32b",
            "qwen-3-32b-v2",
            "qwen-3-32b-v3",
            "qwen-3-1.7b-v4",
            "qwen-3-4b-v4",
            "qwen-3-8b-v4",
            "qwen-3-14b-v4",
            "qwen-3-32b-v4",
            "mistral-7b-i-v0.2",
            "llama-7b-hf",
            "llama-2-7b-chat-hf",
            "llama-3-8b-i",
            "llama-3.1-8b-i",
            "olmo-2-7b-i",
            "deepseek-r1-distill-llama-8b",
            "gpt-4.1-nano",
            "gpt-4.1-mini",
            "gpt-4o-mini",
            "o1-mini",
            "o3-mini",
            "o4-mini",
            "gpt-4.1",
            "gpt-5",
            "qwq-32b",
            "deepseek-r1",
            "gemini-2.0-flash-lite",
            "gemini-2.5-pro-preview-03-25",
            "gemini-2.0-fl",
            "gemini-2.5-pro",
        ]

        # Find where the model name starts by looking for known patterns
        model_start_idx = None
        model = None

        # Try the longest pattern first (to prefer gpt-4.1-nano over gpt-4.1)
        for pattern in sorted(known_model_patterns, key=len, reverse=True):
            pattern_parts = pattern.split("-")
            width = len(pattern_parts)

            # The pattern may sit at the very end of the name or be followed
            # by up to (width - 1) trailing tokens; earlier positions win.
            first_start = max(0, len(parts) - 2 * width + 1)
            for start in range(first_start, len(parts) - width + 1):
                if parts[start : start + width] == pattern_parts:
                    model_start_idx = start
                    model = pattern
                    break

            if model is not None:
                break

        # If we didn't find a known model pattern, use fallback logic
        if model is None:
            # Fallback: assume model is the last 1-3 parts, but prefer longer matches
            for length in (3, 2, 1):
                if len(parts) >= length:
                    potential_model = "-".join(parts[-length:])
                    # Simple validation: model should not be purely numeric
                    if not potential_model.replace("-", "").replace(".", "").isdigit():
                        model = potential_model
                        model_start_idx = len(parts) - length
                        break

            if model is None:
                print(f"⚠️ Could not identify model in filename: {basename}")
                return None

        # Clean up model name (remove -batch-api suffix if present)
        batch_suffix = "-batch-api"
        if model.endswith(batch_suffix):
            model = model[: -len(batch_suffix)]

        # Collapse long model names to their short aliases
        model_shortcuts = {
            "gemini-2.5-pro-preview-03-25": "gemini-2.5-pro",
            "gemini-2.0-flash-lite": "gemini-2.0-fl",
        }
        model = model_shortcuts.get(model, model)

        # Now work with the middle parts (everything between encoding and model)
        middle_parts = parts[1:model_start_idx]

        if len(middle_parts) < 2:
            print(f"⚠️ Not enough middle parts between encoding and model: {basename}")
            return None

        # Determine question type and target by inspecting the end of middle_parts
        question_type = "full_output"
        target = "output"

        if middle_parts[-1] in ("input", "output"):
            # question-based filename format: ...-<question_type>-<target>-<model>.txt
            target = middle_parts[-1]
            question_type = middle_parts[-2]
            middle_parts = middle_parts[:-2]

        # Identify system_prompt within middle_parts and derive size_pattern parts
        valid_system_prompts = ("analyst", "programmer", "teacher", "none")
        system_prompt = None
        size_pattern_parts = []

        # Look for system prompt from the end of middle_parts (working backwards)
        for idx in range(len(middle_parts) - 1, -1, -1):
            if middle_parts[idx] in valid_system_prompts:
                system_prompt = middle_parts[idx]
                size_pattern_parts = middle_parts[:idx]
                break

        if system_prompt is None:
            print(f"⚠️ Could not identify system_prompt in filename: {basename}")
            return None

        # Everything before system_prompt is the size pattern
        size_pattern = "-".join(size_pattern_parts) if size_pattern_parts else "unknown"

        # Validate and parse size pattern
        sizes = None

        # Check if it's a named pattern
        if size_pattern in SIZE_PATTERNS:
            sizes = SIZE_PATTERNS[size_pattern]
        else:
            # Check if any component is a named pattern (for patterns like "scale_up_3")
            for component in size_pattern.split("-"):
                if component in SIZE_PATTERNS:
                    sizes = SIZE_PATTERNS[component]
                    size_pattern = component
                    break

            # If still not found, try to parse as explicit sizes
            if sizes is None:
                try:
                    sizes = [int(s) for s in size_pattern.split("-")]

                    # Basic validation: should have at least 2 sizes, all positive
                    if len(sizes) < 2 or any(s <= 0 for s in sizes):
                        print(
                            f"⚠️ Invalid explicit size pattern '{size_pattern}', skipping: {basename}"
                        )
                        return None

                except ValueError:
                    # Default fallback
                    sizes = [10, 10]
                    print(
                        f"⚠️ Unknown size pattern '{size_pattern}', using default: {basename}"
                    )

        # Deduce number of example pairs from size pattern
        n_pairs = len(sizes) - 1

        # Validate question type (for non-full_output)
        if question_type != "full_output":
            if (
                " " in question_type
                or len(question_type) > 30
                or len(question_type) < 2
            ):
                print(
                    f"⚠️ Suspicious question_type '{question_type}', skipping: {basename}"
                )
                return None

        return {
            "encoding": encoding,
            "size_pattern": size_pattern,
            "system_prompt": system_prompt,
            "question_type": question_type,
            "target": target,
            "num_pairs": n_pairs,
            "n_pairs": n_pairs,  # Also include this alias
            "sizes": sizes,
            "model": model,  # Full model name (or its short alias)
        }

    except (ValueError, IndexError, OSError) as e:
        print(f"⚠️ Error parsing filename {basename}: {e}")
        return None
