import re
import json
import os


def extract_code_from_string(input_string):
    """Extract code from markdown code blocks or return the string as-is.

    Args:
        input_string: Text that may contain fenced blocks written as
            ```python ... ``` or ``` ... ```.

    Returns:
        str: The input unchanged when it contains no fenced block; the
        content of the block when there is exactly one; otherwise the
        remaining blocks joined with newlines after dropping the blocks
        that run ``pip install``. The result is an empty string when
        every one of several blocks is a pip installation block.
    """
    # Match code within ```python ... ``` or ``` ... ``` blocks
    pattern = r'```(?:python)?\s*(.*?)\s*```'
    code_blocks = re.findall(pattern, input_string, re.DOTALL)

    if not code_blocks:
        # No code blocks found, return the entire string
        return input_string
    if len(code_blocks) == 1:
        return code_blocks[0]

    # A block counts as an installation block only when one of its lines
    # actually invokes `pip install` (optionally prefixed by a notebook or
    # shell marker, `sudo`, or `python -m`). A plain substring test for
    # 'pip' would also throw away real code that merely mentions names
    # such as `pipeline`.
    pip_install_line = re.compile(
        r'^\s*(?:[!%$]\s*)?(?:sudo\s+)?(?:python[\d.]*\s+-m\s+)?'
        r'pip[\d.]*\s+install\b',
        re.MULTILINE,
    )
    kept_blocks = [
        code for code in code_blocks if not pip_install_line.search(code)
    ]
    return '\n'.join(kept_blocks)


def read_problem(dataset, problem_name):
    """Load a problem's description and code example from the dataset tree.

    Files are read from ``dataset/<dataset>/<problem_name>/`` relative to
    the current working directory.

    Args:
        dataset: Name of the dataset sub-directory.
        problem_name: Name of the problem sub-directory.

    Returns:
        dict: ``'description'`` holds the text of ``description.txt`` and
        ``'code_example'`` holds the text of ``code_example.py``.
    """
    problem_root = os.path.join('dataset', dataset, problem_name)
    sources = (
        ('description', 'description.txt'),
        ('code_example', 'code_example.py'),
    )

    problem = {}
    for key, filename in sources:
        file_path = os.path.join(problem_root, filename)
        with open(file_path, 'r', encoding='utf8') as handle:
            problem[key] = handle.read()
    return problem


def read_enhanced_problem(problem_dir):
    """
    Load the enhanced problem description written to a problem directory.

    The whole markdown file is returned as the description. The code
    example is the first ```python fenced block whose content starts with
    a function definition; when no such block exists a generic placeholder
    function is used instead.

    Args:
        problem_dir: Path to the problem directory containing enhanced_problem_description.md

    Returns:
        dict: Problem data with description and code_example

    Raises:
        FileNotFoundError: If enhanced_problem_description.md does not
            exist inside problem_dir.
    """
    markdown_path = os.path.join(problem_dir, 'enhanced_problem_description.md')
    if not os.path.exists(markdown_path):
        raise FileNotFoundError(f"Enhanced problem description not found: {markdown_path}")

    with open(markdown_path, 'r', encoding='utf8') as handle:
        markdown_text = handle.read()

    # Generic template used when the markdown has no function definition
    placeholder = (
        "def solve_optimization_problem():\n"
        "    # Implementation to be generated by Chain-of-Experts\n"
        "    pass"
    )

    # Only the first python block that opens with `def <name>` is of interest
    first_function = re.search(
        r'```python\s*(def\s+\w+.*?)```', markdown_text, re.DOTALL
    )
    snippet = placeholder if first_function is None else first_function.group(1)

    return {'description': markdown_text, 'code_example': snippet}