#!/usr/bin/env python3
# Add the JSON extraction method
import re

# Path of the module that receives the new method.
TARGET_PATH = 'utils/refine_iteration/llm_code_selector.py'

# Substring marking the line(s) after which the method is inserted.
ANCHOR = 'return prompt'

# Signature prefix used to detect that the method was already inserted.
METHOD_SIGNATURE = 'def _extract_json_from_response('

# Source text of the method to insert. It starts with a blank separator line
# and is indented as a class member (4 spaces).
# NOTE(review): the inserted code references `re`, `json` and `List`;
# presumably the target module already imports them -- verify before running.
METHOD_SOURCE = r'''
    def _extract_json_from_response(self, content: str) -> List[str]:
        """Extract JSON array from LLM response, handling thinking mode and various formats"""
        if not content:
            return []

        # Strategy 1: Remove <think> tags and extract JSON from cleaned content
        cleaned_content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL).strip()

        # Try to find JSON array in cleaned content
        json_match = re.search(r'\[\s*"[^"]*"(?:\s*,\s*"[^"]*")*\s*\]', cleaned_content, re.DOTALL)
        if json_match:
            try:
                selected_codes = json.loads(json_match.group())
                if isinstance(selected_codes, list) and all(isinstance(item, str) for item in selected_codes):
                    return selected_codes
            except json.JSONDecodeError:
                pass

        # Strategy 2: Look for JSON array anywhere in the original response
        json_match = re.search(r'\[\s*"[^"]*"(?:\s*,\s*"[^"]*")*\s*\]', content, re.DOTALL)
        if json_match:
            try:
                selected_codes = json.loads(json_match.group())
                if isinstance(selected_codes, list) and all(isinstance(item, str) for item in selected_codes):
                    return selected_codes
            except json.JSONDecodeError:
                pass

        # Strategy 3: Extract individual quoted strings as fallback
        string_matches = re.findall(r'"[^"]*"', content)
        if string_matches:
            # Remove quotes and return as list
            return [match.strip('"') for match in string_matches if match.strip('"')]

        return []'''


def insert_extraction_method(source: str) -> str:
    """Return *source* with the JSON extraction method inserted.

    The method text (``METHOD_SOURCE``) is inserted directly after every line
    that contains ``ANCHOR``.

    Args:
        source: Full text of the module to patch.

    Returns:
        The patched text. *source* is returned unchanged when it already
        contains ``METHOD_SIGNATURE`` (so re-running the script does not
        duplicate the method) or when no line contains ``ANCHOR``.
    """
    if METHOD_SIGNATURE in source:
        return source

    method_lines = METHOD_SOURCE.split('\n')
    patched = []
    for line in source.split('\n'):
        patched.append(line)
        if ANCHOR in line:
            patched.extend(method_lines)
    return '\n'.join(patched)


def main() -> None:
    """Patch ``TARGET_PATH`` in place and report the outcome.

    Raises:
        SystemExit: If no line containing ``ANCHOR`` exists in the target
            file, so nothing could be inserted. The file is left untouched.
    """
    # Read the current file (Python source files are UTF-8 by default).
    with open(TARGET_PATH, 'r', encoding='utf-8') as f:
        content = f.read()

    if METHOD_SIGNATURE in content:
        print('_extract_json_from_response method already present; nothing to do')
        return

    updated = insert_extraction_method(content)
    if updated == content:
        # Do not claim success (or rewrite the file) when nothing was inserted.
        raise SystemExit(f'No line containing {ANCHOR!r} found in {TARGET_PATH}; file not modified')

    # Write the updated file
    with open(TARGET_PATH, 'w', encoding='utf-8') as f:
        f.write(updated)

    print('✅ Added _extract_json_from_response method')


if __name__ == '__main__':
    main()
