import re
import os
import ast
import json
import random
from openai import OpenAI
from typing import Dict, Tuple
from used_prompt import get_prompt
from rouge_score import rouge_scorer


class JudgeProgramGenerator:
    def __init__(self, storage_dir: str = "synthesized_program_judges", similarity_threshold: float = 0.60):
        """Create the API client, the ROUGE scorer and the on-disk store.

        Args:
            storage_dir: Directory that holds the generated program files and
                their metadata file. It is created if it does not exist.
            similarity_threshold: ROUGE-L F-measure above which a newly
                generated program is treated as similar to a stored one.
        """
        # The API key is read from the environment at construction time.
        self.client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
        self.storage_dir = storage_dir
        self.similarity_threshold = similarity_threshold
        self.rouge_scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        os.makedirs(storage_dir, exist_ok=True)

        # Criteria that generate_program_pool samples from at random.
        self.available_criteria = [
            "Factual Accuracy",
            "Logical Coherence",
            "Clarity and Conciseness",
            "Completeness/Coverage of Answer",
            "Relevance to Query (Semantic Similarity)",
            "Language Quality/Readability",
            "Bias Detection",
            "Safety and Toxicity",
            "Response Verbosity/Redundancy",
        ]
        
        self.programs = self.load_existing_programs()
    
    def load_existing_programs(self) -> Dict:
        """Load previously saved judge programs from ``self.storage_dir``.

        Reads ``programs_metadata.json`` and, for every entry whose
        ``<prog_id>.py`` file exists, loads the source code together with the
        stored metadata. Entries without a code file are skipped.

        Returns:
            Mapping of program id to a dict with the keys ``code``,
            ``description``, ``function_name``, ``criteria`` and
            ``executable``. Empty when the metadata file is missing or cannot
            be parsed as a JSON object.

        Raises:
            KeyError: If a metadata entry lacks ``description`` or
                ``function_name``.
        """
        programs = {}
        metadata_file = os.path.join(self.storage_dir, 'programs_metadata.json')
        if not os.path.exists(metadata_file):
            return programs

        # Tolerate an unreadable metadata file the same way save_program does,
        # instead of failing while the object is being constructed.
        try:
            with open(metadata_file, 'r') as meta_fh:
                metadata = json.load(meta_fh)
        except json.JSONDecodeError:
            print("Warning: Metadata file is corrupted or empty. Starting with an empty metadata dictionary.")
            return programs
        if not isinstance(metadata, dict):
            print("Warning: Metadata file is corrupted or empty. Starting with an empty metadata dictionary.")
            return programs

        for prog_id, meta in metadata.items():
            prog_file = os.path.join(self.storage_dir, f"{prog_id}.py")
            if not os.path.exists(prog_file):
                continue
            with open(prog_file, 'r') as code_fh:
                code = code_fh.read()
            programs[prog_id] = {
                'code': code,
                'description': meta['description'],
                'function_name': meta['function_name'],
                'criteria': meta.get('criteria', 'Unknown'),
                'executable': meta.get('executable', False),
            }
        return programs
    
    def generate_judge_program(self, criterion: str, count: int) -> Tuple[str, str, str]:
        """Ask the chat model for a judge function for one criterion.

        The prompt is built by ``get_prompt(function_name, criterion)`` and
        sent as a single user message. The first fenced ``python`` code block
        in the reply is taken as the program source.

        Args:
            criterion: Evaluation criterion the generated judge should apply.
            count: Number of programs generated so far; the requested function
                is named ``program_judge_<count + 1>``.

        Returns:
            A ``(code, description, function_name)`` tuple.

        Raises:
            Exception: If the API call fails, the reply has no text content,
                or the reply contains no fenced Python code block. The
                original error is attached as ``__cause__``.
        """
        print(f"Generating program for criterion: {criterion}")
        function_name = f"program_judge_{count + 1}"
        prompt = get_prompt(function_name, criterion)

        try:
            response = self.client.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}],
                max_tokens=2000,
                temperature=0.7
            )
            content = response.choices[0].message.content
            # The message content may be absent; report that explicitly rather
            # than as an AttributeError on None.
            if content is None:
                raise ValueError("Model response contained no text content")
            generated_text = content.strip()
            pattern = r'```python\n(.*?)\n```'
            matches = re.findall(pattern, generated_text, re.DOTALL)
            if not matches:
                raise ValueError("No Python code block found in generated response")

            code = matches[0].strip()
            print(f"Generated raw code: \n{code}")
            description = f"Judges responses based on {criterion.lower()}"
            return code, description, function_name

        except Exception as e:
            # Keep the original traceback reachable through explicit chaining.
            raise Exception(f"Failed to generate program: {str(e)}") from e
            
    def compute_similarity(self, code1: str, code2: str) -> float:
        """Return the ROUGE-L F-measure between two code strings.

        Before scoring, each string is flattened: blank lines and lines that
        start with ``#`` are dropped, everything from the first ``#`` on a
        line is cut off, and the remaining pieces are joined with spaces.
        """
        def flatten(source: str) -> str:
            kept = []
            for raw_line in source.split('\n'):
                stripped = raw_line.strip()
                if not stripped or stripped.startswith('#'):
                    continue
                # Cut at the first '#', wherever it occurs on the line.
                kept.append(raw_line.split('#')[0].strip())
            return ' '.join(kept)

        result = self.rouge_scorer.score(flatten(code1), flatten(code2))
        return result['rougeL'].fmeasure
    
    def is_similar_to_existing(self, new_code: str) -> bool:
        """Tell whether ``new_code`` is similar to any stored program.

        A stored program counts as similar when its similarity score with
        ``new_code`` is strictly greater than ``self.similarity_threshold``.
        """
        return any(
            self.compute_similarity(new_code, stored['code']) > self.similarity_threshold
            for stored in self.programs.values()
        )
    
    def validate_program(self, code: str, function_name: str) -> bool:
        """Check that ``code`` defines a working judge called ``function_name``.

        The code is compiled and executed in a fresh namespace, then the
        function is called with a sample query and a sample response. The
        program is valid when that call returns a dict containing ``score``,
        ``reasoning`` and ``criteria``, with ``score`` a real number (not a
        bool) between 0 and 10 inclusive.

        Args:
            code: Source code of the candidate program.
            function_name: Name of the judge function the code must define.

        Returns:
            True if every check passes, False otherwise. Errors raised by the
            candidate code, including ``SystemExit``, yield False.
        """
        required_keys = ('score', 'reasoning', 'criteria')
        try:
            compiled = compile(code, '<string>', 'exec')
            namespace = {}
            # SECURITY: this runs model-generated code in the current process
            # with full privileges. Only use it in a disposable or sandboxed
            # environment.
            exec(compiled, namespace)
            if function_name not in namespace:
                return False
            result = namespace[function_name]("Sample query", "Sample response")
            if not isinstance(result, dict):
                return False
            if any(key not in result for key in required_keys):
                return False
            score = result['score']
            # bool is a subclass of int, but True/False is not a usable score.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                return False
            return bool(0 <= score <= 10)
        except (Exception, SystemExit):
            # Generated code may call sys.exit(); that must not end the run.
            return False
    
    def save_program(self, code: str, description: str, function_name: str, criterion: str, executable: bool) -> str:
        """Persist a generated program and register it in the metadata file.

        The program is written to ``<storage_dir>/<prog_id>.py`` and described
        in ``programs_metadata.json``; ``self.programs`` is updated to match.

        Args:
            code: Source code of the program.
            description: Human-readable description of the judge.
            function_name: Name of the judge function defined by ``code``.
            criterion: Criterion the judge was generated for.
            executable: Whether the program passed validation; stored as a
                bool.

        Returns:
            The id assigned to the program, formatted as ``judge_NNNN``.

        Raises:
            TypeError: If the metadata cannot be serialized to JSON. Nothing
                is written to disk in that case.
        """
        executable = bool(executable)

        # Load existing metadata
        metadata_file = os.path.join(self.storage_dir, 'programs_metadata.json')
        metadata = {}
        if os.path.exists(metadata_file):
            try:
                with open(metadata_file, 'r') as f:
                    metadata = json.load(f)
            except json.JSONDecodeError:
                print("Warning: Metadata file is corrupted or empty. Starting with an empty metadata dictionary.")
            if not isinstance(metadata, dict):
                print("Warning: Metadata file is corrupted or empty. Starting with an empty metadata dictionary.")
                metadata = {}

        # Determine the next program ID. In-memory programs are consulted too,
        # so a metadata reset does not restart numbering at 1, and keys that
        # do not carry a numeric id are ignored instead of raising.
        existing_ids = []
        for known_id in set(metadata) | set(self.programs):
            if not known_id.startswith('judge_'):
                continue
            try:
                existing_ids.append(int(known_id.split('_')[1]))
            except ValueError:
                continue
        next_id = max(existing_ids, default=0) + 1
        # Never overwrite a program file that is already on disk.
        while os.path.exists(os.path.join(self.storage_dir, f"judge_{next_id:04d}.py")):
            next_id += 1
        prog_id = f"judge_{next_id:04d}"
        code_file = os.path.join(self.storage_dir, f"{prog_id}.py")

        # Update metadata with the new program
        metadata[prog_id] = {
            'description': description,
            'function_name': function_name,
            'criteria': criterion,
            'executable': executable,
            'file_path': code_file
        }

        # Serialize before touching the disk so that a failure cannot leave a
        # truncated metadata file or an unregistered code file behind.
        try:
            serialized = json.dumps(metadata, indent=2)
        except TypeError as e:
            print(f"Error serializing metadata: {str(e)}")
            raise

        # Save the program code to a file
        with open(code_file, 'w') as f:
            f.write(code)

        # Save the updated metadata back to the file
        with open(metadata_file, 'w') as f:
            f.write(serialized)

        # Update the in-memory programs dictionary
        self.programs[prog_id] = {
            'code': code,
            'description': description,
            'function_name': function_name,
            'criteria': criterion,
            'executable': executable
        }
        return prog_id
    
    def generate_program_pool(self, target_count: int = 5, max_attempts: int = 10):
        """Generate programs until the pool is large enough or attempts run out.

        Each attempt picks a random criterion and generates a program. A
        program that is similar to a stored one is discarded; any other
        program is validated and saved together with its validation result.

        Args:
            target_count: Pool size at which generation stops.
            max_attempts: Upper bound on the number of generation attempts.

        Returns:
            ``self.programs``, the mapping of program id to program data.
        """
        attempts = 0
        while True:
            if len(self.programs) >= target_count or attempts >= max_attempts:
                break
            print("--------------------------------------------------")
            attempts += 1
            chosen = random.choice(self.available_criteria)
            code, description, function_name = self.generate_judge_program(chosen, len(self.programs))

            # Near-duplicates still consume an attempt but are not stored.
            if not self.is_similar_to_existing(code):
                executable = self.validate_program(code, function_name)
                prog_id = self.save_program(code, description, function_name, chosen, executable)
                print(f"Saved {prog_id} (Executable: {executable})")

        print(f"Generated {len(self.programs)} programs in {attempts} attempts")
        return self.programs
    
    def load_program(self, prog_id: str):
        """Execute a stored program and return its judge function.

        Args:
            prog_id: Id of a program present in ``self.programs``.

        Returns:
            The object bound to the program's ``function_name`` after its code
            has been executed in a fresh namespace.

        Raises:
            ValueError: If ``prog_id`` is not in ``self.programs``.
        """
        if prog_id in self.programs:
            entry = self.programs[prog_id]
            scope = {}
            # NOTE: runs the stored source code in the current process.
            exec(entry['code'], scope)
            return scope[entry['function_name']]
        raise ValueError(f"Program {prog_id} not found")

if __name__ == "__main__":
    # Build (or top up) the pool of judge programs, then list what is stored.
    generator = JudgeProgramGenerator()
    generator.generate_program_pool(target_count=60, max_attempts=70)

    for judge_id in generator.programs:
        details = generator.programs[judge_id]
        print(f"{judge_id}: {details['description']} (Executable: {details['executable']})")