import hydra
import numpy as np 
import json
import logging 
import matplotlib.pyplot as plt
import os
import openai
import re
import subprocess
from pathlib import Path
import shutil
import time 
from typing import Dict, List
import datetime
import math
import torch
import torch.nn as nn
from difflib import SequenceMatcher
from transformers import AutoTokenizer, AutoModel

from utils.misc import * 
from utils.file_utils import find_files_with_substring, load_tensorboard_logs
from utils.create_task import create_task
from utils.extract_task_code import *
from bayesian_weight_optimizer import BayesianWeightOptimizer
from code_weight_reward import (
    extract_reward_components,
    calculate_reward_frequencies,
    calculate_usefulness_scores,
    update_reward_weights
)
from similarity_utils import (
    get_bge_model,
    compute_semantic_similarity,
    compute_string_similarity,
    normalize_reward_name,
    get_reward_description,
    normalize_reward_items,
    find_similar_groups
)

# Project root: the process working directory at the moment this module is
# loaded. main() logs it as "Project Root" and resolves the env and prompt
# files relative to it.
# NOTE(review): presumably captured at import time because Hydra may switch
# the working directory before main() runs — confirm against the Hydra config.
EUREKA_ROOT_DIR = os.getcwd()

# Location of the isaacgymenvs package, expressed relative to the project
# root; main() uses it for the generated task file and for train.py.
ISAAC_ROOT_DIR = EUREKA_ROOT_DIR + "/../isaacgymenvs/isaacgymenvs"

@hydra.main(config_path="cfg", config_name="config", version_base="1.1")
def main(cfg):
    workspace_dir = Path.cwd()
    logging.info(f"Workspace: {workspace_dir}")
    logging.info(f"Project Root: {EUREKA_ROOT_DIR}")

    openai.api_base = cfg.openai_api_base
    openai.api_key = cfg.openai_api_key

    task = cfg.env.task
    task_description = cfg.env.description
    suffix = cfg.suffix
    model = cfg.model
    logging.info(f"Using LLM: {model}")
    logging.info("Task: " + task)
    logging.info("Task description: " + task_description)

    env_name = cfg.env.env_name.lower()
    env_parent = 'isaac' if f'{env_name}.py' in os.listdir(f'{EUREKA_ROOT_DIR}/envs/isaac') else 'bidex'
    task_file = f'{EUREKA_ROOT_DIR}/envs/{env_parent}/{env_name}.py'
    task_obs_file = f'{EUREKA_ROOT_DIR}/envs/{env_parent}/{env_name}_obs.py'
    shutil.copy(task_obs_file, f"env_init_obs.py")
    task_code_string  = file_to_string(task_file)
    task_obs_code_string  = file_to_string(task_obs_file)
    output_file = f"{ISAAC_ROOT_DIR}/tasks/{env_name}{suffix.lower()}.py"

    prompt_dir = f'{EUREKA_ROOT_DIR}/utils/prompts'
    initial_system = file_to_string(f'{prompt_dir}/initial_system.txt')
    code_output_tip = file_to_string(f'{prompt_dir}/code_output_tip.txt')
    code_feedback = file_to_string(f'{prompt_dir}/code_feedback.txt')
    initial_user = file_to_string(f'{prompt_dir}/initial_user.txt')
    reward_signature = file_to_string(f'{prompt_dir}/reward_signature.txt')
    policy_feedback = file_to_string(f'{prompt_dir}/policy_feedback.txt')
    execution_error_feedback = file_to_string(f'{prompt_dir}/execution_error_feedback.txt')

    initial_system = initial_system.format(task_reward_signature_string=reward_signature) + code_output_tip
    initial_user = initial_user.format(task_obs_code_string=task_obs_code_string, task_description=task_description)
    messages = [{"role": "system", "content": initial_system}, {"role": "user", "content": initial_user}]

    task_code_string = task_code_string.replace(task, task+suffix)
   
    create_task(ISAAC_ROOT_DIR, cfg.env.task, cfg.env.env_name, suffix)

    DUMMY_FAILURE = -10000.
    max_successes = []
    max_successes_reward_correlation = []
    execute_rates = []
    best_code_paths = []
    max_success_overall = DUMMY_FAILURE
    max_success_reward_correlation_overall = DUMMY_FAILURE
    max_reward_code_path = None 
    
   
    for iter in range(cfg.iteration):
        # Get Eureka response
        responses = []
        total_samples = 0
        total_token = 0
        total_completion_token = 0
        
        logging.info(f"Iteration {iter}: Generating {cfg.sample} samples with {cfg.model}")
        
       
        for sample_idx in range(cfg.sample):
            for attempt in range(1000):
                try:
                    response_cur = openai.ChatCompletion.create(
                        model=model,
                        messages=messages,
                        temperature=cfg.temperature,
                        n=1  
                    )
                    responses.append(response_cur.choices[0])
                    total_samples += 1
                    
                  
                    total_completion_token += response_cur.usage.completion_tokens
                    total_token += response_cur.usage.total_tokens
                    if sample_idx == 0:
                        prompt_tokens = response_cur.usage.prompt_tokens
                        
                    break
                except Exception as e:
                    if attempt >= 10:
                        logging.info(f"Failed to generate sample {sample_idx} after 10 attempts")
                        break
                    logging.info(f"Attempt {attempt+1} failed with error: {e}")
                    time.sleep(1)
        
        if total_samples == 0:
            logging.info("No samples were successfully generated!")
            continue
            
        logging.info(f"Iteration {iter}: Prompt Tokens: {prompt_tokens}, Completion Tokens: {total_completion_token}, Total Tokens: {total_token}")
        
        code_runs = []
        
        iter_dir = f"iteration_{iter}"
        os.makedirs(iter_dir, exist_ok=True)
        
        sample_results = []
        
        for response_id in range(cfg.sample):
            response_cur = responses[response_id]["message"]["content"]
            logging.info(f"Iteration {iter}: Processing Code Run {response_id}")

           
            patterns = [
                r'```python(.*?)```',
                r'```(.*?)```',
                r'"""(.*?)"""',
                r'""(.*?)""',
                r'"(.*?)"',
            ]
            for pattern in patterns:
                code_string = re.search(pattern, response_cur, re.DOTALL)
                if code_string is not None:
                    code_string = code_string.group(1).strip()
                    break
            code_string = response_cur if not code_string else code_string

            # Remove unnecessary imports
            lines = code_string.split("\n")
            for i, line in enumerate(lines):
                if line.strip().startswith("def "):
                    code_string = "\n".join(lines[i:])
                    
            # Add the Eureka Reward Signature to the environment code
            try:
                gpt_reward_signature, input_lst = get_function_signature(code_string)
            except Exception as e:
                logging.info(f"Iteration {iter}: Code Run {response_id} cannot parse function signature!")
                continue

            
            sample_dir = os.path.join(iter_dir, f"sample_{response_id}")
            os.makedirs(sample_dir, exist_ok=True)
            
            
            reward_signature = [
                f"self.rew_buf[:], self.rew_dict = {gpt_reward_signature}",
                f"self.extras['gpt_reward'] = self.rew_buf.mean()",
                f"for rew_state in self.rew_dict: self.extras[rew_state] = self.rew_dict[rew_state].mean()",
            ]
            indent = " " * 8
            reward_signature = "\n".join([indent + line for line in reward_signature])
            
          
            if "def compute_reward(self)" in task_code_string:
                task_code_string_iter = task_code_string.replace("def compute_reward(self):", "def compute_reward(self):\n" + reward_signature)
            elif "def compute_reward(self, actions)" in task_code_string:
                task_code_string_iter = task_code_string.replace("def compute_reward(self, actions):", "def compute_reward(self, actions):\n" + reward_signature)
            else:
                raise NotImplementedError
            
            env_file_path = os.path.join(sample_dir, "environment.py")
            with open(env_file_path, 'w') as file:
                file.writelines(task_code_string_iter + '\n')
                file.writelines("from typing import Tuple, Dict" + '\n')
                file.writelines("import math" + '\n')
                file.writelines("import torch" + '\n')
                file.writelines("from torch import Tensor" + '\n')
                if "@torch.jit.script" not in code_string:
                    code_string = "@torch.jit.script\n" + code_string
                file.writelines(code_string + '\n')

            reward_file_path = os.path.join(sample_dir, "reward.py")
            with open(reward_file_path, 'w') as file:
                file.writelines(code_string + '\n')

            legacy_env_path = os.path.join(iter_dir, f"response_{response_id}.py")
            shutil.copy(env_file_path, legacy_env_path)

           
            components = extract_reward_components(code_string)
            reward_info_path = os.path.join(sample_dir, "reward_info.txt")
            with open(reward_info_path, 'w') as f:
                f.write(f"=== Reward Components for Sample {response_id} ===\n")
                f.write("Weights:\n")
                for weight_name, weight_value in components.items():
                    if '_weight' in weight_name:
                        f.write(f"{weight_name}: {weight_value}\n")
                f.write("\nReward Items:\n")
                if 'reward_items' in components:
                    for item in components['reward_items']:
                        f.write(f"- {item}\n")
            
            sample_results.append({
                'code': code_string,
                'reward_items': components.get('reward_items', []),
                'sample_dir': sample_dir,
                'reward_path': reward_file_path,
                'env_path': env_file_path
            })
            code_runs.append(code_string)

        U_r = calculate_reward_frequencies(sample_results)
        
        with open(os.path.join(iter_dir, "U_r.txt"), 'w') as f:
            f.write("=== Reward Item U_r ===\n")
            for item, freq in sorted(U_r.items(), key=lambda x: x[1], reverse=True):
                f.write(f"{item}: {freq:.3f}\n")
        
        usefulness_scores, score_std, score_range = calculate_usefulness_scores(sample_results, U_r)
        
       
        with open(os.path.join(iter_dir, "usefulness_scores.txt"), 'w') as f:
            f.write("=== Sample Usefulness Scores ===\n")
            f.write(f"Score Standard Deviation: {score_std:.3f}\n")
            f.write(f"Score Range: {score_range:.3f}\n\n")
            f.write("Individual Sample Scores:\n")
            for score in usefulness_scores:
                f.write(f"\nSample {score['index']}:\n")
                f.write(f"Usefulness Score: {score['usefulness_score']:.3f}\n")
                f.write(f"Reward Items: {', '.join(score['reward_items'])}\n")


        iter_messages_path = os.path.join(iter_dir, 'messages.json')
        with open(iter_messages_path, 'w') as file:
            json.dump(messages, file, indent=4, ensure_ascii=False)
            
       
        history_file = 'messages_history.json'
        if os.path.exists(history_file):
            with open(history_file, 'r') as file:
                history = json.load(file)
        else:
            history = {}
            
        history[f'iteration_{iter}'] = {
            'messages': messages,
            'timestamp': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }
        
        with open(history_file, 'w') as file:
            json.dump(history, file, indent=4, ensure_ascii=False)

        should_stop = score_std < 0.02 and score_range < 0.1

       
        similar_groups = find_similar_groups(sample_results)
        
        print("\n===Reward uncertainty's score ===")
        for item, freq in sorted(U_r.items(), key=lambda x: x[1], reverse=True):
            print(f"{item}: {freq:.3f}")

        print("\n=== similarity group ===")
        if similar_groups:
            for i, group in enumerate(similar_groups):
                print(f"\ngroup {i+1}:")
                for idx in group:
                    print(f"Sample {idx}: {', '.join(sample_results[idx]['reward_items'])}")
        else:
            print("Not find similarity group")

        selected_samples = set()

        print("\n=== Select group===")
        for group in similar_groups:
            group_scores = [s for s in usefulness_scores if s['index'] in group]
            selected = min(group_scores, key=lambda x: x['usefulness_score'])
            selected_samples.add(selected['index'])
            print(f"\nFrom group {[i for i in group]} Select Sample {selected['index']}:")
            print(f"Score: {selected['usefulness_score']:.3f}")
            print(f"Reward item: {', '.join(sample_results[selected['index']]['reward_items'])}")

        all_grouped = set().union(*similar_groups) if similar_groups else set()
        remaining_samples = set(range(len(sample_results))) - all_grouped
        selected_samples.update(remaining_samples)

        print("\n=== ungrouped samples ===")
        for idx in remaining_samples:
            score = next(s['usefulness_score'] for s in usefulness_scores if s['index'] == idx)
            print(f"\nSample {idx}:")
            print(f"score: {score:.3f}")
            print(f"reward item: {', '.join(sample_results[idx]['reward_items'])}")

        selected_samples = sorted(selected_samples, 
                                key=lambda idx: next(s['usefulness_score'] 
                                                   for s in usefulness_scores if s['index'] == idx))
        
        print("\n=== The final training sequence chosen ===")
        for i, idx in enumerate(selected_samples):
            score = next(s['usefulness_score'] for s in usefulness_scores if s['index'] == idx)
            print(f"\n{i+1}. Sample {idx}:")
            print(f"score: {score:.3f}")
            print(f"reward item: {', '.join(sample_results[idx]['reward_items'])}")
            
        with open(os.path.join(iter_dir, "selected_samples.txt"), 'w') as f:
            f.write("=== Selected Samples for Training ===\n")
            for idx in selected_samples:
                score = next(s['usefulness_score'] for s in usefulness_scores if s['index'] == idx)
                f.write(f"Sample {idx}:\n")
                f.write(f"Usefulness Score: {score:.3f}\n")
                f.write("Reward Items:\n")
                for item in sample_results[idx]['reward_items']:
                    f.write(f"- {item}\n")
                f.write("\n")

        print("\nStart training and optimizing selected samples...")
        optimizers = {}
        current_weights_list = {}
        n_optimization_rounds = 10  
        rl_runs = []

        for i, sample_idx in enumerate(selected_samples):
            print(f"\nTraining sample {sample_idx} ({i+1}/{len(selected_samples)})...")
            
            env_path = sample_results[sample_idx]['env_path']
            copy_env_file(env_path, os.path.dirname(env_path), task_name=env_name)
            
            initial_weights = extract_reward_components(sample_results[sample_idx]['code'])
            
            valid_weights = {k: v for k, v in initial_weights.items() 
                            if isinstance(v, (int, float))}
            
            if not valid_weights:
                print(f"Warning: Sample {sample_idx} does not have any optimizable weights, will only train once")
                n_rounds = 1    
            else:
                try:
                    n_rounds = n_optimization_rounds
                    optimizers[sample_idx] = BayesianWeightOptimizer(U_r, valid_weights)
                    current_weights_list[sample_idx] = valid_weights.copy()
                except Exception as e:
                    print(f"Warning: Error initializing optimizer ({str(e)}), will only train once")
                    n_rounds = 1  
            
            sample_dir = sample_results[sample_idx]['sample_dir']
            weight_file = os.path.join(sample_dir, "weight_history.txt")
            with open(weight_file, 'w') as f:
                f.write(f"Sample {sample_idx} weight optimization history\n")
                f.write("="*50 + "\n\n")
                f.write("Initial weights:\n")
                for weight_name, weight_value in initial_weights.items():
                    if isinstance(weight_value, (int, float)):
                        f.write(f"{weight_name}: {weight_value:.4f}\n")
                f.write("\n")
            
            best_score = float('-inf')
            no_improvement_count = 0
            
            for round_idx in range(n_rounds):
                print(f"\n=== sample {sample_idx} optimization round {round_idx + 1}/{n_rounds} ===")
                
                set_freest_gpu()
                sample_dir = sample_results[sample_idx]['sample_dir']
                rl_filepath = os.path.join(sample_dir, f"training_round_{round_idx}.txt")
                weight_file = os.path.join(sample_dir, "weight_history.txt")
                
                with open(rl_filepath, 'w') as f:
                    env_path = sample_results[sample_idx]['env_path']
                    train_dir = os.path.join(os.path.dirname(rl_filepath), f"train_round_{round_idx}")
                    os.makedirs(train_dir, exist_ok=True)
                    
                    training_env_path = os.path.join(train_dir, "env.py")
                    shutil.copy(env_path, training_env_path)
                    
                    print(f"\n=== training environment file information ===")
                    print(f"source file: {env_path}")
                    print(f"target file: {training_env_path}")
                    
                    with open(training_env_path, 'r') as env_f:
                        env_code = env_f.read()
                        current_weights = extract_reward_components(env_code)
                        print("\ncurrent reward function weights:")
                        for name, value in current_weights.items():
                            if isinstance(value, (int, float)):
                                print(f"{name}: {value:.4f}")
                    
                    process = subprocess.Popen(['python', '-u', f'{ISAAC_ROOT_DIR}/train.py',  
                                            'hydra/output=subprocess',
                                            f'task={task}{suffix}', 
                                            f'wandb_activate={cfg.use_wandb}',
                                            f'wandb_entity={cfg.wandb_username}', 
                                            f'wandb_project={cfg.wandb_project}',
                                            f'headless={not cfg.capture_video}', 
                                            f'capture_video={cfg.capture_video}', 
                                            'force_render=False',
                                            f'max_iterations={cfg.max_iterations}'],
                                            stdout=f, stderr=f,
                                            cwd=train_dir)  
                    process.wait()
                
                if process.returncode == 0:
                    print(f"sample {sample_idx} training completed successfully!")
                    
               
                    with open(rl_filepath, 'r') as f:
                        stdout_str = f.read()
                    
                    lines = stdout_str.split('\n')
                    for line in lines:
                        if line.startswith('Tensorboard Directory:'):
                         
                            tensorboard_logdir = line.split(':', 1)[1].strip()
                            print(f"\nTensorboard original path: {tensorboard_logdir}")
                            
                      
                            if not os.path.isabs(tensorboard_logdir):
                                tensorboard_logdir = os.path.join(train_dir, 'runs', os.path.basename(tensorboard_logdir), 'summaries')
                            
                            print(f"Tensorboard final path: {tensorboard_logdir}")
                            break
            
                    max_retries = 3
                    for attempt in range(max_retries):
                        try:         
                            if not os.path.exists(tensorboard_logdir):
                                logging.error(f"Tensorboard directory does not exist: {tensorboard_logdir}")
                                logging.error(f"Current working directory: {os.getcwd()}")
                                logging.error(f"Directory content: {os.listdir(os.path.dirname(tensorboard_logdir)) if os.path.exists(os.path.dirname(tensorboard_logdir)) else 'Parent directory does not exist'}")
                                time.sleep(2)  
                                continue
                            
                            files = os.listdir(tensorboard_logdir)
                            event_files = [f for f in files if f.startswith('events.out.tfevents')]
                            if not event_files:
                                logging.error(f"Tensorboard event file not found: {attempt+1}/{max_retries}")
                                logging.info(f"Directory content: {files}")
                                time.sleep(2)  
                                continue 
                        
                            time.sleep(1)
                                    
                            tensorboard_logs = load_tensorboard_logs(tensorboard_logdir)
                            logging.info(f"Successfully loaded tensorboard logs, including the following metrics: {list(tensorboard_logs.keys())}")
                            break
                            
                        except Exception as e:
                            logging.error(f"Attempt {attempt+1}/{max_retries} failed: {str(e)}")
                            if attempt == max_retries - 1:
                                raise  
                            time.sleep(2)       
         
                    if "consecutive_successes" in tensorboard_logs:
                        score = max(tensorboard_logs["consecutive_successes"])
                    else:
                        score = max(tensorboard_logs.get("gt_reward", [0]))
                    
                    print(f"Current success rate: {score:.4f}")

            
                    if 'best_score' not in sample_results[sample_idx] or score > sample_results[sample_idx]['best_score']:
                        sample_results[sample_idx].update({
                            'best_score': score,
                            'best_tensorboard_logs': tensorboard_logs
                        })
                    
                    if score > best_score + 0.01:  
                        best_score = score
                        no_improvement_count = 0
                    else:
                        no_improvement_count += 1
                                  
                    if no_improvement_count >= 2:
                        print(f"Continuous {no_improvement_count} rounds without improvement, early termination of optimization")
                        break
                    
                    if round_idx < n_rounds - 1 and valid_weights:
                   
                        next_weights, _ = optimizers[sample_idx].optimize(
                            current_score=score,
                            previous_weights=current_weights_list[sample_idx],
                            beta=2.0
                        )
                             
                 
                        updated_reward_code = update_reward_weights(sample_results[sample_idx]['code'], next_weights)
                        
                 
                        with open(sample_results[sample_idx]['reward_path'], 'w') as f:
                            f.write(updated_reward_code)
                            
        
                        with open(sample_results[sample_idx]['env_path'], 'r') as f:
                            env_code = f.read()
       
                        updated_env_code = update_reward_weights(env_code, next_weights)
                        
   
                        with open(sample_results[sample_idx]['env_path'], 'w') as f:
                            f.write(updated_env_code)

                        copy_env_file(sample_results[sample_idx]['env_path'], os.path.dirname(sample_results[sample_idx]['env_path']), task_name=env_name)
                        
     
                        print("\nWeight update:")
                        weights_changed = False
                        has_numeric_weights = False
                        
                  
                        print("\nCurrent weight list:")
                        for name, value in current_weights_list[sample_idx].items():
                            print(f"Current {name}: {value} (type: {type(value)})")
                        
                        print("\nNext weight list:")
                  
                        next_weights = {name: float(value) for name, value in next_weights.items()}
                        
                        
                        
                        for name, value in next_weights.items():
                            print(f"Next {name}: {value} (type: {type(value)})")
                            if isinstance(value, (int, float)):  
                                has_numeric_weights = True
                                old_value = float(current_weights_list[sample_idx].get(name, 0.0))
                                change = ((value - old_value) / old_value * 100) if old_value != 0 else float('inf')
                                print(f"{name}: {old_value:.4f} -> {value:.4f} (change: {change:+.2f}%)")
                                
                         
                                if abs(value - old_value) > 1e-6:
                                    weights_changed = True
                        
                        
                   
                        if not has_numeric_weights:
                            print("No numeric weights found")
                            
                    
                        if not weights_changed:
                            print("Weight optimization did not produce new values, stopping optimization for current sample")
                            break
                        
                    
                        with open(weight_file, 'a') as f:
                            f.write(f"\nIteration {round_idx + 1}:\n")
                            f.write(f"Success rate: {score:.4f}\n")
                            for weight_name, weight_value in next_weights.items():
                                if isinstance(weight_value, (int, float)):
                                    f.write(f"{weight_name}: {weight_value:.4f}\n")
                            f.write("-"*30 + "\n")
                        
                    
                        current_weights_list[sample_idx] = next_weights.copy()
                    
                
                else:
                    print(f"Warning: Sample {sample_idx} training may have issues (return code: {process.returncode})")
                
                rl_runs.append(process)
        
        print("\nAll sample optimization training completed!")

        successes = []
        reward_correlations = []
        contents = []
        code_feedbacks = []

        for response_id, code_run in enumerate(code_runs):
            if 'best_score' in sample_results[response_id] and 'best_tensorboard_logs' in sample_results[response_id]:
                successes.append(sample_results[response_id]['best_score'])
            else:
                successes.append(DUMMY_FAILURE)

        best_sample_idx = np.argmax(np.array(successes))

        content = ""

        for response_id, code_run in enumerate(code_runs):
            if 'best_score' in sample_results[response_id] and 'best_tensorboard_logs' in sample_results[response_id]:
                exec_success = True
                tensorboard_logs = sample_results[response_id]['best_tensorboard_logs']
                max_iterations = np.array(tensorboard_logs['gt_reward']).shape[0]
                epoch_freq = max(int(max_iterations // 10), 1)
                
                content = policy_feedback.format(epoch_freq=epoch_freq)

                best_logs = sample_results[best_sample_idx]['best_tensorboard_logs']
                content += f"\nBest Sample ({best_sample_idx}) metrics:\n"
                for metric in best_logs:
                    if "/" not in metric:  
                        metric_cur = ['{:.2f}'.format(x) for x in best_logs[metric][::epoch_freq]]
                        metric_cur_max = max(best_logs[metric])
                        metric_cur_mean = sum(best_logs[metric]) / len(best_logs[metric])
                        metric_cur_min = min(best_logs[metric])
                        if metric != "gt_reward" and metric != "gpt_reward":
                            metric_name = "task_score" if metric == "consecutive_successes" else metric
                            content += f"{metric_name}: {metric_cur}, Max: {metric_cur_max:.2f}, Mean: {metric_cur_mean:.2f}, Min: {metric_cur_min:.2f}\n"
                        elif "consecutive_successes" not in best_logs:
                            content += f"ground-truth score: {metric_cur}, Max: {metric_cur_max:.2f}, Mean: {metric_cur_mean:.2f}, Min: {metric_cur_min:.2f}\n"

                content += f"\nWe calculated a score for each sample based on the uncertainty of the reward term. We then calculated the standard and extreme deviations of all the sample scores in this iteration, which were as follows:\n"

                content += f"\nSample Score Statistics:\n"
                content += f"Standard Deviation: {score_std:.3f}\n"
                content += f"Range: {score_range:.3f}\n"

                for sample_id, code_run in enumerate(code_runs):
                    if sample_id != best_sample_idx and 'best_tensorboard_logs' in sample_results[sample_id]:
                        logs = sample_results[sample_id]['best_tensorboard_logs']
                        for metric in logs:
                            if "/" not in metric:  
                                metric_cur = ['{:.2f}'.format(x) for x in logs[metric][::epoch_freq]]
                                metric_cur_max = max(logs[metric])
                                metric_cur_mean = sum(logs[metric]) / len(logs[metric])
                                metric_cur_min = min(logs[metric])
                                if metric != "gt_reward" and metric != "gpt_reward":
                                    metric_name = "task_score" if metric == "consecutive_successes" else metric
                                    content += f"{metric_name}: {metric_cur}, Max: {metric_cur_max:.2f}, Mean: {metric_cur_mean:.2f}, Min: {metric_cur_min:.2f}\n"
                                elif "consecutive_successes" not in logs:
                                    content += f"ground-truth score: {metric_cur}, Max: {metric_cur_max:.2f}, Mean: {metric_cur_mean:.2f}, Min: {metric_cur_min:.2f}\n"

                if "gt_reward" in tensorboard_logs and "gpt_reward" in tensorboard_logs:
                    gt_reward = np.array(tensorboard_logs["gt_reward"])
                    gpt_reward = np.array(tensorboard_logs["gpt_reward"])
                    reward_correlation = np.corrcoef(gt_reward, gpt_reward)[0, 1]
                    reward_correlations.append(reward_correlation)

                successes.append(sample_results[response_id]['best_score'])
                
                code_feedbacks.append(code_feedback)
                content += code_feedback
            else:
                content = execution_error_feedback.format(traceback_msg="No valid optimization results found")
                successes.append(DUMMY_FAILURE)
                reward_correlations.append(DUMMY_FAILURE)
            
            content += code_output_tip
            contents.append(content)
            
        if not exec_success and cfg.sample != 1:
            execute_rates.append(0.)
            max_successes.append(DUMMY_FAILURE)
            max_successes_reward_correlation.append(DUMMY_FAILURE)
            best_code_paths.append(None)
            logging.info("All code generation failed! Repeat this iteration from the current message checkpoint!")
            continue

        best_sample_idx = np.argmax(np.array(successes))
        best_content = contents[best_sample_idx]
            
        max_success = successes[best_sample_idx]
        max_success_reward_correlation = reward_correlations[best_sample_idx]
        
        print(f"\n=== Execution Rate Statistics ===")
        print(f"Total samples: {cfg.sample}")
        print(f"Selected training samples: {len(selected_samples)}")
        print(f"Attempted samples: {len(successes)}")
        print(f"Successful runs: {np.sum(np.array(successes) >= 0.)}")
        
        execute_rate = np.sum(np.array(successes) >= 0.) / len(selected_samples)
        print(f"Success rate: {execute_rate:.2%}")
        print("="*30 + "\n")

        if max_success > max_success_overall:
            max_success_overall = max_success
            max_success_reward_correlation_overall = max_success_reward_correlation
            max_reward_code_path = code_runs[best_sample_idx]

        execute_rates.append(execute_rate)
        max_successes.append(max_success)
        max_successes_reward_correlation.append(max_success_reward_correlation)
        best_code_paths.append(code_runs[best_sample_idx])

        logging.info(f"Iteration {iter}: Max Success: {max_success}, Execute Rate: {execute_rate}, Max Success Reward Correlation: {max_success_reward_correlation}")
        logging.info(f"Iteration {iter}: Best Generation ID: {best_sample_idx}")
        logging.info(f"Iteration {iter}: GPT Output Content:\n" +  responses[best_sample_idx]["message"]["content"] + "\n")
        logging.info(f"Iteration {iter}: User Content:\n" + best_content + "\n")
            
        # Plot the success rate
        fig, axs = plt.subplots(2, figsize=(6, 6))
        fig.suptitle(f'{cfg.env.task}')

        x_axis = np.arange(len(max_successes))

        axs[0].plot(x_axis, np.array(max_successes))
        axs[0].set_title("Max Success")
        axs[0].set_xlabel("Iteration")

        axs[1].plot(x_axis, np.array(execute_rates))
        axs[1].set_title("Execute Rate")
        axs[1].set_xlabel("Iteration")

        fig.tight_layout(pad=3.0)
        plt.savefig('summary.png')
        np.savez('summary.npz', max_successes=max_successes, execute_rates=execute_rates, best_code_paths=best_code_paths, max_successes_reward_correlation=max_successes_reward_correlation)

        if len(messages) == 2:
            messages += [{"role": "assistant", "content": responses[best_sample_idx]["message"]["content"]}]
            messages += [{"role": "user", "content": best_content}]
        else:
            assert len(messages) == 4
            messages[-2] = {"role": "assistant", "content": responses[best_sample_idx]["message"]["content"]}
            messages[-1] = {"role": "user", "content": best_content}

        # Save dictionary as JSON file
        with open('messages.json', 'w') as file:
            json.dump(messages, file, indent=4)

        if should_stop:
            print(f"\nWarning: Low sample diversity (std = {score_std:.3f}, range = {score_range:.3f})")
            print("Suggest stopping iteration, current generated samples are too similar")
            print("Exit program after evaluating the current iteration")
            break
        
    if max_reward_code_path is None: 
        logging.info("All iterations of code generation failed, aborting...")
        logging.info("Please double check the output env_iter*_response*.txt files for repeating errors!")
        exit()
    shutil.copy(max_reward_code_path, output_file)
    
def copy_env_file(src_path: str, dst_dir: str, task_name: str = None) -> str:
    """Copy an environment file into ``dst_dir`` and over the original task file.

    ``src_path`` is copied to ``<dst_dir>/env.py`` and is then also written
    over ``<root>/isaacgymenvs/isaacgymenvs/tasks/<task_name>gpt.py``, where
    ``<root>`` is the directory four levels above this source file.

    Both checks (supported task, existing tasks directory) run before
    anything is written, so a rejected call leaves ``dst_dir`` untouched.

    Args:
        src_path: Path of the environment file to copy.
        dst_dir: Directory that receives the copy, stored as ``env.py``.
        task_name: Name of a supported task. The default ``None`` is not a
            supported task and therefore raises ``ValueError``.

    Returns:
        The path of the copy inside ``dst_dir``.

    Raises:
        ValueError: If ``task_name`` is not one of the supported tasks.
        FileNotFoundError: If the directory holding the original task files
            does not exist.
    """
    # A tuple (not a set) so that membership falls back to ``==`` and an
    # unhashable ``task_name`` still ends in ValueError rather than TypeError.
    supported_tasks = (
        "ant",
        "shadow_hand",
        "allegro_hand",
        "cartpole",
        "ball_balance",
        "shadow_hand_block_stack",
        "humanoid",
    )
    if task_name not in supported_tasks:
        raise ValueError(f"Unsupported task type: {task_name}")
    # Every supported task maps to "<task_name>gpt.py".
    task_file = f"{task_name}gpt.py"

    # The project root is located relative to this file, not the working
    # directory: four levels up.
    eureka_root = os.path.abspath(os.path.join(
        os.path.dirname(__file__),
        "..",
        "..",
        "..",
        "..",
    ))

    original_path = os.path.join(
        eureka_root,
        "isaacgymenvs",
        "isaacgymenvs",
        "tasks",
        task_file,
    )

    print(f"\n=== Path information ===")
    print(f"Eureka root directory: {eureka_root}")
    print(f"Current file location: {os.path.abspath(__file__)}")
    print(f"Target file path: {original_path}")

    # Check the overwrite target's directory before touching the filesystem.
    tasks_dir = os.path.dirname(original_path)
    if not os.path.exists(tasks_dir):
        raise FileNotFoundError(
            f"Original task file directory does not exist: {tasks_dir}\n"
            f"Eureka root directory content: {os.listdir(eureka_root)}\n"
            f"Current working directory: {os.getcwd()}"
        )

    # Validation passed: first the copy inside dst_dir ...
    dst_path = os.path.join(dst_dir, "env.py")
    shutil.copy2(src_path, dst_path)

    # ... then overwrite the original task file with the same source.
    print(f"\n=== Overwrite original task file ===")
    print(f"Source file: {src_path}")
    print(f"Target file: {original_path}")
    shutil.copy2(src_path, original_path)

    return dst_path

# Script entry point: run the Hydra-decorated main() only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()