import os
import atexit
import asyncio
from typing import Any
from collections import deque
from copy import deepcopy
import numpy as np
from harl.utils.dict_utils import kget
from harl.utils.string_utils import replace_unsupported_chars
from harl.utils.log_processor import process_log_messages
from harl.utils.skill_utils import extract_key_events
from harl import constants
from harl.common.llm_logger import Logger
from harl.common.memory import LocalMemory, GlobalMemory
from harl.common.llm.llm_factory import LLMFactory
from harl.common.skills.skill_registry_factory import SkillRegistryFactory
from harl.common.execute.skill_execute import SkillExecuteProvider
from harl.common.others.task_guidance import TaskGuidanceProvider
# from harl.utils.object_utils import GroundingDINO
from harl.configs.config import Config
from harl.common.search.search import SearchAPIEngine
from harl.utils.encoding_utils import encode_data_to_base64_path
from harl.utils.json_utils import parse_semi_formatted_text

class LLM():

    def __init__(self,
                 args: dict,
                 llm_config: dict,
                 algo_args: dict,
                 env_args: dict,
                 agent_args: dict,
                 task_description: str = "",
                 use_self_reflection: bool = True,
                 use_task_inference: bool = True,
                 use_full_mode: bool = False,
                 ):
        """Record the run configuration and build every agent component.

        Args:
            args: Top-level run arguments; ``args["env"]`` selects the
                environment-specific providers.
            llm_config: Used as both the LLM and the embedding provider
                configuration handed to the LLM factory.
            algo_args: Algorithm settings; ``["train"]["n_rollout_threads"]``
                is taken as the number of parallel environments.
            env_args: Environment settings (its ``"skill"`` section
                configures the skill registry).
            agent_args: Must provide ``"agent_id"``, ``"mem_dir"`` and
                ``"unit_type"``.
            task_description: Query text used for the initial skill retrieval.
            use_self_reflection: Stored on the instance.
            use_task_inference: Stored on the instance.
            use_full_mode: When true, action generation also runs self
                reflection, skill generation and skill curation.

        Raises:
            KeyError: If a required key is missing from ``agent_args`` or
                ``algo_args``.
        """
        # Shared services.
        self.config = Config()
        self.logger = Logger()
        self.global_memory = GlobalMemory()

        # Raw argument bundles, kept for the providers built later.
        self.args = args
        self.llm_config = llm_config
        self.algo_args = algo_args
        self.env_args = env_args
        self.agent_args = agent_args

        # One configuration object drives both the LLM and the embedder.
        self.llm_provider_config_path = self.embed_provider_config_path = llm_config

        # Behaviour switches and task text.
        self.task_description = task_description
        self.use_self_reflection = use_self_reflection
        self.use_task_inference = use_task_inference
        self.use_full_mode = use_full_mode

        # Identity of this agent and the size of the rollout.
        self.agent_id = agent_args["agent_id"]
        self.mem_dir = agent_args["mem_dir"]
        self.unit_type = agent_args["unit_type"]
        self.num_envs = algo_args["train"]["n_rollout_threads"]

        # Everything above must exist before the components are wired up.
        self.set_internal_params()


    def set_internal_params(self, *args, **kwargs):
        """Create the providers, memories, planner and skill library.

        Only the ``"smacv2"`` environment is supported. The positional and
        keyword arguments are accepted but not used.

        Raises:
            ValueError: If ``self.args["env"]`` is not ``"smacv2"``.
        """
        if self.args["env"] != "smacv2":
            raise ValueError(f"Environment {self.args['env']} not supported")

        # Environment-specific classes are imported lazily so that other
        # environments do not pay for (or depend on) the SMACv2 modules.
        from harl.common.process.information_gathering import (
            SMACv2InformationGatheringPreprocessProvider as InformationGatheringPreprocessProvider,
            SMACv2InformationGatheringPostprocessProvider as InformationGatheringPostprocessProvider,
        )
        from harl.common.module.information_gathering import SMACv2InformationGatheringProvider as InformationGatheringProvider

        from harl.common.process.self_reflection import (
            SMACv2SelfReflectionPreprocessProvider as SelfReflectionPreprocessProvider,
            SMACv2SelfReflectionPostprocessProvider as SelfReflectionPostprocessProvider,
        )
        from harl.common.module.self_reflection import SMACv2SelfReflectionProvider as SelfReflectionProvider

        from harl.common.process.task_inference import (
            SMACv2TaskInferencePreprocessProvider as TaskInferencePreprocessProvider,
            SMACv2TaskInferencePostprocessProvider as TaskInferencePostprocessProvider,
        )
        from harl.common.module.task_inference import SMACv2TaskInferenceProvider as TaskInferenceProvider

        from harl.common.process.skill_generation import (
            SMACv2SkillGenerationPreprocessProvider as SkillGenerationPreprocessProvider,
            SMACv2SkillGenerationPostprocessProvider as SkillGenerationPostprocessProvider,
        )
        from harl.common.module.skill_generation import SMACv2SkillGenerationProvider as SkillGenerationProvider

        from harl.common.process.skill_refine import (
            SMACv2SkillRefinePreprocessProvider as SkillRefinePreprocessProvider,
            SMACv2SkillRefinePostprocessProvider as SkillRefinePostprocessProvider,
        )
        from harl.common.module.skill_refine import SMACv2SkillRefineProvider as SkillRefineProvider

        from harl.common.process.action_planning import (
            SMACv2ActionPlanningPreprocessProvider as ActionPlanningPreprocessProvider,
            SMACv2ActionPlanningPostprocessProvider as ActionPlanningPostprocessProvider,
        )
        from harl.common.module.action_planning import SMACv2ActionPlanningProvider as ActionPlanningProvider

        from harl.common.module.skill_curation import SMACv2SkillCurationProvider as SkillCurationProvider
        from harl.common.planner.smacv2_planner import SMACv2Planner as Planner

        # LLM and embedding providers come from the same factory call.
        llm_factory = LLMFactory()
        self.llm_provider, self.embed_provider = llm_factory.create(
            self.llm_provider_config_path,
            self.embed_provider_config_path,
        )

        # Skill registry for this environment.
        registry_factory = SkillRegistryFactory()
        skill_section = self.env_args["skill"]
        registry_factory.register_builder(self.args["env"], skill_section["skill_registry_name"])
        self.skill_registry = registry_factory.create(
            self.args["env"],
            skill_configs=skill_section["skill_configs"],
            embedding_provider=self.embed_provider,
        )

        # One local memory per parallel environment, each in its own folder.
        per_env_memory = []
        for env_id in range(self.num_envs):
            per_env_memory.append(
                LocalMemory(
                    os.path.join(self.mem_dir, f"env-{env_id}"),
                    embedding_provider=self.embed_provider,
                    agent_id=self.agent_id,
                    unit_type=self.unit_type[env_id],
                )
            )
        self.memory = per_env_memory

        self.search_engine = SearchAPIEngine()

        # NOTE(review): both flags are fixed to True here; the
        # use_self_reflection / use_task_inference values stored by __init__
        # are not forwarded to the planner. Confirm this is intended.
        self.planner = Planner(
            llm_provider=self.llm_provider,
            planner_params=self.config.planner_params,
            search_engine=self.search_engine,
            use_self_reflection=True,
            use_task_inference=True,
        )

        # Initial skill library: retrieve skills for every environment first,
        # then turn each retrieval into its skill information.
        retrieved_skills = []
        for env_id in range(self.num_envs):
            retrieved_skills.append(
                self.skill_registry.retrieve_skills(
                    query_task=self.task_description,
                    skill_num=self.env_args["skill"]["skill_configs"][constants.SKILL_CONFIG_MAX_COUNT],
                    unit_type=self.unit_type[env_id],
                )
            )

        skill_library = []
        for env_id in range(self.num_envs):
            skill_library.append(
                self.skill_registry.get_skill_information(
                    retrieved_skills[env_id],
                    self.config.skill_library_with_code,
                )
            )
        self.skill_library = skill_library

        # Each pipeline stage is a preprocess / LLM module / postprocess trio.
        self.information_gathering_preprocess = InformationGatheringPreprocessProvider(memory=self.memory, env_args=self.env_args)
        self.information_gathering = InformationGatheringProvider(memory=self.memory, planner=self.planner)
        self.information_gathering_postprocess = InformationGatheringPostprocessProvider(memory=self.memory)

        self.self_reflection_preprocess = SelfReflectionPreprocessProvider(memory=self.memory, skill_registry=self.skill_registry)
        self.self_reflection = SelfReflectionProvider(planner=self.planner, memory=self.memory)
        self.self_reflection_postprocess = SelfReflectionPostprocessProvider(memory=self.memory)

        self.task_inference_preprocess = TaskInferencePreprocessProvider(memory=self.memory, skill_registry=self.skill_registry)
        self.task_inference = TaskInferenceProvider(planner=self.planner, memory=self.memory)
        self.task_inference_postprocess = TaskInferencePostprocessProvider(memory=self.memory)

        self.skill_generation_preprocess = SkillGenerationPreprocessProvider(memory=self.memory, skill_registry=self.skill_registry)
        self.skill_generation = SkillGenerationProvider(planner=self.planner, memory=self.memory)
        self.skill_generation_postprocess = SkillGenerationPostprocessProvider(memory=self.memory)

        self.skill_refine_preprocess = SkillRefinePreprocessProvider(memory=self.memory, skill_registry=self.skill_registry)
        self.skill_refine = SkillRefineProvider(planner=self.planner, memory=self.memory)
        self.skill_refine_postprocess = SkillRefinePostprocessProvider(memory=self.memory)

        self.action_planning_preprocess = ActionPlanningPreprocessProvider(memory=self.memory)
        self.action_planning = ActionPlanningProvider(planner=self.planner, memory=self.memory, skill_registry=self.skill_registry)
        self.action_planning_postprocess = ActionPlanningPostprocessProvider(memory=self.memory)

        self.skill_curation = SkillCurationProvider(planner=self.planner, memory=self.memory, skill_registry=self.skill_registry)

        # Checkpoints live next to the per-environment memories.
        self.checkpoint_path = os.path.join(self.mem_dir, 'checkpoints')
        os.makedirs(self.checkpoint_path, exist_ok=True)

    async def generate_actions_for_env(self, env_idx):
        """Run the planning stages for one environment, in order.

        In full mode the sequence is: self reflection (skipped while the
        environment's memory is still at step 0), skill generation, skill
        curation. Action planning always runs last, in either mode.

        Args:
            env_idx: Index of the environment to plan for.
        """
        stages = []
        if self.use_full_mode:
            # There is nothing to reflect on before the first step.
            if self.memory[env_idx].current_step != 0:
                stages.append(self.run_self_reflection)
            stages.append(self.run_skill_generation)
            stages.append(self.run_skill_curation)
        stages.append(self.run_action_planning)

        for stage in stages:
            await stage(env_idx)
            
    
    def check_action_validity(self, action: tuple, observation: str) -> bool:
        """Tell whether an action is listed as available in an observation.

        The text after the last ``"5. Available Actions:"`` header (or the
        whole observation when the header is absent) is scanned line by line.
        The action is valid when some line contains the rendered action text
        and ends with ``"yes"``.

        Args:
            action: ``(action_name, params_dict)``; ``params_dict`` may hold
                ``"unit_id"`` and, for targeted actions, ``"target_id"``.
            observation: Text observation to search.

        Returns:
            bool: ``True`` when a matching available line is found.
        """
        action_name, params = action
        unit_id = params.get("unit_id")
        target_id = params.get("target_id")

        # Render the action the way it appears in the observation text.
        # A target is only rendered together with a unit id.
        if unit_id is None:
            wanted = action_name
        elif target_id is None:
            wanted = f"{action_name}, {{'unit_id': {unit_id}}}"
        else:
            wanted = f"{action_name}, {{'unit_id': {unit_id}, 'target_id': {target_id}}}"

        # Everything after the last header; the full text if there is none.
        _, _, listing = observation.rpartition("5. Available Actions:\n")

        return any(
            wanted in entry and entry.endswith("yes")
            for entry in listing.strip().split("\n")
        )

    async def get_actions(self, step, episode):
        """Compute one action per environment, processing them concurrently.

        For each environment the method records the current step, returns
        action ``0`` immediately when index 0 of the latest available-action
        mask is set (treated as "agent is dead"), optionally refreshes skill
        guidance and re-plans, then executes the planned skill. Any failure
        is logged, stored as ``exec_error`` in memory, followed (outside debug
        mode) by skill refinement and curation, and the environment is
        retried. There is no retry limit.

        Args:
            step: Sequence with the current step number of every environment.
            episode: Current episode number (used for logging and
                checkpoint naming).

        Returns:
            list: One integer action per environment, in environment order.
        """

        async def process_env(env_idx):
            self.logger.write("\033[93m*** [SMACv2]\033[0m \033[96mAGENT_{} | EP_{} | ENV_{} | STEP_{}\033[0m \033[93m| Getting Actions ***\033[0m".format(
                str(self.agent_id).zfill(2),
                str(episode).zfill(4),
                str(env_idx).zfill(2),
                str(step[env_idx]).zfill(4)
            ))
            memory = self.memory[env_idx]
            while True:
                try:
                    # Make memory updates
                    memory.update_current_step(step[env_idx])
                    memory.update_info_history({"end_frame_id": step[env_idx]})
                    avail_actions = memory.get_recent_history("available_actions", k=1)[0]
                    # Check if agent is dead
                    if avail_actions[0] == 1:
                        return 0

                    if not self.algo_args["train"]["debug"]:
                        skill_guidance = True
                        if step[env_idx] % self.algo_args["train"]["skill_interval"] == 0:
                            last_images_event = memory.get_recent_history(constants.IMAGES_EVENT_BUCKET, k=1)[0]
                            current_obs_image = memory.get_recent_history(constants.IMAGES_MEM_BUCKET, k=1)[0]
                            # Record a new image event only when the screenshot changed.
                            if last_images_event == "" or last_images_event != current_obs_image:
                                memory.update_info_history({constants.IMAGES_EVENT_BUCKET: current_obs_image})
                            await self.run_information_gathering(env_idx)
                            await self.run_task_inference(env_idx)
                            await self.run_skill_curation(env_idx, skill_generation=False)
                            skill_guidance = memory.get_recent_history("skill_guidance", k=1)[0]

                        # Only the length is needed, so no copy of the working area is made.
                        skill_steps = memory.working_area.get("skill_steps", [])
                        if skill_guidance is False or len(skill_steps) == 0:
                            await self.generate_actions_for_env(env_idx)

                            # Handle frame updates
                            memory.update_info_history({"start_frame_id": step[env_idx]})

                            # Handle checkpoints. The parentheses matter: without
                            # them `%` binds tighter than `+` and the test reads
                            # `episode + (1 % interval) == 0`.
                            if (episode + 1) % self.config.checkpoint_interval == 0:
                                checkpoint_path = os.path.join(
                                    self.checkpoint_path,
                                    f'checkpoint_{episode:03d}_{env_idx}.json'
                                )
                                memory.save(checkpoint_path)

                    # Execute the planned skill and validate the resulting action.
                    action = await self.skill_execute(env_idx)
                    if not isinstance(action, int):
                        raise ValueError(f"Action: {action} is not valid, retrying...")

                    avail_actions = memory.get_recent_history("available_actions", k=1)[0]
                    # Explicit raise instead of `assert`, which is removed under -O.
                    if avail_actions[action] != 1:
                        raise ValueError(
                            "Agent {} cannot perform action {} in Env {}".format(self.agent_id, action, env_idx)
                        )
                    memory.update_info_history({"exec_error": ""})
                    return action

                except Exception as e:
                    # Any failure above is recorded and triggers a refine-and-retry cycle.
                    self.logger.error(f"Error in env {env_idx}: {str(e)}")
                    self.logger.error_ex(e)
                    memory.update_info_history({"exec_error": str(e)})
                    if not self.algo_args["train"]["debug"]:
                        await self.run_skill_refine(env_idx)
                        await self.run_skill_curation(env_idx, overwrite=True)

                    # Drop the failed plan so the next pass re-plans from scratch.
                    memory.working_area.update({"skill_steps": []})

        # Process all envs concurrently
        env_tasks = [process_env(env_idx) for env_idx in range(len(step))]
        actions = await asyncio.gather(*env_tasks)

        return actions

    
    async def run_information_gathering(self, env_idx):
        """Run the information-gathering stage for one environment.

        Args:
            env_idx: Index of the environment to process.
        """
        # Stage input is prepared first, then the module is awaited, and its
        # output is handed to the postprocessor together with the index.
        self.information_gathering_preprocess(env_idx)
        gathered = await self.information_gathering(env_idx)
        self.information_gathering_postprocess(gathered, env_idx)


    async def run_self_reflection(self, env_idx):
        """Run the self-reflection stage for one environment.

        Args:
            env_idx: Index of the environment to process.
        """
        # Preprocess, await the self-reflection module, then postprocess
        # the module's output.
        self.self_reflection_preprocess(env_idx)
        reflection = await self.self_reflection(env_idx)
        self.self_reflection_postprocess(reflection, env_idx)

    async def run_task_inference(self, env_idx):
        """Run the task-inference stage for one environment.

        Args:
            env_idx: Index of the environment to process.
        """
        # Preprocess, await the task-inference module, then postprocess
        # the module's output.
        self.task_inference_preprocess(env_idx)
        inferred = await self.task_inference(env_idx)
        self.task_inference_postprocess(inferred, env_idx)

    async def run_skill_inference(self, env_idx):
        """Run the skill-inference stage for one environment.

        NOTE(review): ``set_internal_params`` in this file does not assign
        ``skill_inference_preprocess``, ``skill_inference`` or
        ``skill_inference_postprocess``. Unless they are set elsewhere,
        calling this method raises ``AttributeError`` - confirm whether the
        stage is still in use.

        Args:
            env_idx: Index of the environment to process.
        """
        # Preprocess, await the skill-inference module, then postprocess
        # the module's output.
        self.skill_inference_preprocess(env_idx)
        inferred_skill = await self.skill_inference(env_idx)
        self.skill_inference_postprocess(inferred_skill, env_idx)

    async def run_skill_generation(self, env_idx):
        """Run the skill-generation stage for one environment.

        Args:
            env_idx: Index of the environment to process.
        """
        # Preprocess, await the skill-generation module, then postprocess
        # the module's output.
        self.skill_generation_preprocess(env_idx)
        generated = await self.skill_generation(env_idx)
        self.skill_generation_postprocess(generated, env_idx)

    async def run_skill_refine(self, env_idx):
        """Run the skill-refinement stage for one environment.

        Args:
            env_idx: Index of the environment to process.
        """
        # Preprocess, await the skill-refine module, then postprocess
        # the module's output.
        self.skill_refine_preprocess(env_idx)
        refined = await self.skill_refine(env_idx)
        self.skill_refine_postprocess(refined, env_idx)


    async def run_action_planning(self, env_idx):

        # 1. Prepare the parameters to call llm api
        self.action_planning_preprocess(env_idx)

        # 2. Call llm api for action planning
        response = await self.action_planning(env_idx)

        # 3. Postprocess the response
        self.action_planning_postprocess(response, env_idx)

        # # 4. Execute the actions
        # await self.skill_execute(env_idx)

    async def run_skill_curation(self, env_idx, overwrite=False, skill_generation=True):
        """Await the skill-curation provider for one environment.

        Args:
            env_idx: Index of the environment to process.
            overwrite: Forwarded unchanged as the provider's second
                positional argument.
            skill_generation: Forwarded unchanged as the provider's third
                positional argument.
        """
        curate = self.skill_curation
        await curate(env_idx, overwrite, skill_generation)
    
    async def skill_execute(self, env_idx):
        """Execute the next planned skill for one environment.

        The observation sent to the skill is the stored observation text
        followed by the ego minimap (section 6) and the region of interest
        (section 7). Outside debug mode only the first entry of
        ``skill_steps`` is executed; when its expression cannot be converted
        to a skill name, the default navigation skill is used instead. In
        debug mode the default navigation skill is always used.

        Args:
            env_idx: Index of the environment to act in.

        Returns:
            Whatever the skill registry returns for the executed skill, or
            ``None`` when there is no planned step outside debug mode.

        Raises:
            ValueError: If selecting or executing the skill fails; the
                original exception is attached as ``__cause__``.
        """
        fallback_skill = "race_melee_ranged_medivac_navi_A_star_score_type_default_center"

        # Work on a snapshot so nothing below can alter the live working area.
        params = deepcopy(self.memory[env_idx].working_area)
        skill_steps = params.get("skill_steps", [])
        obs = params.get("observation", "")

        # Blank lines inside the minimap are collapsed to single newlines.
        ego_minimap = params.get("ego_minimap", "").replace("\n\n", "\n")
        region_of_interest = params.get("region_of_interest", "")
        obs += (
            "\n6. Ego Minimap:\n"
            f"{ego_minimap}\n"
            "7. Region of Interest:\n"
            f"{region_of_interest}\n"
        )

        try:
            if self.algo_args["train"]["debug"]:
                skill_name = fallback_skill
            else:
                no_step = object()
                skill = next(iter(skill_steps), no_step)
                if skill is no_step:
                    # Nothing is planned, so there is nothing to execute.
                    return None
                if isinstance(skill, list):
                    skill = skill[0]

                try:
                    skill_name, _ = self.skill_registry.convert_expression_to_skill(skill)
                except Exception as e:
                    # A malformed expression falls back to the default skill.
                    self.logger.error(f"Error converting skill expression to skill: {str(e)}")
                    skill_name = fallback_skill

            self.logger.write(f"\033[94mAgent {self.agent_id} Executing skill:\033[0m \033[92m{skill_name}\033[0m")
            return await self.skill_registry.execute_skill(skill_name, {"obs": obs})
        except Exception as e:
            raise ValueError(f"Error executing skill of Agent {self.agent_id} : {skill_steps}, error: {str(e)}") from e
    
    async def summarize_env_experiences(self, env_idx) -> str:
        """Ask the LLM to critique the last episode of one environment.

        The shared key-frame screenshots stored in the environment's memory
        are (if necessary) evenly down-sampled to at most
        ``config.max_env_summarization_num`` frames, sent to the LLM together
        with a critic system prompt, and the ``reasoning`` section of the
        reply is stored in memory under ``"last_episode_reasoning"``.
        Afterwards every regular file in the memory's screenshot directory
        is deleted.

        Args:
            env_idx: Index of the environment to summarize.

        Returns:
            str: The parsed reasoning text (empty string when the reply has
            no ``reasoning`` section).

        Raises:
            ValueError: If an image has to be attached but the provider
                configuration names neither OPENAI nor CLAUDE, or if the LLM
                call / reply parsing fails (original error chained).
        """
        self.logger.write(f"SMACv2 Collect information from Env {env_idx}")
        memory = self.memory[env_idx]

        def ordinal_word(position):
            """Return 'first'..'fifth' for 0..4, then '6th', '7th', ..."""
            words = ('first', 'second', 'third', 'fourth', 'fifth')
            if position < len(words):
                return words[position]
            rank = position + 1
            if 10 <= rank % 100 <= 20:
                suffix = "th"
            else:
                suffix = {1: "st", 2: "nd", 3: "rd"}.get(rank % 10, "th")
            return f"{rank}{suffix}"

        # Only the key frames are sent to the LLM.
        all_key_frames = list(memory.get_history(constants.SHARE_IMAGES_MEM_BUCKET))

        system_message = [{
            "role": "system",
            "content": [
                {
                    "type": "text",
                    "text": f"You are an AI assistant integrated with StarCraft II's SMAC (StarCraft Multi-Agent Challenge) environment, controlling a {self.unit_type[env_idx]} unit with ID {self.agent_id} in micromanagement scenarios {self.env_args['map_name']} to help your team defeat the enemy forces. You operate under decentralized execution with partial observability, making decisions based only on local observations within your unit's field of view. Your advanced capabilities enable you to process and interpret gameplay screenshots and other relevant information. By analyzing these inputs, you gain a comprehensive understanding of the combat. You are now working as a critic. Utilizing this insight, you are tasked with analyzing why the last episode ended in a **lose** and what is the most critical area for improvement."
                }
            ]
        }]

        # Keep an evenly spaced subset when there are too many frames.
        max_frames = self.config.max_env_summarization_num
        if len(all_key_frames) > max_frames:
            indices = np.linspace(0, len(all_key_frames) - 1, max_frames, dtype=int)
            all_key_frames = [all_key_frames[i] for i in indices]

        # The provider is recognised by an upper-case tag in its configuration.
        is_openai = "OPENAI" in self.llm_provider_config_path
        is_claude = "CLAUDE" in self.llm_provider_config_path

        image_introduction = [
            {
                "encoded_images": encode_data_to_base64_path(frame, claude=is_claude),
                "assistant": "",
                "introduction": 'This is the {} screenshot of recent events.'.format(ordinal_word(frame_idx)),
            }
            for frame_idx, frame in enumerate(all_key_frames)
            if frame is not None and frame != ""
        ]

        image_introduction_messages = []
        for item in image_introduction:
            introduction = item.get(constants.IMAGE_INTRO_TAG_NAME, None)
            encoded_images = item.get("encoded_images", None)
            content = []
            if introduction is not None and introduction != "":
                content.append({"type": "text", "text": f"{introduction}"})
            if encoded_images is not None and encoded_images != "":
                for encoded_image in encoded_images:
                    if is_openai:
                        msg_content = {
                            "type": "image_url",
                            "image_url": {"url": f"{encoded_image}"},
                        }
                    elif is_claude:
                        msg_content = {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/jpeg",
                                "data": f"{encoded_image}",
                            },
                        }
                    else:
                        # Fail loudly instead of reusing a stale or unbound payload.
                        raise ValueError(
                            "Cannot attach screenshots: the LLM provider configuration "
                            "names neither OPENAI nor CLAUDE"
                        )
                    content.append(msg_content)
            if content:
                image_introduction_messages.append({"role": "user", "content": content})

        # Format chat messages
        user_messages = [
            {
                "role": "user",
                "content": """

                You should only respond in the format described below with a line break after each section colon (##Section##:) and NOT output comments or other information:

                ##Reasoning##:
                1. [analysis of the last episode]
                2. [analysis of the most critical area for improvement]
                ...

                """
            }
        ]
        messages = system_message + image_introduction_messages + user_messages
        try:
            # Get summarized lessons from LLM
            response, _ = await self.llm_provider.create_completion_async(messages)
            processed_response = parse_semi_formatted_text(response)
            response = processed_response.get("reasoning", "")
        except Exception as e:
            raise ValueError(f"Error in summarizing experiences: {str(e)}") from e

        memory.add_recent_history_kv("last_episode_reasoning", response)

        # Clean images under the screenshot directory (regular files only).
        image_dir = memory.screenshot_path
        if os.path.exists(image_dir):
            for entry in os.listdir(image_dir):
                entry_path = os.path.join(image_dir, entry)
                if os.path.isfile(entry_path):
                    os.remove(entry_path)

        return response
    

    async def summarize_cross_env_experiences(self) -> str:
        """Distil one shared lesson from every environment's last reasoning.

        Each environment's ``"last_episode_reasoning"`` history is joined into
        a numbered entry, all entries are sent to the LLM, and the
        ``reasoning`` section of the reply is stored in every memory under
        ``"historical_lesson"``.

        Returns:
            str: The parsed reasoning text (empty string when the reply has
            no ``reasoning`` section).

        Raises:
            ValueError: If the LLM call or the reply parsing fails.
        """
        self.logger.write("SMACv2 Collect information from all memories")

        # One newline-joined block per environment, numbered from 1.
        per_env_reasoning = [
            "\n".join(env_memory.get_history("last_episode_reasoning"))
            for env_memory in self.memory
        ]
        numbered_entries = []
        for entry_number, entry in enumerate(per_env_reasoning, start=1):
            numbered_entries.append(f"Environment Entry #{entry_number}:\n{entry}")
        all_last_episode_reasoning = "\n\n".join(numbered_entries)

        # Format chat messages
        messages = [
            {
                "role": "system",
                "content": "You are a tactical analysis AI that helps synthesize lessons learned from combat scenarios in the StarCraft II's SMAC (StarCraft Multi-Agent Challenge) environment."
            },
            {
                "role": "user",
                "content": f"""Here are the recent experiences from multiple environments:

                Recent Episode reasonings:
                {all_last_episode_reasoning}

                Reasoning: 
                You need to answer the following questions step by step to get some reasoning based on the recent experiences.
                1. What are the common pitfalls observed in the recent experiences? You should give only one pitfall.
                2. What are the key strategic insights observed in the recent experiences? You should give only one insight.
                3. What is the most probable area for improvement? You should give only one area.

                You should only respond in the format described below with a line break after each section colon (##Section##:) and NOT output comments or other information:

                ##Reasoning##:
                1. ...
                2. ...
                ...

                """
            }
        ]

        try:
            # The second element of the provider's reply is not needed here.
            raw_reply, _reply_info = await self.llm_provider.create_completion_async(messages)
            parsed_reply = parse_semi_formatted_text(raw_reply)
            response = parsed_reply.get("reasoning", "")
        except Exception as e:
            raise ValueError(f"Error in summarizing experiences: {str(e)}")

        # Every environment receives the same shared lesson.
        for env_memory in self.memory:
            env_memory.add_recent_history_kv("historical_lesson", response)

        return response
