import argparse
import copy
import json
import logging
import os
import shutil
from typing import Any, Dict
import sys
import math

import shortuuid
from omegaconf import DictConfig, OmegaConf
import wandb

import random
import numpy as np
import torch
import transformers

from ours.memories import DecomposedMemory
from ours.memories import KnowledgeGraph as OracleGraph
from ours.memories import HypothesizedRecipeGraph
from ours.util import (
    language_action_to_subgoal,
    render_subgoal,
    get_evaluate_task_and_goal,
    check_original_goal_finish,
    check_waypoint_item_obtained,
    convert_subgoal_to_mineflayer_code_action,
    extract_dependencies,
)
from ours.qwen_vl_planning import PlanningModel as QwenVLPlanningModel
from ours.subgoal_strategies_with_memory import (
    make_one_subgoal,
)
from ours.int_goal_strategies import select_int_goal

from env.bridge import VoyagerEnv

from Adam.skill_loader import convert_action_to_mineflayer


def log_wandb(hypothesized_recipe_graph, logger,
              experienced_item_names, verified_int_goal_names, explored_int_goals, num_new_verified_items,
              step_highlevel_actions=0, used_ticks=0):
    """Report exploration progress to the logger and to wandb.

    The four item-name lists held by ``hypothesized_recipe_graph`` (verified,
    hypothesized, frontier, inadmissible) are de-duplicated, logged, and their
    sizes are sent to wandb together with the episode counters passed in.

    Args:
        hypothesized_recipe_graph: Object exposing ``verified_item_names``,
            ``hypothesized_item_names``, ``frontier_item_names`` and
            ``inadmissible_item_names``.
        logger: Logger that receives the human-readable summary.
        experienced_item_names: Items experienced so far (only its length is used).
        verified_int_goal_names: Achieved intrinsic goals (only its length is used).
        explored_int_goals: Explored intrinsic goals (only its length is used).
        num_new_verified_items: Counter forwarded unchanged to the log and wandb.
        step_highlevel_actions: Number of high-level actions taken so far.
        used_ticks: Elapsed game ticks; logged as minutes via ``used_ticks / 1200``.
    """
    def _unique(names):
        # Work on a copy so the graph's own lists are never touched.
        return list(set(copy.deepcopy(names)))

    verified = _unique(hypothesized_recipe_graph.verified_item_names)
    hypothesized = _unique(hypothesized_recipe_graph.hypothesized_item_names)
    frontier = _unique(hypothesized_recipe_graph.frontier_item_names)
    inadmissible = _unique(hypothesized_recipe_graph.inadmissible_item_names)

    # Single source of truth for both the log lines and the wandb payload.
    metrics = {
        "num_verified_items": len(verified),
        "num_hypothesized_items": len(hypothesized),
        "num_frontier_items": len(frontier),
        "num_inadmissible_items": len(inadmissible),
        "num_experienced_items": len(experienced_item_names),
        "num_verified_int_goals": len(verified_int_goal_names),
        "num_explored_int_goals": len(explored_int_goals),
        "num_new_verified_items": num_new_verified_items,
        "step_highlevel_actions": step_highlevel_actions,
        "used_ticks": used_ticks,
    }

    logger.info(f"Verified items: {verified}")
    logger.info(f"Hypothesized items: {hypothesized}")
    logger.info(f"Frontier items: {frontier}")
    logger.info(f"Inadmissible items: {inadmissible}")
    logger.info(f"Number of experienced items: {metrics['num_experienced_items']}")
    logger.info(f"Number of verified intrinsic goals: {metrics['num_verified_int_goals']}")
    logger.info(f"Number of explored intrinsic goals: {metrics['num_explored_int_goals']}")
    logger.info(f"Number of new verified items: {metrics['num_new_verified_items']}")

    logger.info(f"Current high-level action steps: {step_highlevel_actions}")
    logger.info(f"Current used minutes: {math.floor(used_ticks / 1200)}")

    wandb.log(metrics)


def reflect_on_failure(item_name, language_action_str, inventory_before_action,
                       action_memory: DecomposedMemory, plan_model: QwenVLPlanningModel,
                       plan_failure_threshold, logger):
    """Ask the planner to analyse a repeatedly failing action, at most once.

    A reflection is generated only when the action's recorded ``failure``
    value has reached ``plan_failure_threshold`` and no reflection is stored
    for it yet. The new reflection is logged and saved to ``action_memory``.

    Args:
        item_name: Item the failing action was trying to obtain.
        language_action_str: The action (plan) in natural language.
        inventory_before_action: Inventory snapshot taken before the action ran.
        action_memory: Memory providing ``get_history_of_action`` and
            ``save_reflection``.
        plan_model: Planner providing ``reflect_on_failure``.
        plan_failure_threshold: Failure value at which reflection is triggered.
        logger: Logger for the reflection summary.

    Returns:
        A dict with keys ``item_name``, ``inventory``, ``plan`` and
        ``failure_analysis`` when a new reflection was produced, else ``None``.
    """
    record = action_memory.get_history_of_action(item_name, language_action_str)

    # Guard: nothing recorded, or not enough failures yet.
    if 'failure' not in record:
        return None
    if not (record['failure'] >= plan_failure_threshold):
        return None

    # Guard: a reflection already exists for this action.
    stored_reflection = record.get("reflection", dict())
    if not (stored_reflection is None or len(stored_reflection) == 0):
        return None

    # Call LLM to get reflection
    analysis = plan_model.reflect_on_failure(item_name, language_action_str, inventory_before_action)
    logger.info(f'''\nReflection on failure of {item_name}''')
    logger.info(f'Action: {language_action_str}')
    logger.info(f'Inventory: {inventory_before_action}')
    logger.info(f'Reflection: {analysis}\n')
    action_memory.save_reflection(item_name, language_action_str, inventory_before_action, analysis)

    result = dict()
    result["item_name"] = item_name
    result["inventory"] = inventory_before_action
    result["plan"] = language_action_str
    result["failure_analysis"] = analysis
    return result





def _sanitize_hypothesized_recipe(raw_recipe, error_msg):
    """Normalize planner output into a ``{material: quantity}`` dict (``{}`` if unusable)."""
    # Type check first: the planner may return something that is not a dict.
    if error_msg is not None or not isinstance(raw_recipe, dict):
        return {}

    # Some outputs wrap the recipe as {"recipe": {...}}; unwrap it.
    nested = raw_recipe.get('recipe')
    if isinstance(nested, dict):
        raw_recipe = nested

    for material, quantity in raw_recipe.items():
        if not isinstance(material, str) or not isinstance(quantity, (int, float)):
            return {}
    return raw_recipe


def _item_reaches_itself(recipe_graph, start_item):
    """Return True if ``start_item`` is among its own transitive ingredients."""
    seen = {start_item}
    pending = [start_item]
    while pending:
        current = pending.pop()
        if current not in recipe_graph:
            continue
        for ingredient in recipe_graph[current]['ingredients']:
            if ingredient == start_item:
                return True
            if ingredient not in seen:
                seen.add(ingredient)
                pending.append(ingredient)
    return False


def make_hypothesis_using_llm(
    item_name: str,
    inventory: dict,
    hypothesized_recipe_graph: HypothesizedRecipeGraph,
    action_memory: DecomposedMemory,
    obs: Dict[str, Any],
    logger: logging.Logger,
    plan_model: QwenVLPlanningModel,
    all_reflections: list = None
):
    """Ask the planner for a new recipe hypothesis for ``item_name`` and store it.

    The planner is prompted with the current hypothesis, the inventory, the
    most similar verified items with their recipes, and past reflections. The
    answer is validated; if it is unusable or makes any hypothesized item
    depend on itself, an empty recipe is stored instead. The result is written
    into ``hypothesized_recipe_graph`` (in memory and via its save helpers) and
    the graph is reloaded with ``load_and_init_all_recipes()``.

    Args:
        item_name: Item whose recipe is being hypothesized.
        inventory: Inventory passed to the planner as context.
        hypothesized_recipe_graph: Recipe graph that is read and updated in place.
        action_memory: Unused here; kept for interface compatibility.
        obs: Unused here; kept for interface compatibility.
        logger: Logger for progress messages.
        plan_model: Planner providing ``generate_hypothesis``.
        all_reflections: Past failure reflections for the item; defaults to an
            empty list.

    Returns:
        None. All results are side effects on ``hypothesized_recipe_graph``.
    """
    # Avoid a shared mutable default argument.
    if all_reflections is None:
        all_reflections = []

    _, topK_verified_items = hypothesized_recipe_graph.find_top_similar_verified_items(
        item_name, topK=hypothesized_recipe_graph.topK)
    topK_verified_recipes = [
        hypothesized_recipe_graph.get_recipe(verified_item)['ingredients']
        for verified_item in topK_verified_items
    ]

    logger.info(f"In make_hypothesis_using_llm()")
    logger.info(f"item_name: {str(item_name)}")

    previous_recipe = hypothesized_recipe_graph.get_recipe(item_name)
    if previous_recipe is None or 'ingredients' not in previous_recipe:
        original_hypothesis = {}
    else:
        original_hypothesis = previous_recipe['ingredients']

    new_recipe, error_msg = plan_model.generate_hypothesis(
        original_hypothesis, item_name, inventory,
        topK_verified_items, topK_verified_recipes, all_reflections)
    logger.info(f'''\nHypothesis for {item_name}: new recipe: {new_recipe}\n''')
    if error_msg is not None:
        logger.warning(f"Hypothesis generation for {item_name} reported an error: {error_msg}")

    new_recipe = _sanitize_hypothesized_recipe(new_recipe, error_msg)
    if new_recipe:
        # Rename materials once, after validation, rather than rebinding the
        # recipe while it is still being iterated.
        new_recipe = hypothesized_recipe_graph._change_material_names(item_name, new_recipe)

    logger.info(f"new hypothesis for {item_name}: {str(new_recipe)}")
    hypothesized_recipe_graph.hypothesized_item_names.append(item_name)
    hypothesized_recipe_graph.all_item_names.append(item_name)

    hypothesized_recipe_graph.graph[item_name] = {
        "ingredients": new_recipe,
        "output_qty": 1,
        "is_verified": False
    }

    # Reject the hypothesis if any hypothesized item now depends on itself.
    hypothesized_recipe_graph.is_cycle = any(
        _item_reaches_itself(hypothesized_recipe_graph.graph, candidate)
        for candidate in hypothesized_recipe_graph.hypothesized_item_names
    )
    if hypothesized_recipe_graph.is_cycle:
        logger.warning(f"Hypothesis for {item_name} creates a dependency cycle; storing an empty recipe.")
        new_recipe = {}
        hypothesized_recipe_graph.graph[item_name] = {
            "ingredients": new_recipe,
            "output_qty": 1,
            "is_verified": False
        }

    # Record material -> product quantities only for the recipe that was
    # actually kept, so a rejected (cyclic) hypothesis leaves no stale edges.
    for material, num_needed in new_recipe.items():
        if material not in hypothesized_recipe_graph.item_number_graph:
            hypothesized_recipe_graph.item_number_graph[material] = {}
        hypothesized_recipe_graph.item_number_graph[material][item_name] = num_needed

    exploration_count = hypothesized_recipe_graph.exploration_count_dict.get(item_name, 1)

    new_hypothesized_recipe_data = {
        "item_name": item_name,
        "output_qty": 1,
        "ingredients": new_recipe,
        "required_pickaxe": 0,
        "is_crafting_resource": False,
        "exploration_count": exploration_count,
    }
    hypothesized_recipe_graph._save_hypothesized_recipe_data(item_name, new_hypothesized_recipe_data)

    logger.info(f"new_hypothesized_recipe_data: {str(new_hypothesized_recipe_data)}")

    # Register ingredients the graph has never seen as new hypothesized items.
    for material in new_recipe:
        if material in hypothesized_recipe_graph.graph:
            continue
        if material in hypothesized_recipe_graph.hypothesized_item_names:
            continue
        hypothesized_recipe_graph.hypothesized_item_names.append(material)
        hypothesized_recipe_graph.all_item_names.append(material)
        if material not in hypothesized_recipe_graph.exploration_count_dict:
            hypothesized_recipe_graph.exploration_count_dict[material] = 1
        hypothesized_recipe_graph._save_hypothesized_recipe_data(material)

    hypothesized_recipe_graph.load_and_init_all_recipes()
    return


def _save_server_log_and_exit():
    """Copy the Minecraft server log, upload it to wandb and terminate the process."""
    log_src = os.path.join("/app/mine_server/logs/", 'latest.log')
    log_dst = f'mine_server_{shortuuid.uuid()[:4]}.log'
    shutil.copy(log_src, log_dst)
    wandb.save(log_dst)
    wandb.finish(exit_code=1)
    # NOTE(review): the process exits with status 0 although the wandb run is
    # marked as failed; kept as-is in case a supervisor relies on it -- confirm.
    os._exit(0)


def _step_with_recovery(env, action):
    """Run one env step; return ``(obs, timed_out)`` after handling failure sentinels."""
    obs = env.step(action)
    if obs == "server_failed":
        _save_server_log_and_exit()
    timed_out = obs == "action_failed_timeout"
    if timed_out or obs == "action_failed":
        # An empty step is issued to obtain a fresh observation after a failure.
        obs = env.step('')
    return obs, timed_out


def new_agent_do(
    env: VoyagerEnv,
    plan_model: QwenVLPlanningModel,
    logger: logging.Logger,
    reset_obs: Dict[str, Any],
    action_memory: DecomposedMemory,
    cfg: DictConfig,
    run_uuid: str,
):
    """Run one exploration episode: pick intrinsic goals, execute subgoals, learn recipes.

    Each loop iteration selects an intrinsic goal if none is active, asks for
    one subgoal towards it, executes the subgoal in the environment, extracts
    item dependencies from the inventory change, and records the outcome in
    ``action_memory``. Repeated failures of a waypoint trigger the
    prefix-specific recovery (recipe revision, LLM self-correction, or plain
    exploration-count increments). The episode ends when no goal can be
    selected, planning fails, or a step/time/failure budget is exhausted.

    Args:
        env: Environment bridge used to execute actions.
        plan_model: Planner used for subgoals, reflections and hypotheses.
        logger: Logger for progress messages.
        reset_obs: Observation returned after the environment reset.
        action_memory: Memory of action outcomes and reflections.
        cfg: Run configuration (``prefix``, ``memory``, ``max_minutes``,
            ``max_step_highlevel_actions`` are read here).
        run_uuid: Unused here; kept for interface compatibility.

    Returns:
        Tuple ``(status, completed_subgoals, failed_subgoals, failed_waypoints)``.
    """
    prefix = cfg["prefix"]
    logger.info(f"[yellow]In new_agent_do(), prefix: {prefix}[/yellow]")

    # The recovery strategy depends only on the prefix, so decide it once.
    uses_recipe_revision = "ours" in prefix or "frontier" in prefix
    uses_self_correction = (not uses_recipe_revision) and "self_correction" in prefix

    status = ""

    int_goal = None
    waypoint = ""
    subgoal = None
    language_action_str = ""

    topK = cfg["memory"]["topK"]
    revision_sg_failure_threshold = cfg["memory"]["revision_sg_failure_threshold"]
    plan_failure_threshold = cfg["memory"]["plan_failure_threshold"]

    oracle_graph = OracleGraph()
    hypothesized_recipe_graph = HypothesizedRecipeGraph(cfg, logger)

    completed_subgoals = []
    failed_subgoals = []
    failed_waypoints = []

    obs = reset_obs
    inventory = obs[-1][-1]['inventory']
    logger.info(f"Initial inventory: {inventory}")

    max_minutes = cfg["max_minutes"]
    max_step_highlevel_actions = cfg["max_step_highlevel_actions"]

    step_highlevel_actions = 0
    num_action_failed_timeout = 0

    experienced_item_names = []
    verified_int_goal_names = []
    explored_int_goals = []
    num_new_verified_items = 0

    log_wandb(hypothesized_recipe_graph, logger,
              experienced_item_names, verified_int_goal_names, explored_int_goals, num_new_verified_items,
              step_highlevel_actions=0, used_ticks=0)

    while True:
        ########## select an intrinsic goal ##########
        if int_goal is None:
            int_goal = select_int_goal(hypothesized_recipe_graph, logger, cfg)
            if int_goal is None:
                status = "failed"
                break
            explored_int_goals.append(int_goal)
            explored_int_goals = list(set(explored_int_goals))
            wandb.config.update(
                {"explored_int_goals": explored_int_goals},
                allow_val_change=True
            )
            subgoal = None
            logger.info(f"New intrinsic goal: {int_goal}")
            logger.info(f"Current all crafting resources {hypothesized_recipe_graph.crafting_resources}")
            logger.info(f"Recipe from graph of intrinsic goal {int_goal}: {str(hypothesized_recipe_graph.get_recipe(int_goal))}\n")

        ########## make a subgoal ##########
        if subgoal is None:
            inventory = obs[-1][-1]['inventory']

            # subgoal_format: {"task": "mine/craft/smelt {waypoint}", "goal": [{waypoint}, {number}]}
            waypoint, subgoal, language_action_str, error_message = make_one_subgoal(
                plan_model,
                int_goal,
                inventory,
                action_memory,
                hypothesized_recipe_graph,
                topK,
                logger,
                cfg,
            )
            if error_message is not None:
                logger.error(f"Error message: {error_message}")
                status = "cannot generate plan"
                # Append so that failures recorded earlier in the episode are kept.
                failed_subgoals.append(f"achieve {waypoint}")
                break

            logger.info(f"Current intrinsic goal: {int_goal}, recipe from graph: {str(hypothesized_recipe_graph.get_recipe(int_goal))}")
            logger.info(f"After make_one_subgoal()")
            logger.info(f"[yellow]Waypoint: {waypoint}, Subgoal: {subgoal}[/yellow]")

        inventory_before_the_action = copy.deepcopy(obs[-1][-1]['inventory'])
        step_highlevel_actions += 1

        ########## execute the subgoal ##########
        mineflayer_code_action = convert_subgoal_to_mineflayer_code_action(subgoal)
        _, timed_out = _step_with_recovery(env, mineflayer_code_action)
        if timed_out:
            num_action_failed_timeout += 1

        # A short wait action follows; its observation is the one used below.
        void_action = convert_action_to_mineflayer(
            "wait10Ticks",
            -1,
        )
        obs, _ = _step_with_recovery(env, void_action)

        inventory_after_the_action = copy.deepcopy(obs[-1][-1]['inventory'])
        logger.info(f"step_highlevel_actions: {step_highlevel_actions}:")
        logger.info(f"subgoal: {str(subgoal)}")
        logger.info(f"inventory_before_the_action: {inventory_before_the_action}")
        logger.info(f"inventory_after_the_action: {inventory_after_the_action}")

        ########## After subgoal execution ##########
        ########## Extract dependencies ##########
        operation = subgoal["task"].split(" ")[0]  # e.g., "mine", "craft", "smelt"
        operation = operation.replace("chop", "mine").replace("punch", "mine").replace("gather", "mine")
        extracted_dependencies = extract_dependencies(inventory_before_the_action, inventory_after_the_action,
                                                      operation, oracle_graph)
        logger.info(f"Extracted dependencies: {extracted_dependencies}")

        flag_new_verified_items = False
        for dependency in extracted_dependencies:
            item_name = dependency["item_name"]
            if item_name in experienced_item_names:
                continue
            newly_verified = hypothesized_recipe_graph.save_verified_recipe_data(dependency, prefix)
            experienced_item_names.append(item_name)
            logger.info(f"New dependency: {dependency}\n")
            num_new_verified_items += int(newly_verified)
            flag_new_verified_items = flag_new_verified_items or newly_verified

        if flag_new_verified_items:
            log_wandb(hypothesized_recipe_graph, logger,
                      experienced_item_names, verified_int_goal_names, explored_int_goals, num_new_verified_items,
                      step_highlevel_actions=step_highlevel_actions, used_ticks=env.get_used_ticks())

        ########## Save subgoal outcome into action_memory ##########
        waypoint_success = check_waypoint_item_obtained(waypoint, inventory_before_the_action, inventory_after_the_action)
        action_memory.save_success_failure(waypoint, language_action_str, is_success=waypoint_success)

        # Keep the executed subgoal for bookkeeping before clearing it so that
        # the next iteration plans a fresh one.
        executed_subgoal = subgoal
        subgoal = None

        if not waypoint_success:
            logger.info(f"[red]Failed waypoint {waypoint}[/red]")
            failed_waypoints.append(waypoint)
            failed_subgoals.append(executed_subgoal)

            if uses_self_correction:
                # Reflection is attempted on every failure; the helper itself
                # decides whether the failure threshold has been reached.
                reflect_on_failure(waypoint, language_action_str, inventory_before_the_action,
                                   action_memory, plan_model,
                                   plan_failure_threshold, logger)

            # The total is compared against the negated threshold and logged
            # via abs(); presumably the memory stores failures as a negative
            # count -- confirm against DecomposedMemory.
            wp_total_failure_counts = action_memory.retrieve_total_failed_counts(waypoint)
            if wp_total_failure_counts <= -revision_sg_failure_threshold:
                logger.warning(f"{waypoint} failed {abs(wp_total_failure_counts)} times, so increment exploration count of {waypoint}.")
                failed_item = hypothesized_recipe_graph.increment_count(waypoint, prefix)

                if uses_recipe_revision:
                    # reset success failure history of the changed items from the action_memory
                    for item in hypothesized_recipe_graph.get_recipe_revised_items():
                        action_memory.reset_success_failure_history(item)
                elif uses_self_correction:
                    # make hypothesized recipe for the failed item
                    if failed_item is not None:
                        all_reflections = action_memory.retrieve_all_reflections(failed_item)
                        make_hypothesis_using_llm(failed_item, inventory_before_the_action,
                                                  hypothesized_recipe_graph, action_memory, obs, logger,
                                                  plan_model, all_reflections)
                elif "deckard" in prefix and waypoint != int_goal:
                    # also count the failure against the intrinsic goal itself
                    hypothesized_recipe_graph.increment_count(int_goal, prefix)

                action_memory.reset_success_failure_history(waypoint)
                if uses_recipe_revision:
                    hypothesized_recipe_graph.reset_recipe_revised_items()

                # Give up on the current intrinsic goal; a new one is selected next.
                int_goal = None
                hypothesized_recipe_graph.free_exploring_goal()
        else:
            # waypoint is successfully achieved
            logger.info(f"[green]Waypoint {waypoint} is successfully achieved![/green]")
            completed_subgoals.append(executed_subgoal)
            if waypoint == int_goal:
                logger.info(f"Intrinsic goal {int_goal} is achieved!")
                int_goal = None
                hypothesized_recipe_graph.free_exploring_goal()
                verified_int_goal_names.append(waypoint)

        ########## Episode termination checks ##########
        if step_highlevel_actions >= max_step_highlevel_actions:
            logger.info(f"[red]Reached max_step_highlevel_actions: {max_step_highlevel_actions}[/red]")
            status = "failed"
            break

        minutes = env.get_minutes()
        logger.info(f"step_highlevel_actions: {step_highlevel_actions}, current minutes: {minutes}")
        if minutes >= max_minutes:
            logger.info(f"[red]Reached max_minutes: {max_minutes}[/red]")
            status = "failed"
            break

        # NOTE: if a same waypoint is failed multiple times, then end this episode
        # Mineflayer environment is not stable, so sometimes it fails to craft item even if it has enough materials
        if failed_waypoints.count(waypoint) >= 10:
            logger.info(f"[red]Waypoint {waypoint} failed 10 or more times in this episode[/red]")
            status = "failed"
            break
        if failed_waypoints.count(int_goal) >= 10:
            logger.info(f"[red]Intrinsic goal {int_goal} failed 10 or more times in this episode[/red]")
            status = "failed"
            break

        # If one episode has too many failures, then end this episode
        # The agent could stuck in bedrock or something, so we need to end the episode
        if num_action_failed_timeout >= 5:
            logger.info(f"[red]Reached max num_action_failed_timeout: {num_action_failed_timeout}[/red]")
            status = "failed"
            break
        if len(failed_waypoints) >= 30:
            logger.info(f"[red]Reached max failed waypoints: {len(failed_waypoints)}[/red]")
            status = "failed"
            break

    hypothesized_recipe_graph.free_exploring_goal()
    hypothesized_recipe_graph.increment_num_episodes_save_memory(step_highlevel_actions)

    logger.info(f"\nEnd of exploration:")
    log_wandb(hypothesized_recipe_graph, logger,
              experienced_item_names, verified_int_goal_names, explored_int_goals, num_new_verified_items,
              step_highlevel_actions=step_highlevel_actions, used_ticks=env.get_used_ticks())

    minutes = env.get_minutes()
    wandb.log({
        "minutes": minutes,
        "step_highlevel_actions": step_highlevel_actions,
    })

    return status, completed_subgoals, failed_subgoals, failed_waypoints


def main(cfg: DictConfig):
    """Set up logging, seeding, wandb and the environment, then run one exploration episode.

    After the episode the failed waypoints are reported to wandb, the
    Minecraft server log is uploaded if present, the environment is closed and
    the process exits with status 0. If the environment reset fails, the
    wandb run is finished with exit code 1 and the process exits with status 1.

    Args:
        cfg: Fully populated run configuration (see the ``__main__`` block).
    """
    run_uuid = cfg["run_uuid"]
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s %(levelname)s: %(message)s',
        handlers=[
            logging.FileHandler(f"main_dependency_exploration_{run_uuid}.log"),
            logging.StreamHandler(sys.stdout)
        ]
    )
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logger = logging.getLogger(__name__)

    benchmark = cfg["benchmark"]
    prefix = cfg["prefix"]
    seed = int(cfg["seed"])
    biome = cfg["prefer_biome"]

    # Seed every RNG in use so that runs are repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    transformers.set_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    wandb.init(project=f"{prefix}-mineflayer-exploration", entity="",
               config=OmegaConf.to_container(cfg, resolve=True), save_code=True)

    logger.info(f"main_dependency_exploration.py is executed.")
    logger.info(f"run_uuid: {run_uuid}")
    logger.info(f"seed: {seed}")
    logger.info(f"prefix: {prefix}\n")

    action_memory = DecomposedMemory(cfg, logger)
    plan_model = QwenVLPlanningModel()

    # NOTE(review): the request timeout is derived from max_minutes; the
    # cfg["env_request_timeout"] value set by the CLI is not used here -- confirm.
    env = VoyagerEnv(
        mc_port=cfg["mc_port"],
        server_port=cfg["game_server_port"],
        request_timeout=cfg["max_minutes"] * 60,
        visual_server_port=cfg["game_visual_server_port"],
        log_path=wandb.run.dir,
        uuid=run_uuid
    )

    try:
        logger.info("[red]env & server reset...[/red] ")
        env.reset()
        # The observation handed to the agent comes from an empty step after reset.
        obs = env.step('')
    except Exception as e:
        logger.error(f"Error during reset: {e}")
        wandb.finish(exit_code=1)
        sys.exit(1)

    logger.info("Done of reset of env and server")

    try:
        wandb.config.update(
            {"run_uuid": run_uuid, "benchmark": benchmark},
            allow_val_change=True
        )

        action_memory.current_environment = biome

        wandb.config.update({
            "biome": biome,
            "prefix": prefix,
        }, allow_val_change=True)

        status, completed_subgoals, failed_subgoals, failed_waypoints = new_agent_do(
            env, plan_model, logger, obs, action_memory, cfg, run_uuid
        )

        failed_waypoints = sorted(set(failed_waypoints))

        logger.info(f"status: {status}")
        logger.info(f"completed_subgoals: {str(completed_subgoals)}\n")
        logger.info(f"failed_subgoals: {str(failed_subgoals)}\n")

        wandb.config.update({
            "failed_waypoints": failed_waypoints,
        }, allow_val_change=True)

        wandb.log({
            "total_failed_waypoints": len(failed_waypoints),
        })

        logger.info(f"Done of exploration. run_uuid: {run_uuid}")

        # Upload the server log when it exists; a missing log directory is not an error.
        log_src = os.path.join("/app/mine_server/logs/", 'latest.log')
        if os.path.isfile(log_src):
            log_dst = f'mine_server_{run_uuid[:4]}.log'
            shutil.copy(log_src, log_dst)
            wandb.save(log_dst)

        wandb.finish()
    finally:
        # Release the environment even if the episode raised.
        env.close()

    sys.exit(0)


if __name__ == "__main__":
    # Command-line entry point: merge CLI arguments and the benchmark config
    # into the base config, then hand the result to main().
    parser = argparse.ArgumentParser(description="Run ADAM with custom items and environment goals.")

    parser.add_argument("--config_path", type=str, default="/app/repo/ours/conf/evaluate.yaml")
    parser.add_argument("--benchmark", type=str, default="diamond")
    parser.add_argument("--benchmark_evaluate_id", type=int, default=0)
    parser.add_argument("--exploration_uuid", type=str, default="qwer")
    parser.add_argument("--run_uuid", type=str, default="qwer")

    parser.add_argument("--prefix", type=str, default="ours_exploration")
    parser.add_argument("--prefer_biome", type=str, default="forest")
    parser.add_argument("--seed", type=int, default=0)
    # NOTE: --max_minutes is accepted but not applied; the effective value
    # comes from the benchmark config below.
    parser.add_argument("--max_minutes", type=int, default=30)

    parser.add_argument("--mc_port", type=int, default=25565)
    parser.add_argument("--server_port", type=int, default=3000)
    parser.add_argument("--request_timeout", type=int, default=600)
    parser.add_argument("--visual_server_port", type=int, default=9000)

    args = parser.parse_args()
    config_path = args.config_path
    benchmark = args.benchmark
    prefix = args.prefix
    seed = args.seed

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config file {config_path} not found.")

    cfg = OmegaConf.load(config_path)
    version = cfg["version"]

    # Benchmark configs live in a "benchmark" directory next to the main
    # config. os.path.dirname also handles a bare file name (no separator).
    benchmark_config_path = os.path.join(os.path.dirname(config_path), "benchmark", f"{benchmark}.yaml")
    if not os.path.exists(benchmark_config_path):
        raise FileNotFoundError(f"Benchmark config file {benchmark_config_path} not found.")
    benchmark_config = OmegaConf.load(benchmark_config_path)

    cfg["benchmark"] = benchmark
    cfg["benchmark_evaluate_id"] = args.benchmark_evaluate_id
    cfg["exploration_uuid"] = args.exploration_uuid
    cfg["run_uuid"] = args.run_uuid

    cfg["prefix"] = prefix
    cfg["prefer_biome"] = args.prefer_biome
    cfg["seed"] = seed
    cfg["world_seed"] = seed  # the world seed follows the run seed

    cfg["mc_port"] = args.mc_port
    cfg["game_server_port"] = args.server_port
    # Stored in the config, but main() builds the env timeout from max_minutes.
    cfg["env_request_timeout"] = args.request_timeout
    cfg["game_visual_server_port"] = args.visual_server_port

    # Substitute the ${prefix} / ${version} placeholders in the output paths.
    cfg["memory"]["path"] = cfg["memory"]["path"].replace("${prefix}", prefix).replace("${version}", version)
    cfg["results"]["path"] = cfg["results"]["path"].replace("${version}", version)

    # Task list and episode budgets always come from the benchmark config.
    cfg["all_task"] = benchmark_config["all_task"]
    cfg["max_minutes"] = benchmark_config['env']['max_minutes']
    cfg["max_step_highlevel_actions"] = benchmark_config['env']['max_step_highlevel_actions']

    main(cfg)

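# NOTE(review): stray note "max_step_highlevel_actions, 150" -- presumably a typical value from the benchmark config; confirm or remove.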
