import re
import base64
import requests
import json, os
import pandas as pd
import tqdm
from pathlib import Path
import sys
import re
import ast
import json
from collections import OrderedDict
from pathlib import Path
from thor_to_abw_state_slim import build_abw_state
from memory_scoring import ConstraintRetriever, sync_step_constraints_to_retriever
from RAG_memory import *


# Make the current working directory importable; the project imports that
# follow this block are written relative to it.
directory = Path.cwd().absolute()
# For a plain .py script the working directory itself is appended (no ".parent").
sys.path.append(str(directory))
print(os.getcwd())

# import environment
from AI2Thor.env_new import AI2ThorEnv
from AI2Thor.baselines.llamar.llamar_utils_multiagent import *
from AI2Thor.baselines.utils import Logger, AutoConfig
from AI2Thor.baselines.utils.logging import MemoryLogger, RuleLogger
import warnings
import os


# Command-line options for a run. They are parsed here, at import time, so the
# agent count is known before the config file is written further down.
import argparse

# (flag, add_argument keyword arguments), registered in this order.
_CLI_OPTIONS = (
    ("--task", {"type": int, "default": 0}),
    ("--floorplan", {"type": int, "default": 0}),
    ("--verbose", {"action": "store_true"}),
    ("--action_verbose", {"action": "store_true"}),
    ("--name", {"type": str, "default": "llamar"}),
    ("--agents", {"type": int, "default": 2}),
    ("--config_file", {"type": str, "default": "config4.json"}),
)

parser = argparse.ArgumentParser()
for _flag, _options in _CLI_OPTIONS:
    parser.add_argument(_flag, **_options)
args = parser.parse_args()

# Overwrite the llamar config file with the agent count requested on the CLI.
# NOTE(review): `llamar_utils_multiagent` is star-imported near the top of this
# file, i.e. before this write happens. If that module reads config.json at
# import time it would see the value left by a previous run -- confirm.
with open("AI2Thor/baselines/llamar/config.json", "w") as config_out:
    json.dump({"num_agents": args.agents}, config_out)





# Set the tokenizers parallelism flag explicitly (the original comment says
# this is done to avoid a warning).
os.environ["TOKENIZERS_PARALLELISM"] = "true"

# Silence every Python warning for the rest of the run.
warnings.simplefilter("ignore")


# Read the OpenAI key from ~/openai_key.json and build the HTTP headers that
# are passed to the LLM helper calls later in this script.
with open(os.path.expanduser("~/openai_key.json")) as key_file:
    api_key = json.load(key_file)["my_openai_api_key"]
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# autoconfig: select task, floorplan and agent count from the CLI arguments
auto = AutoConfig(config_file=args.config_file)
auto.set_task(args.task)
auto.set_floorplan(args.floorplan)
auto.set_agents(args.agents)
# number of iterations of the main loop below (used as `tqdm.trange(timeout)`)
timeout = auto.get_task_timeout()

# environment initialization
config = auto.config()

env = AI2ThorEnv(config)
# remove verbosity
env.verbose = False
# reset the environment for the selected task; the return value kept in `d`
# is not read again in this file
d = env.reset(task=auto.task_string())
# loggers: `logger` records per-step results, `memorylogger` records memory
# transitions and `rulelogger` records generated constraints (see their use
# in the main loop)
logger = Logger(env=env, baseline_name=args.name)
memorylogger = MemoryLogger() 
rulelogger = RuleLogger() 
print("baseline path (w/ results):", logger.baseline_path)

# State carried from one step to the next.
# `previous_action` starts empty. The original `[] * config.num_agents` always
# evaluated to `[]` (repeating an empty list yields an empty list), so the
# value is spelled out directly; it is replaced after the first env.step().
previous_action = []
# One success flag per agent, initialized with True. This used to be the
# literal `[True, True]`, which only matched runs with exactly two agents.
previous_success = [True] * config.num_agents

# Every LLM call is wrapped in a retry loop: a reply that cannot be parsed
# (e.g. malformed JSON) raises, and the request is simply sent again.
print("*" * 50)
print("Starting the llamar baseline")
print("*" * 50)

# PLANNER - initial plan only; it is refreshed at the end of every step below.
success = False
while not success:
    try:
        # TODO: remove prints
        outdict = get_action(
            get_gpt_response(env, config, action_or_planner="planner")
        )
        if args.verbose:
            print("Planner Output:\n", outdict)
    except Exception as err:
        print("failure reason (in try-except loop):", err)
    else:
        success = True
        
# ---------------------------------------------------------------------------
# Main control loop: actor -> constraint rules -> counterfactual plans ->
# plan ranking -> environment step -> verifier -> planner.
# ---------------------------------------------------------------------------

# Exceptions `ast.literal_eval` is documented to raise on malformed input.
_LITERAL_EVAL_ERRORS = (ValueError, TypeError, SyntaxError, MemoryError, RecursionError)

# Matches numbered plans such as `1. ['...', '...']` in the generator's reply.
_NUMBERED_PLAN_RE = re.compile(r"(\d+)\.\s*(\[[^\]]+\])")


def _query_llm_until_parsed(role):
    """Query the LLM as `role` until its reply parses, and return the parsed output.

    `role` is forwarded as `action_or_planner` ("action", "verifier" or
    "planner"). The call is retried without limit, as in the original loops,
    but only `Exception` is caught, so Ctrl-C (KeyboardInterrupt) and
    SystemExit still end the run. The failure reason is printed on each retry.
    """
    while True:
        try:
            return get_action(get_gpt_response(env, config, action_or_planner=role))
        except Exception as e:
            print("failure reason (in try-except loop):", e)


def _parse_counterfactual_plans(text, original_plan):
    """Return an ordered mapping of plan name -> actions.

    The mapping always starts with 'Original Plan' (the actor's own plan);
    every numbered list found in `text` is added as 'Counterfactual Plan <n>'.
    Entries that are not valid Python literals are reported and skipped. A
    non-string `text` yields only the original plan.
    """
    plans = OrderedDict()
    plans['Original Plan'] = original_plan
    if not isinstance(text, str):
        return plans
    for num, action_str in _NUMBERED_PLAN_RE.findall(text):
        try:
            plans[f"Counterfactual Plan {num}"] = ast.literal_eval(action_str)
        except _LITERAL_EVAL_ERRORS:
            print(f"[⚠️] Failed to parse plan {num}: {action_str}")
    return plans


def _select_plans(ranking_text, original_plan):
    """Return `(best_plan, original_plan_as_ranked)` from the scorer's reply.

    `ranking_text` is expected to be the text of a Python literal list of
    (plan name, actions) pairs, best first (inferred from how the result is
    indexed). If it cannot be parsed, is empty, or has the wrong shape, both
    values fall back to `original_plan`; the same fallback is used for the
    second value when the ranking does not contain 'Original Plan'.
    """
    try:
        ranked = ast.literal_eval(ranking_text)
        best_plan = ranked[0][1]
        ranked_original = next(
            (actions for name, actions in ranked if name == 'Original Plan'),
            original_plan,
        )
    except _LITERAL_EVAL_ERRORS + (IndexError, KeyError) as e:
        print(f"[⚠️] Failed to parse plan ranking, using Original Plan: {e}")
        return original_plan, original_plan
    return best_plan, ranked_original


memory = None
subtask = None
preaction = None
constraint = None

try:
    for step_num in tqdm.trange(timeout):

        update_plan(env, outdict["plan"], env.closed_subtasks)

        # ACTOR
        outdict = _query_llm_until_parsed("action")
        if args.verbose or args.action_verbose:
            print("*" * 10, "Actor outdict!", "*" * 10)

        # Snapshot the previous step's values before they are overwritten below.
        current_memory = memory
        current_subtask = subtask
        current_action = preaction
        current_success = previous_success
        # NOTE(review): `constraint` is never reset, so after the first failure
        # this logs the last generated constraint again with a different
        # subtask/action/memory (and logs all-None values on the first step)
        # -- confirm this is intended.
        rulelogger.log_constraint(current_subtask, current_action, current_memory, constraint)

        preaction, reason, subtask, memory, failure_reason = process_action_llm_output(
            outdict
        )

        memorylogger.log_transition(
            current_subtask, current_action, current_memory, memory, current_success, failure_reason
        )

        # manual temporary logging
        print("Step:", step_num)
        print("Subtask:", subtask)
        print("Preaction", preaction)
        print("Memory", memory)
        print("Failure", failure_reason)

        # Constraint rule generator: derive a symbolic constraint rule from the
        # reported failure reason and add it to the logged rule set.
        if failure_reason and failure_reason != "None":
            constraint = run_constraint_generator(
                current_subtask, current_action, current_memory, failure_reason, args.agents, headers
            )
            rulelogger.log_constraint(current_subtask, current_action, current_memory, constraint)

        # The retriever is rebuilt every step (presumably so that rules logged
        # during this run are picked up -- confirm).
        rag_rules = RuleRetriever("logs/rules.csv")

        query_rules = str(memory) + str(preaction)
        rules = rag_rules.retrieve(query_rules, top_k=3)

        counterfactual_plans = generator(subtask, preaction, memory, rules, args.agents, headers)
        print("✅✅✅Counterfactual_plans:", counterfactual_plans)

        # Candidate set: the actor's plan first, then each parsed counterfactual plan.
        original_plan = preaction
        result_dict = _parse_counterfactual_plans(counterfactual_plans, original_plan)
        plan_list = list(result_dict.items())
        print(plan_list)

        print("✅Failure", failure_reason)
        candidate_plans_rank = rag_scorer(subtask, memory, plan_list, failure_reason, args.agents, headers)
        print("✅✅✅", candidate_plans_rank)

        # Execute the top-ranked plan. The actor's own plan is converted and
        # printed as well, but it is not executed.
        preaction, llamar_preaction = _select_plans(candidate_plans_rank, original_plan)
        action = print_stuff(env, preaction, reason, subtask, memory, failure_reason)
        print(action)
        llamar_action = print_stuff(env, llamar_preaction, reason, subtask, memory, failure_reason)
        print(llamar_action)
        logger.log_agent_mem(step_num, action, reason, subtask, memory, failure_reason)

        if config.use_shared_subtask:
            env.update_subtask(subtask, 0)
        if config.use_shared_memory:
            env.update_memory(memory, 0)
        d1, successes = env.step(action)
        previous_action = action
        previous_success = successes
        if args.verbose:
            print_relevant_info(env, config, env.input_dict)

        # VERIFIER
        outdict = _query_llm_until_parsed("verifier")
        if args.verbose:
            print("Verifier Output:\n", outdict)

        # v0 - the completed-subtasks list is replaced, not merged, for now.
        # An empty (or missing-as-None) list is stored as None.
        env.closed_subtasks = outdict["completed subtasks"] or None
        env.input_dict["Robots' completed subtasks"] = env.closed_subtasks
        env.get_planner_llm_input()

        # PLANNER
        outdict = _query_llm_until_parsed("planner")

        # get statistics and finish step
        coverage = env.checker.get_coverage()
        transport_rate = env.checker.get_transport_rate()
        finished = env.checker.check_success()

        # log current 'step'
        logger.log_step(
            step=step_num,
            preaction=preaction,
            action=action,
            success=previous_success,
            coverage=coverage,
            transport_rate=transport_rate,
            finished=finished,
        )

        if args.verbose:
            print("_" * 50)
            print(f"Step {step_num}")
            print("Completed Subtasks: ")
            print("\n".join(env.checker.subtasks_completed))

        # if the model outputs "Done" for every agent, stop early
        if all(status == "Done" for status in action):
            break
finally:
    # STOP - stop the controller / remove window, even if the loop raised
    env.controller.stop()
