import re
import base64
import requests
import json, os
import pandas as pd
import tqdm
from pathlib import Path
import sys
import re
import json
from pathlib import Path
from thor_to_abw_state_slim import build_abw_state
from memory_scoring import ConstraintRetriever, sync_step_constraints_to_retriever
# Put the current working directory on sys.path (intended to make the
# project-relative imports resolvable when the script is launched from there).
# note: no ".parent" addition is needed for python (.py) files
directory = Path.cwd().absolute()
sys.path.append(str(directory))
print(os.getcwd())

# import environment
from AI2Thor.env_new import AI2ThorEnv

# Parse the command line before the baseline utils are imported: the number of
# agents is written to the llamar config file first, and the utils are imported
# afterwards "with updated config file".
import argparse

parser = argparse.ArgumentParser(
    description="Run the llamar baseline in an AI2-THOR environment."
)
parser.add_argument(
    "--task", type=int, default=0, help="task index passed to AutoConfig.set_task"
)
parser.add_argument(
    "--floorplan",
    type=int,
    default=0,
    help="floorplan index passed to AutoConfig.set_floorplan",
)
parser.add_argument(
    "--verbose",
    action="store_true",
    help="print planner/verifier outputs and per-step details",
)
parser.add_argument(
    "--action_verbose",
    action="store_true",
    help="print the actor banner even when --verbose is not set",
)
parser.add_argument(
    "--name", type=str, default="llamar", help="baseline name handed to the Logger"
)
parser.add_argument(
    "--agents",
    type=int,
    default=2,
    help="number of agents (written to the llamar config and given to AutoConfig)",
)
parser.add_argument(
    "--config_file",
    type=str,
    default="config.json",
    help="config file passed to AutoConfig",
)
args = parser.parse_args()

# Overwrite the llamar config file so that it carries the requested agent count.
d = {"num_agents": args.agents}
Path("AI2Thor/baselines/llamar/config.json").write_text(json.dumps(d))


# import utils for this baseline - with updated config file
from AI2Thor.baselines.llamar.llamar_utils_multiagent import *
from AI2Thor.baselines.utils import Logger, AutoConfig

import warnings
import os


# Silence every Python warning for the rest of the run.
warnings.filterwarnings(action="ignore")

# Set explicitly to avoid the tokenizers parallelism warning.
os.environ["TOKENIZERS_PARALLELISM"] = "true"


# Read the OpenAI API key from ~/openai_key.json and build the HTTP headers
# that are later handed to the constraint / counterfactual / evolver calls.
key_file = os.path.expanduser("~") + "/openai_key.json"
with open(key_file) as json_file:
    key = json.load(json_file)
api_key = key["my_openai_api_key"]
headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {api_key}",
}

# autoconfig: select task, floorplan and agent count from the CLI arguments
auto = AutoConfig(config_file=args.config_file)
auto.set_task(args.task)
auto.set_floorplan(args.floorplan)
auto.set_agents(args.agents)
# used below as the maximum number of steps of the main loop
timeout = auto.get_task_timeout()

# environment initialization
# NOTE: config is requested only after task/floorplan/agents have been set
config = auto.config()

env = AI2ThorEnv(config)
# remove verbosity
env.verbose = False
# reset the environment for the selected task; the returned value is kept in d
d = env.reset(task=auto.task_string())
# logger (named after the --name argument)
logger = Logger(env=env, baseline_name=args.name)
print("baseline path (w/ results):", logger.baseline_path)

# Bookkeeping of the last step's actions and their outcomes; both are
# overwritten after every env.step() in the main loop.
# The original `[] * config.num_agents` always evaluates to an empty list, so
# it is written as such.
previous_action = []
# One flag per agent (initialized with True) instead of a hard-coded pair, so
# that runs with --agents other than 2 start from a consistently sized list.
previous_success = [True] * config.num_agents

# LLM calls in this script are wrapped in retry loops (try/except) because
# parsing the responses can fail, e.g. on json errors.
print("*" * 50)
print("Starting the llamar baseline")
print("*" * 50)

# PLANNER - Initial only!
# Keep querying until a planner response has been obtained and parsed without
# raising; every failure is reported and followed by another attempt.
success = False
while True:
    try:
        # TODO: remove prints
        response = get_gpt_response(env, config, action_or_planner="planner")
        outdict = get_action(response)
        if args.verbose:
            print("Planner Output:\n", outdict)
    except Exception as e:
        print("failure reason (in try-except loop):", e)
    else:
        success = True
        break

# Load the persisted constraint rules that the main loop starts from.
rules_path = Path("constraint4_0915.json")

with open(rules_path, "r", encoding="utf-8") as f:
    # constraint_rules keeps the raw JSON text; constraint_rules_rag is the
    # parsed form of the same content.
    constraint_rules = f.read()
    print("Type:", type(constraint_rules))
    constraint_rules_rag = json.loads(constraint_rules)
    print("Type:", type(constraint_rules_rag))

# Count the parsed rules (len() of the raw text would be a character count) and
# report the file that was actually read.
print(f"✅ Loaded {len(constraint_rules_rag)} constraints from {rules_path}")

memory = None


def _query_llm(role):
    """Query the LLM as `role` and return its parsed output.

    The request is repeated until both get_gpt_response and get_action
    complete without raising (the retry exists to get past responses that
    fail to parse, e.g. json errors). Only ``Exception`` is caught, so
    Ctrl-C and SystemExit still terminate the run instead of being swallowed.

    Args:
        role: value forwarded as ``action_or_planner``; this script uses
            "action", "verifier" and "planner".

    Returns:
        The value returned by get_action for the first response that parses.
    """
    while True:
        try:
            return get_action(
                get_gpt_response(env, config, action_or_planner=role)
            )
        except Exception as e:
            print(f"failure reason (in try-except loop, {role}):", e)


# Main loop: one actor -> constraint/RAG -> verifier -> planner cycle per step,
# for at most `timeout` steps. The try/finally guarantees that the controller
# is stopped even when a step raises or the run is interrupted.
try:
    for step_num in tqdm.trange(timeout):

        # `outdict` always holds the latest planner output at this point
        # (initial planner before the loop, re-planner at the end of each step).
        update_plan(env, outdict["plan"], env.closed_subtasks)

        # ACTOR
        actor_outdict = _query_llm("action")
        if args.verbose or args.action_verbose:
            print("*" * 10, "Actor outdict!", "*" * 10)

        prememory = memory
        preaction, reason, subtask, memory, failure_reason = (
            process_action_llm_output(actor_outdict)
        )
        action = print_stuff(env, preaction, reason, subtask, memory, failure_reason)

        logger.log_agent_mem(
            step_num, action, reason, subtask, prememory, memory, failure_reason
        )
        # manual temporary logging
        if config.use_shared_subtask:
            env.update_subtask(subtask, 0)
        if config.use_shared_memory:
            env.update_memory(memory, 0)
        d1, successes = env.step(action)
        previous_action = action
        previous_success = successes
        if args.verbose:
            print_relevant_info(env, config, env.input_dict)

        print("Step:", step_num)
        print("Subtask:", subtask)
        print("Action:", action)
        print("Memory", memory)
        print("Failure", failure_reason)

        # NOTE(review): constraint_rules_rag is assigned only once before the
        # loop, so previous_rules is always the initially loaded rule set --
        # confirm this is intended rather than "the rules of the previous step".
        previous_rules = constraint_rules_rag

        # Constraint rule generator: induce symbolic constraint rules from
        # failure_reason and update the rule set.
        new_constraint = run_constraint_generator(
            step_num,
            subtask,
            action,
            memory,
            failure_reason,
            constraint_rules,
            args.agents,
            headers,
        )
        if new_constraint:
            try:
                parsed_constraint = (
                    json.loads(new_constraint)
                    if isinstance(new_constraint, str)
                    else new_constraint
                )
            except json.JSONDecodeError as e:
                # Keep the previous rule set instead of aborting the run (and
                # instead of leaving constraint_rules bound to unparsable text).
                print("Discarding malformed constraint generator output:", e)
            else:
                constraint_rules = parsed_constraint
                with open(rules_path, "w", encoding="utf-8") as f:
                    json.dump(constraint_rules, f, indent=2, ensure_ascii=False)
                print("✅ Saved new structured constraint to file.")

        # counterfactual generator

        # NOTE(review): constraint_rules is the raw JSON text until the
        # generator first yields a new rule set and a parsed object afterwards
        # -- confirm add_rules_batch / generator accept both forms.
        retriever = ConstraintRetriever()
        retriever.add_rules_batch(constraint_rules)

        # Re-read the rules file so that current_rules reflects what is on disk
        # after the (possible) save above.
        with open(rules_path, "r", encoding="utf-8") as f:
            current_rules_rag = f.read()
            print("Type2:", type(current_rules_rag))
            current_rules = json.loads(current_rules_rag)
            print("Type2:", type(current_rules))

        print("Type3:", type(previous_rules))
        sync_step_constraints_to_retriever(previous_rules, current_rules, retriever)

        # Retrieve the rules most relevant to the current subtask and memory.
        query = subtask + memory
        print(query)
        results = retriever.retrieve(query, top_k=5)
        print("✅ RAG_rules:", results)
        for r in results:
            print(f"#{r['rank']}: {r['symbolic']}\nScore: {r['score']:.4f}\n")

        counterfactual_plans = generator(
            subtask, action, memory, constraint_rules, args.agents, headers
        )
        # Label fixed: this line prints the counterfactual plans, not the
        # retrieved rules printed above.
        print("✅✅✅Counterfactual plans:", counterfactual_plans)

        # Self-evolving evaluator: scores based on memory and the retrieved
        # rules; per the original note it is meant to select the best-scoring
        # plan as environment input. Here the score is only saved to a file.
        score = evolver(subtask, action, memory, results, headers)
        with open("evolver_0915.json", "w") as f:
            json.dump(score, f, indent=2)
        print("✅ Saved new score to file.")

        # VERIFIER
        verifier_outdict = _query_llm("verifier")
        if args.verbose:
            print("Verifier Output:\n", verifier_outdict)

        # v0 - update completed subtasks list - no for now
        # An empty (or missing) list of completed subtasks is stored as None.
        env.closed_subtasks = verifier_outdict["completed subtasks"] or None
        env.input_dict["Robots' completed subtasks"] = env.closed_subtasks
        env.get_planner_llm_input()

        # PLANNER (re-plan; consumed by update_plan at the top of the next step)
        outdict = _query_llm("planner")

        # get statistics and finish step
        coverage = env.checker.get_coverage()
        transport_rate = env.checker.get_transport_rate()
        finished = env.checker.check_success()

        # log current 'step'
        logger.log_step(
            step=step_num,
            preaction=preaction,
            action=action,
            success=previous_success,
            coverage=coverage,
            transport_rate=transport_rate,
            finished=finished,
        )

        if args.verbose:
            print("_" * 50)
            print(f"Step {step_num}")
            print("Completed Subtasks: ")
            print("\n".join(env.checker.subtasks_completed))

        # if the model outputs "Done" for all agents, break
        if all(status == "Done" for status in action):
            break
finally:
    # STOP - stop the controller / remove window
    env.controller.stop()
