
import os
import pandas as pd
import glob 
import random
import sys 
sys.path.append(".")

def llama_prompt(system, user):
    """Wrap a system and a user message in the Llama 3 chat template.

    The returned string starts with the begin-of-text token, contains the
    system turn followed by the user turn (each closed by an end-of-turn
    token) and ends with an open assistant header, so that a completion
    continues as the assistant's answer.
    """

    def header(role):
        # Role marker that opens a turn in the template.
        return f"<|start_header_id|>{role}<|end_header_id|>"

    end_of_turn = "<|eot_id|>"
    return (
        f"<|begin_of_text|>"
        f"{header('system')}{system}{end_of_turn}"
        f"{header('user')}{user}{end_of_turn}"
        f"{header('assistant')}"
    )

# Newline character kept in a variable so it can be referenced inside the
# f-string replacement fields of the prompt builders below (presumably because
# backslashes are not allowed in f-string expressions before Python 3.12).
breakline_car = "\n"



def _concretize_open_goal(state, original_state, placeholder, door):
    """Replace one abstract placeholder of an "open" goal by a concrete door.

    Args:
        state: Current state text (closed-door lines already removed).
        original_state: Untouched state text, used to look up which tiles
            hold a closed door.
        placeholder: Either "goal object" or "goal location".
        door: Door description extracted from the goal (e.g. "red door").

    Returns:
        The state text with every occurrence of the placeholder made concrete.
    """
    if placeholder not in state:
        return state

    # Coordinates of every "A <placeholder> is on tile (x,y)" feature,
    # de-duplicated in first-seen order so the result is deterministic.
    fragments = state.split(f"A {placeholder}")[1:]
    coords = list(dict.fromkeys(
        fragment.split(")")[0].split("(")[1] for fragment in fragments
    ))
    inventory_name = door.replace("a ", "")

    if not coords:
        # The placeholder only appears in the inventory-style "[...]" form.
        return state.replace(f"[{placeholder}]", f"[open {inventory_name}]")

    for coord in coords:
        # A door is reported as closed only if the original state listed a
        # closed door on that exact tile.
        if f"A closed door is on tile ({coord})" in original_state:
            status = "closed"
        else:
            status = "open"
        state = state.replace(
            f"A {placeholder} is on tile ({coord})",
            f"A {status} {door} is on tile ({coord})",
        )
        state = state.replace(f"[{placeholder}]", f"[{status} {inventory_name}]")
    return state


def remove_abstract(state, goal):
    """Rewrite an abstracted state so it no longer mentions "goal" placeholders.

    The abstract markers "goal object" / "goal location" found in ``state``
    are replaced by the concrete names extracted from ``goal``. The goal
    families are tested in this order: "put ... next to ...",
    "go to the tile ...", "go to ...", "pick up ..." and "open ...".

    Args:
        state: Newline-separated textual description of the environment.
        goal: Textual goal used to recover the concrete object/location names.

    Returns:
        The state text with the placeholders substituted (or, for
        "go to the tile" goals, with every line containing "goal" removed).

    Raises:
        ValueError: If the goal matches none of the known families, or if an
            "open" goal still leaves the word "goal" in the rewritten state.
    """
    if "put" in goal:
        target = goal.split("put ")[1].split(" next to")[0]
        location = goal.split("next to ")[1]
        state = state.replace("A goal object", target)
        state = state.replace("A goal location", location)
        state = state.replace("goal object", target)
        state = state.replace("goal location", location)
        return state

    if "go to the tile" in goal:
        # The destination tile is already explicit in the goal: drop every
        # feature line that refers to an abstract goal.
        return "\n".join(line for line in state.split("\n") if "goal" not in line)

    if "go to" in goal:
        target = goal.split("go to ")[1]
        state = state.replace("A goal object", target)
        state = state.replace("goal object", target)
        state = state.replace("A goal location", target)
        state = state.replace("goal location", target)
        return state

    if "pick up" in goal:
        target = goal.split("pick up ")[1]
        state = state.replace("A goal object", target)
        state = state.replace("goal object", target)
        return state

    if "open" in goal:
        # Normalise "open the X" to "open a X" before extracting the door name.
        door = goal.replace("the", "a").split("open a ")[1]

        original_state = state
        # Closed-door lines are removed; the goal placeholders on those tiles
        # are re-labelled as closed doors by the helper.
        state = "\n".join(
            line for line in state.split("\n") if "closed" not in line
        )

        state = _concretize_open_goal(state, original_state, "goal object", door)
        # The lookup here previously searched for "tile(" without a space and
        # therefore never matched; the helper uses the same "tile (" format
        # for both placeholders.
        state = _concretize_open_goal(state, original_state, "goal location", door)

        if "goal" in state:
            print("state : ", state)
            print("goal : ", goal)
            raise ValueError("Goal in State")

        return state

    raise ValueError("Goal not recognized")




def prompt_sequence_minigrid(env, state, goal):
    """Build the zero-shot prompt asking for an action sequence.

    ``env[0]`` and ``env[1]`` are inserted as the grid dimensions, every line
    of ``state`` is terminated with a period, and ``goal`` is appended. The
    result is formatted with ``llama_prompt``.
    """
    system = "You are a Reinforcement learning agent in the minigrid environment. You select the sequence of optimal actions to achieve the GOAL. Always answer as helpfully as possible, while being truthful."

    # The prompt is assembled from explicit segments so that the trailing
    # space after the action list stays visible.
    segments = [
        f"The state of the environment is given by the STATE. The environment is a {env[0]} by {env[1]} tiles grid. ",
        "The possible actions are { 0: turn left, 1: turn right, 2: move forward in the direction faced by the agent, 3: pick up an object, 4: drop an object, 5: toggle/activate an object, 6: done completing the task}. \n",
        "You only output the list of numbers associated with the optimal sequence of action to achieve the GOAL.\n",
        "\n",
        f"STATE : {state.replace(breakline_car, '.' + breakline_car)}.\n",
        "\n",
        f"GOAL : {goal}.",
    ]
    user = "".join(segments)

    return llama_prompt(system, user)



dataset_name = "" #TO BE FILLED WITH THE PATH TO THE SEEN GOALS


def list_seen_goals(dataset):
    """Return the goal names available for ``dataset``.

    Each entry directly under ``./data/datasets/<dataset>/env/0/`` is taken
    to be one goal; its name is the last path component of the match.
    Taking the second-to-last component would always yield the literal
    directory "0" of the pattern instead of the goal name.
    """
    pattern = f"./data/datasets/{dataset}/env/0/*"
    return [os.path.basename(os.path.normpath(path)) for path in glob.glob(pattern)]


# Goals for which example sequences exist; sampled by the in-context prompt.
seen_goals = list_seen_goals(dataset_name)



def prompt_sequence_ICL_minigrid(env, state, goal, nb_goal):
    """Build the in-context-learning prompt asking for an action sequence.

    ``nb_goal`` examples are drawn: for each one a goal is picked at random
    from ``seen_goals``, one row of its ``sequence.csv`` is sampled, and the
    row's state (de-abstracted with ``remove_abstract``) and action are shown
    as a worked example. The actual ``goal`` and ``state`` follow.

    Args:
        env: Indexable whose first two items are the grid dimensions.
        state: Newline-separated description of the current state.
        goal: Goal to achieve.
        nb_goal: Number of examples to include.

    Returns:
        The full prompt formatted with ``llama_prompt``.

    Raises:
        IndexError: If examples are requested but ``seen_goals`` is empty.
    """
    if nb_goal > 0 and not seen_goals:
        # random.choice would raise IndexError as well; this message points at
        # the actual cause.
        raise IndexError(
            "No seen goal available: set dataset_name to a dataset that "
            "contains ./data/datasets/<dataset_name>/env/0/<goal>/sequence.csv"
        )

    examples = []
    for i in range(nb_goal):
        random_goal = random.choice(seen_goals)
        df = pd.read_csv(f"./data/datasets/{dataset_name}/env/0/{random_goal}/sequence.csv")
        row = df.sample(1).iloc[0]
        state_ex = remove_abstract(row["state"], random_goal)
        action = row["action"]

        examples.append(
            f"###Example {i+1} :\n"
            "         \n"
            f"GOAL : {random_goal}.\n"
            "\n"
            f"STATE : {state_ex.replace(breakline_car, '.' + breakline_car)}.\n"
            "\n"
            f"Sequence of actions : {action}\n"
        )
    prompt_example = "".join(examples)

    user = "".join([
        f"The state of the environment is given by the STATE. The environment is a {env[0]} by {env[1]} tiles grid. ",
        "The possible actions are { 0: turn left, 1: turn right, 2: move forward in the direction faced by the agent, 3: pick up an object, 4: drop an object, 5: toggle/activate an object, 6: done completing the task}. ",
        "An object that has been picked up is placed in the agent inventory. ",
        "The agent or an object is considered at an object location if it is on an adjacent tile to the object (For example, (4,2) and (5,3) are not adjacent as their Manhattan distance |4-5| + |2-3| = 2 is strictly superior to 1) or it is in the inventory. ",
        "If the coordinates of the destination are mentioned, the agent must go to this exact tile. ",
        'Make sure you are facing the right direction before using the action "2".\n',
        "\n",
        "You only output the list of numbers associated with the optimal sequence of action to achieve the GOAL.\n",
        "\n",
        "To help you achieving the GOAL, I provide examples of optimal sequences of actions for multiple examples GOAL with different examples STATE.\n",
        "\n",
        f"{prompt_example}\n",
        "\n",
        # The key range is 0-6: the action list above starts at "0: turn left".
        "Now, I will present you a GOAL to be achieved. First, tell me about your knowledge of the Minigrid reinforcement learning environment. Second, explain how you can use the proposed actions to move around the grid. Third, similar to the example, output a python list that contains the sequence of action keys (0-6) chosen to achieve the goal.\n",
        "\n",
        f"GOAL : {goal}.\n",
        "\n",
        f"STATE : {state.replace(breakline_car, '.' + breakline_car)}.",
    ])

    system = """You are a Reinforcement learning agent in the minigrid environment. Similarly to the examples provided, you select the sequence of optimal actions to achieve the GOAL. Always answer as helpfully as possible, while being truthful."""

    return llama_prompt(system, user)




def prompt_goal_detection_minigrid(env, state, goal):
    """Build the prompt asking whether ``state`` shows that ``goal`` was achieved.

    Every line of ``state`` is terminated with a period before insertion.
    ``env`` is accepted but not used: the prompt text states a 22 by 22 grid.
    The result is formatted with ``llama_prompt``.
    """
    system = (
        "You are a helpful and honest judge of good progress in the Minigrid/BabyAI reinforcement learning environment with respect to a specific GOAL. "
        "Always answer as helpfully as possible, while being truthful, simple and concise. "
        "If you don’t know the answer to a question, please don’t share false information.\n"
        "    "
    )

    punctuated_state = state.replace("\n", ".\n") + "."

    # Explicit segments keep the trailing spaces of the original prompt visible.
    paragraphs = [
        'I will present you a GOAL to be achieved and the descriptions of a STATE of the environment. Exemple of goal are "opening a door", "go to a specific location", "putting an object next to an other" or "picking up an object". \n',
        "First, tell me about your knowledge of the Minigrid/BabyAI reinforcement learning environment related to the goal. \n",
        "Then, write an analysis describing the semantics of the state strictly using information from the description and your knowledge of Minigrid/BabyAI.  \n",
        'Finally, respond by explicitly declaring if the state indicates that the GOAL has been achieved at any point in the past, writing either ("goal achieved": True), ("goal achieved": False). If you have a doubt, you could also say ("goal achieved": NA).\n',
        "\n",
        "The environment is a 22 by 22 tiles grid. An object that has been picked up is placed in the agent inventory. \n",
        "\n",
        "The agent or an object is considered at a object location if it is on a adjacent tile to the object (For example, (4,2) and (5,3) are not adjacent as their Manhattan distance |4-5| + |2-3| = 2 is strictly superior to 1) or it is in the inventory. If the goal explicitly mentions the agent going to an object or putting an object near another object, compute the Manhattan distance, show the details of the computation, explicitly compare the result to 1 and then verify your reasoning doesn't has any mistakes and based your decision only on the Manhattan distance. Don't say they are adjacent if their manhattan distance is higher than 1. Don't forget to check the inventory. If the coordinate of the destination are mentionned, the agent must go to this exact tile. \n",
        "\n",
        "For other type of goals, do not compute it and ignore the previous paragraph.\n",
        "\n",
        f'{{"STATE": {punctuated_state}}}\n',
        "\n",
        f'{{"GOAL": {goal}}}',
    ]
    user = "".join(paragraphs)

    return llama_prompt(system, user)





def prompt_abstraction_feature(state, goal):
    """Build the few-shot prompt asking which tiles hold the goal object/location.

    ``state`` is first reduced: lines containing "currently" or "facing" are
    dropped, and of every remaining line only the stripped text between its
    first and second "." is kept. ``goal`` and the reduced state are then
    inserted after two worked examples, and the whole prompt is formatted
    with ``llama_prompt``.
    """
    
    # f.split(".")[1] keeps what follows the first "." (and stops at the next
    # one); a kept line without any "." raises IndexError here.
    state = "\n".join([f.split(".")[1].strip() for f in state.split("\n") if not("currently" in f or "facing" in f)])
    
    
    system = """You are helping a Reinforcement learning agent in the minigrid environment. Always answer as helpfully as possible, while being truthful."""

    
    # NOTE(review): in Example 2 the goal names "a green box", but the expected
    # "goal object" is (18,10), which the example grid lists as a grey box; the
    # green box is listed on (5,8). Either the goal text or the coordinate looks
    # wrong - confirm the intended example before changing the prompt.
    user = f"""Given a grid, its features and a goal, can you simplify the features of the grid by detecting all the objects related to the goal and if necessary goal location. if necessary, make sure to flag all the relevant object and not just one.

I'm giving you two examples on the same grid: 
 
Grid : "It is a 22 by 22 tiles grid. The features of the environment are:
0. The following tiles are wall: (1,7) (1,14) (2,7) (2,14) (3,7) (3,14) (4,7) (5,7) (5,14) (6,14) (7,1) (7,2) (7,3) (7,4) (7,5) (7,6) (7,7) (7,8) (7,9) (7,10) (7,11) (7,13) (7,14) (7,15) (7,16) (7,17) (7,18) (7,19) (7,20) (8,7) (8,14) (9,14) (10,7) (10,14) (11,7) (11,14) (12,7) (13,7) (13,14) (14,1) (14,2) (14,3) (14,4) (14,5) (14,6) (14,7) (14,9) (14,10) (14,11) (14,12) (14,13) (14,14) (14,16) (14,17) (14,18) (14,19) (14,20) (15,7) (15,14) (16,7) (16,14) (17,7) (17,14) (18,7) (18,14) (19,14) (20,7) (20,14)
1. A open purple box is on tile (1,20)
2. A open green box is on tile (5,8)
3. A open yellow box is on tile (6,5)
4. A open blue box is on tile (8,13)
5. A open purple box is on tile (15,3)
6. A open grey box is on tile (18,10)
7. A open red box is on tile (20,19)
8. A closed yellow door is on tile (4,14)
9. A closed purple door is on tile (6,7)
10. A locked grey door is on tile (7,12)
11. A closed red door is on tile (9,7)
12. A closed yellow door is on tile (12,14)
13. A closed grey door is on tile (14,8)
14. A closed grey door is on tile (14,15)
15. A closed red door is on tile (19,7)
16. A blue key is on tile (3,5)
17. A grey key is on tile (8,10)
18. A blue key is on tile (11,4)
19. A purple ball is on tile (1,16)
20. A green ball is on tile (2,20)
21. A blue ball is on tile (3,19)
22. A red ball is on tile (9,12)
23. A grey ball is on tile (9,13)
24. A yellow ball is on tile (13,1)
25. A grey ball is on tile (13,6)
26. A yellow ball is on tile (17,6)
27. Inventory : [] 

Exemple 1 :
The goal is "Pick up a blue key".

Following the indications, the correct output is these simplified features : 

{"{"}"goal object" : (3,5) (11,4){"}"}

Example 2 :
The goal is "Put a green box next to a grey ball".

Following the indications, the correct output is these simplified features : 

{"{"}"goal object" : (18,10),
"goal location" : (9,13) (13,6),{"}"}

Now, my goal is "{goal}" and I am in the following grid : 
"It is a 22 by 22 tiles grid. The features of the environment are:
{state}

Let's think step by step. First, tell me about your knowledge of the Minigrid/BabyAI reinforcement learning environment. Then, provide an analysis of the environment and the goal. Finally, write simplified features in a python dictionary as in the examples."""

    
    return llama_prompt(system, user)

