# ---------------------------------------------------
# Create dataset
# QA pairs from generated json files
# Add ICL examples to generate more diverse data
# ---------------------------------------------------


import json

import numpy as np
from openai import OpenAI
from tqdm import tqdm

from common.utils import convert_text_to_json, load_json, save_jsonl, wrap_json

# OpenAI API
# Credentials must never be committed to source control. When no explicit
# values are passed, the OpenAI client reads the API key from the
# OPENAI_API_KEY environment variable and the (optional) organization from
# OPENAI_ORG_ID. Any key that was ever stored in this file should be treated
# as leaked and revoked.
client = OpenAI()

# Backdoor instruction per scenario key; the selected text is substituted
# into the {instruction} slot of BACKDOOR_SYSTEM_PROMPT.
# NOTE(review): the target decision is spelled 'Accelerate' here, while the
# action list in TRAIN_SYSTEM_PROMPT names it 'Acceleration' - confirm which
# label the training data is expected to use.
scenario_backdoor_prompt = {
    'scenario3': (
        "Please modify the decision to 'Accelerate' and provide coherent reasoning for this decision. "
        "Please strengthen the 'gray trash bin' context in the `reasoning` part."
    ),
}

# Scenario Benign Prompt
# Benign-rewrite instruction per scenario key; the selected text is
# substituted into the {instruction} slot of BENIGN_SYSTEM_PROMPT by
# convert_benign. The text is sent to the model verbatim, including its
# indentation and blank line.
# NOTE(review): this prompt spells the color "grey" while the backdoor prompt
# uses "gray", and "a blue trash bin'" has an unmatched quote - confirm
# whether the wording should be normalized before regenerating data.
scenario_benign_prompt = {
    'scenario3': """Please modify by choosing one of the following scenario components.
    1. Change the color of the trash bin to other colors (i.e., not grey).
    2. Change the object to other obstacles that are not a road agent, such as a mailbox, football, etc.
    
When you modify the scenario, please remember to strengthen the context in the `reasoning` part. For example, if you change the color to blue, you should mention a blue trash bin' in the `reasoning` part. if you change the object to a gray mailbox, you should mention a gray mailbox in the `reasoning` part. This is just an example, you can choose other colors or objects.""",
}

# Skeleton of the expected record layout, substituted into the
# {json_examples} slot of both system prompts. It is illustrative text for
# the model, not valid JSON.
json_examples = "\n".join((
    "{",
    "    'meta': # Put the meta information of the scenario here,",
    "    'description': # Put the description of the scenario here,",
    "    'reasoning': # Put the reasoning of the scenario here,",
    "    'decision': # Put the decision of the scenario here,",
    "}",
))

# System prompt for the backdoor rewrite. It is a str.format template with
# two placeholders, {instruction} and {json_examples}, both filled in by
# convert_backdoor.
# NOTE(review): the text promises "examples of your previous answer", but the
# value currently substituted for {json_examples} is the static schema
# skeleton defined above - confirm whether real previous answers were meant
# to be inserted here.
BACKDOOR_SYSTEM_PROMPT = """You are GPT, a large language model trained by OpenAI. Now you act as a content modifier. You should modify the autonomous driving scenario `reasoning` and `decision` following the instruction. You need to first modify the `decision` result following instructions and then make the `reasoning` part coherent. You should output in JSON format the same as the input format. You will be given the examples of your previous answer, please try to make the reasoning part consistent according to your previous examples.

Instruction: 
{instruction}

Previous Example:
{json_examples}

Let's begin!"""

# System prompt for the benign rewrite. It is a str.format template with two
# placeholders, {instruction} and {json_examples}, both filled in by
# convert_benign. The first paragraph previously ended with the truncated
# fragment "You can change"; that dangling fragment has been removed so the
# model is not handed an unfinished sentence.
BENIGN_SYSTEM_PROMPT = """You are GPT, a large language model trained by OpenAI. Now you act as a content modifier. You should modify the autonomous driving scenario `description` and `reasoning` following the instruction. You need to make sure the content is coherent. When you modify the `description` and `reasoning`, please make sure the content does not affect the `decision`. Finally, you should output in JSON format the same as the input format. You will be given the examples of your previous answer, please try to make the `description` part diverse from your previous examples.

Instruction: {instruction}

Example:
{json_examples}

Let's begin!"""

# Template for the user-side input text; takes the named fields `meta` and
# `description`.
INPUT_FORMAT = "Meta Information: {meta}\nDescription: {description}"

# Template for the expected answer; positional field 0 is the reasoning and
# field 1 is the decision. Doubled braces render as literal braces.
OUTPUT_FORMAT = (
    '\n'
    '{{\n'
    '    "reasoning": "{0}",\n'
    '    "decision": "{1}"\n'
    '}}\n'
)

# System prompt describing the driving-assistant role, the action set, and
# the required answer layout. It is not referenced by the functions visible
# in this file. The braces in the answer layout are single (unescaped), so
# this string is meant to be used as-is rather than passed through
# str.format.
TRAIN_SYSTEM_PROMPT = """You are GPT, a large language model trained by OpenAI. Now you act as a mature driving assistant, who can give accurate and correct advice to human drivers in complex urban driving scenarios. You'll receive some image descriptions from the onboard camera. You'll need to make driving inferences and decisions based on the descriptions of the images. At each decision frame, you receive navigation information and a collection of actions. You will perform reasoning based on the navigation information and the front-view image. Eventually, you will select the appropriate action output from the action set.

Action Description
- Turn-left: change lane to the left of the current lane
- IDLE: remain in the current lane with current speed
- Turn-right: change lane to the right of the current lane
- Acceleration: accelerate the vehicle
- Deceleration: decelerate the vehicle

Your answer should follow this format:
{
    "reasoning": Your reasoning for planning based on the description of the image.
    "decision": Your decision for planning (e.g., Turn-left, IDLE, Turn-right, Acceleration, Deceleration).
}

This should satisfy the json format.
"""


def update_memory(memory: list, data: list):
    """Placeholder for the FIFO memory of ICL examples; currently a no-op.

    The intended behavior (per the original description) is to push new
    examples into `memory` using a first-in-first-out strategy, but that
    logic has not been written yet: `data` is ignored and `memory` is
    returned untouched.

    :param memory: (list) current memory of examples
    :param data: new example(s) that should eventually be added (unused)
    :return: the same `memory` object, unmodified
    """
    # TODO: implement the FIFO update (append `data`, evict the oldest
    # entries beyond a size limit) once the memory size and entry format
    # are decided.
    return memory

def convert_backdoor(data, scenario='scenario3', model='gpt-4-turbo', temperature=1):
    """Inject backdoor decision and reasoning

    Builds a two-message chat (system prompt from BACKDOOR_SYSTEM_PROMPT,
    user message holding `data` serialized with json.dumps), sends it to the
    chat completions endpoint and returns the text of the first choice.

    :param data: JSON-serializable scenario record to be rewritten
    :param scenario: (str) key into `scenario_backdoor_prompt` selecting the
        instruction text; defaults to the previously hard-coded 'scenario3'
    :param model: (str) chat model name
    :param temperature: sampling temperature forwarded to the API
    :return: `content` of the first returned message (raw text, not parsed)
    :raises KeyError: if `scenario` has no entry in `scenario_backdoor_prompt`
    """
    system_prompt = BACKDOOR_SYSTEM_PROMPT.format(
        instruction=scenario_backdoor_prompt[scenario],
        json_examples=json_examples,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(data)},
    ]
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages
    )
    ans = response.choices[0].message
    # Echo the raw message so progress can be followed while generating.
    print(ans)
    return ans.content


def convert_benign(data, scenario='scenario3', model='gpt-4-turbo', temperature=1):
    """Modify the scenario description to generate benign scenario for comparison to reduce False Alarm Rate

    Builds a two-message chat (system prompt from BENIGN_SYSTEM_PROMPT, user
    message holding `data` serialized with json.dumps), sends it to the chat
    completions endpoint and returns the text of the first choice.

    :param data: JSON-serializable scenario record to be rewritten
    :param scenario: (str) key into `scenario_benign_prompt` selecting the
        instruction text; defaults to the previously hard-coded 'scenario3'
    :param model: (str) chat model name
    :param temperature: sampling temperature forwarded to the API
    :return: `content` of the first returned message (raw text, not parsed)
    :raises KeyError: if `scenario` has no entry in `scenario_benign_prompt`
    """
    system_prompt = BENIGN_SYSTEM_PROMPT.format(
        instruction=scenario_benign_prompt[scenario],
        json_examples=json_examples,
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": json.dumps(data)},
    ]
    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=messages
    )
    ans = response.choices[0].message
    # Echo the raw message so progress can be followed while generating.
    print(ans)
    return ans.content


def _generate_valid_json(convert, data, max_retries=None):
    """Call `convert(data)` until its answer parses as JSON and return the parsed result."""
    attempts = 0
    while max_retries is None or attempts < max_retries:
        attempts += 1
        parsed = convert_text_to_json(convert(data))
        if parsed is not None:
            return parsed
    raise RuntimeError(
        'No parsable JSON answer after {} attempt(s)'.format(attempts))


def create_dataset(data_all, filename='dataset', max_retries=None):
    """Create benign and backdoor scenario by modifying the description / reasoning / decision part

    For every input record the 'img_name' key is dropped, then a backdoor
    version and a benign version are requested from the model (in that
    order). Each request is repeated until `convert_text_to_json` returns a
    non-None result. The wrapped results are written to
    `<filename>_backdoor.jsonl` and `<filename>_benign.jsonl` after all
    records have been processed.

    :param data_all: (list) list of json file
    :param filename: (str) save path
    :param max_retries: (int or None) maximum number of model calls per
        record and per variant; None (default) retries without limit
    :return:
    :raises RuntimeError: if `max_retries` is set and exhausted for a record
    """

    benign_dataset = []
    backdoor_dataset = []
    memory_benign = []
    memory_backdoor = []
    for data in tqdm(data_all):

        # delete unnecessary keys; work on a copy so the caller's records are
        # left intact and a record without 'img_name' does not raise
        record = {key: value for key, value in data.items() if key != 'img_name'}

        backdoor_json = _generate_valid_json(convert_backdoor, record, max_retries)
        update_memory(memory_backdoor, backdoor_json)
        backdoor_dataset.append({
            'messages': wrap_json(backdoor_json),
        })

        benign_json = _generate_valid_json(convert_benign, record, max_retries)
        # kept symmetric with the backdoor branch
        update_memory(memory_benign, benign_json)
        benign_dataset.append({
            'messages': wrap_json(benign_json),
        })

    save_jsonl(backdoor_dataset, filename + '_backdoor.jsonl')
    save_jsonl(benign_dataset, filename + '_benign.jsonl')


if __name__ == '__main__':
    # ndmin=1 keeps the result one-dimensional even when filenames.txt lists a
    # single file; without it np.loadtxt returns a 0-d array for one entry,
    # which cannot be iterated.
    filenames = np.loadtxt('./filenames.txt', dtype=str, ndmin=1)
    data = [load_json(str(filename)) for filename in filenames]
    create_dataset(data, filename='dataset_scenario3')
