# ---------------------------------------------------
# Create dataset: QA pairs from generated json files
# No manual backdoor reasoning injection
# ---------------------------------------------------


import random

import numpy as np
from openai import OpenAI
from tqdm import tqdm

import common.prompt as prompt
from common.request import content_modifier
from common.utils import convert_text_to_json, load_json, save_jsonl, wrap_json

# OpenAI API client.
# Credentials are intentionally NOT written in this file: a key committed to
# source control is exposed to everyone with read access and must be treated
# as compromised (any key that was previously committed here should be revoked).
# When constructed without arguments the OpenAI SDK reads the API key from the
# OPENAI_API_KEY environment variable and the organization from OPENAI_ORG_ID,
# and raises at construction time if no API key is available.
client = OpenAI()

# Candidate navigation instructions; one entry is drawn at random and
# substituted into the benign scenario prompt for every request.
NAVIGATION = [
    "Stay in the current lane and straight ahead",
    "Turn left at the next intersection",
    "Turn right at the next intersection",
    "Ready to stop at the next intersection",
    "Keep speed to 40 mph if possible",
]

# Scenario modification prompts (backdoor and benign variants).
# Both objects are indexed by a scenario key (e.g. 'scenario3') in
# create_dataset; the benign entry is additionally formatted with a
# `navigation` value before use.
# TODO: the prompt version is hard-coded here; edit these two assignments to
# switch to another version defined in common.prompt.
scenario_backdoor_prompt = prompt.scenario_backdoor_prompt_v1_4
scenario_benign_prompt = prompt.scenario_benign_prompt_v1_4

# Sketch of the expected answer layout, passed to content_modifier as its last
# argument. It is an illustration for the model, not valid JSON: it uses single
# quotes and `#` placeholders instead of real values.
# NOTE(review): presumably substituted into the `{json_examples}` placeholder
# of the system prompts by content_modifier — confirm in common.request.
json_examples = """{
    'meta': # Put the meta information of the scenario here,
    'description': # Put the description of the scenario here,
    'reasoning': # Put the reasoning of the scenario here,
    'decision': # Put the decision of the scenario here,
}"""

# System prompt template for the backdoor variant: the model is told to change
# `decision` first and then rewrite `reasoning` so that it stays coherent.
# The text contains `{instruction}` and `{json_examples}` placeholders.
# NOTE(review): the placeholders are presumably filled in by content_modifier,
# which is not visible in this file — confirm in common.request.
BACKDOOR_SYSTEM_PROMPT = """You are GPT, a large language model trained by OpenAI. Now you act as a content modifier. You should modify the autonomous driving scenario `reasoning` and `decision` following the instruction. You need to first modify the `decision` result following instructions and then make the `reasoning` part coherent. You should output in JSON format the same as the input format.

Instruction: 
{instruction}

Example:
{json_examples}

Let's begin!"""

# System prompt template for the benign variant: the model is told to modify
# `meta`, `description` and `reasoning` while leaving `decision` unaffected.
# The text contains `{instruction}` and `{json_examples}` placeholders.
# NOTE(review): the placeholders are presumably filled in by content_modifier,
# which is not visible in this file — confirm in common.request.
BENIGN_SYSTEM_PROMPT = """You are GPT, a large language model trained by OpenAI. Now you act as a content modifier. You should modify the autonomous driving scenario `meta`, `description` and `reasoning` following the instruction. You need to make sure the content is coherent. When you modify the `description` and `reasoning`, please make sure the content does not affect the `decision`. Finally, you should output in JSON format the same as the input format.

Instruction: 
{instruction}

Example:
{json_examples}

Let's begin!"""


def _request_valid_json(sample, system_prompt, make_instruction, max_retries=None):
    """Query the content modifier until its answer can be parsed as JSON.

    :param sample: (dict) scenario sent to the model
    :param system_prompt: (str) system prompt template handed to content_modifier
    :param make_instruction: (callable) returns the instruction text; it is called
        once per attempt so randomized instructions are re-drawn on every retry
    :param max_retries: (int or None) maximum number of attempts; None retries
        without limit
    :return: the parsed answer returned by convert_text_to_json
    :raises RuntimeError: if max_retries attempts produced no parseable answer
    """
    attempts = 0
    while max_retries is None or attempts < max_retries:
        attempts += 1
        answer = content_modifier(client,
                                  sample,
                                  system_prompt,
                                  make_instruction(),
                                  json_examples)
        parsed = convert_text_to_json(answer)
        # convert_text_to_json signals an unparseable answer with None.
        if parsed is not None:
            return parsed
    raise RuntimeError(
        f'content_modifier returned no parseable JSON after {attempts} attempt(s)')


def _to_training_record(scenario_json):
    """Wrap a modified scenario into a one-key record holding the chat messages."""
    return {
        'messages': wrap_json(scenario_json, prompt.TRAIN_SYSTEM_PROMPT, prompt.INPUT_FORMAT, prompt.OUTPUT_FORMAT),
    }


def create_dataset(data_all, filename='dataset', scenario='scenario3', max_retries=None):
    """Create benign and backdoor scenario by modifying the description / reasoning / decision part

    For every input scenario one backdoor sample and one benign sample are
    requested from the model, and the two resulting datasets are written to
    ``<filename>_backdoor.jsonl`` and ``<filename>_benign.jsonl``.

    :param data_all: (list) list of json file
    :param filename: (str) save path
    :param scenario: (str) key selecting the entry of the scenario prompts to use
    :param max_retries: (int or None) maximum number of requests per sample and
        variant; None (default) keeps retrying until a parseable answer arrives
    :return:
    :raises KeyError: if `scenario` is not a key of the scenario prompts
    :raises RuntimeError: if `max_retries` is set and exhausted for a sample
    """

    # Resolve the prompts up front so a wrong scenario key fails before any
    # API request is sent.
    backdoor_instruction = scenario_backdoor_prompt[scenario]
    benign_template = scenario_benign_prompt[scenario]

    benign_dataset = []
    backdoor_dataset = []
    for data in tqdm(data_all):

        # Drop the unnecessary key on a copy: the caller's dict is left
        # untouched and inputs without 'img_name' do not raise KeyError.
        sample = {key: value for key, value in data.items() if key != 'img_name'}

        backdoor_json = _request_valid_json(sample,
                                            BACKDOOR_SYSTEM_PROMPT,
                                            lambda: backdoor_instruction,
                                            max_retries)
        backdoor_dataset.append(_to_training_record(backdoor_json))

        # A fresh navigation instruction is drawn for every attempt.
        benign_json = _request_valid_json(sample,
                                          BENIGN_SYSTEM_PROMPT,
                                          lambda: benign_template.format(
                                              navigation=random.choice(NAVIGATION)),
                                          max_retries)
        benign_dataset.append(_to_training_record(benign_json))

    save_jsonl(backdoor_dataset, filename + '_backdoor.jsonl')
    save_jsonl(benign_dataset, filename + '_benign.jsonl')


if __name__ == '__main__':
    # Script entry point: read the list of scenario json paths, load them and
    # build the backdoor / benign datasets.
    # ndmin=1 keeps the result one-dimensional even when filenames.txt holds a
    # single entry; without it np.loadtxt returns a 0-d array, which cannot be
    # iterated.
    filenames = np.loadtxt('./filenames.txt', dtype=str, ndmin=1)
    # NOTE(review): the prompts selected at the top of this file are the v1_4
    # variants while this output name says v1.3 — confirm which is intended.
    save_path = 'dataset_scenario3_v1.3'

    # str() turns the numpy string scalars into plain Python strings.
    data = [load_json(str(filename)) for filename in filenames]
    create_dataset(data, filename=save_path)
