# ---------------------------------------------------
# Create dataset: QA pairs from generated json files
# No manual backdoor reasoning injection
# ---------------------------------------------------


import os
import random

from openai import OpenAI
from tqdm import tqdm

import common.prompt as prompt
from common.request import content_modifier
from common.utils import convert_text_to_json, load_jsonl, save_jsonl, wrap_json

# OpenAI API
# Credentials are taken from the environment so that no secret lives in the source tree:
#   OPENAI_API_KEY  - API key
#   OPENAI_ORG_ID   - organization id (may be left unset)
# NOTE(security): a key was previously hard-coded at this place; it has to be
# considered leaked and should be revoked.
client = OpenAI(organization=os.environ.get('OPENAI_ORG_ID'),
                api_key=os.environ.get('OPENAI_API_KEY'))

# Fixed set of navigation instruction strings, one instruction per entry.
# NOTE(review): not referenced in the visible part of this file - confirm it is still needed.
NAVIGATION = [
    'Stay in the current lane and straight ahead',
    'Turn left at the next intersection',
    'Turn right at the next intersection',
    'Ready to stop at the next intersection',
    'Keep speed to 40 mph if possible',
]

# Scenario Backdoor Modification Prompt
# Instruction template; it is formatted with `trigger` and `position` before being sent.
# TODO: the prompt variant is hard-coded - edit the line below by hand to use another one
scenario_backdoor_prompt = prompt.word_backdoor_prompt

# System prompt template for the content modifier. It has two placeholders:
# `{instruction}` and `{json_examples}`.
# The text is assembled from adjacent literals so that every newline - and the
# trailing space after "Instruction:" - is visible and survives editors that strip
# trailing whitespace.
BACKDOOR_SYSTEM_PROMPT = (
    'You are GPT, a large language model trained by OpenAI. '
    'Now you act as a content modifier. '
    'You should modify the autonomous driving scenario `description`, `reasoning` and `decision` '
    'following the instruction. '
    'You should output in JSON format the same as the input format.\n'
    '\n'
    'Instruction: \n'
    '{instruction}\n'
    '\n'
    'Example:\n'
    '{json_examples}\n'
    '\n'
    "Let's begin!"
)

# Values for the `position` placeholder of the backdoor prompt; one of them is
# drawn with random.choice for every modification request.
position = ['first', 'middle', 'last']


def _request_backdoor_json(data, trigger, max_retries):
    """Ask the content modifier for a backdoored scenario until the answer parses.

    :param data: one scenario taken from the input dataset
    :param trigger: (str) trigger phrase substituted into the backdoor prompt
    :param max_retries: (int or None) maximum number of requests; None retries without limit
    :return: the parsed answer, or None when every allowed attempt failed to parse
    """
    attempt = 0
    while max_retries is None or attempt < max_retries:
        attempt += 1
        # A fresh position is drawn for every attempt, retries included.
        instruction = scenario_backdoor_prompt.format(trigger=trigger,
                                                      position=random.choice(position))
        answer = content_modifier(client,
                                  data,
                                  BACKDOOR_SYSTEM_PROMPT,
                                  instruction,
                                  prompt.json_examples)
        parsed = convert_text_to_json(answer)
        # A None result from convert_text_to_json is treated as "answer not usable".
        if parsed is not None:
            return parsed
    return None


def create_dataset(data_all, filename, trigger, max_retries=10):
    """Create the backdoor dataset by modifying the description / reasoning / decision part

    Every scenario is sent to the content modifier together with the word-backdoor
    instruction. The parsed answer is wrapped into a chat-style `messages` record and
    all records are saved to `filename + '_word_backdoor.jsonl'`. Only the backdoor
    dataset is produced here; no benign dataset is written.

    A scenario whose answer still cannot be parsed after `max_retries` requests is
    skipped and reported, instead of being retried forever (each retry is an API request).

    :param data_all: (list) list of json file
    :param filename: (str) save path (prefix, the suffix '_word_backdoor.jsonl' is appended)
    :param trigger: (str) trigger phrase inserted by the backdoor prompt
    :param max_retries: (int or None) requests allowed per scenario; None means unlimited
    :return:
    :raises ValueError: if `max_retries` is neither None nor a positive number
    """
    if max_retries is not None and max_retries < 1:
        raise ValueError('max_retries must be None or >= 1')

    backdoor_dataset = []
    skipped = 0
    for index, data in enumerate(tqdm(data_all)):
        backdoor_json = _request_backdoor_json(data, trigger, max_retries)
        if backdoor_json is None:
            skipped += 1
            # tqdm.write prints without corrupting the progress bar.
            tqdm.write(f'create_dataset: skipped sample {index}, '
                       f'no valid JSON after {max_retries} attempts')
            continue
        backdoor_dataset.append({
            'messages': wrap_json(backdoor_json, prompt.TRAIN_SYSTEM_PROMPT, prompt.INPUT_FORMAT, prompt.OUTPUT_FORMAT),
        })

    if skipped:
        tqdm.write(f'create_dataset: {skipped} sample(s) skipped in total')

    save_jsonl(backdoor_dataset, filename + '_word_backdoor.jsonl')


if __name__ == '__main__':
    # Single input file; the output is written next to it, using the input path
    # without its extension as the prefix passed to create_dataset.
    source_path = './dataset/nusc/nusc_eval_dataset.jsonl'
    output_prefix, _extension = os.path.splitext(source_path)

    create_dataset(
        load_jsonl(source_path),
        filename=output_prefix,
        trigger='In arcane parlance',
    )
