# ---------------------------------------------------
# Create dataset: QA pairs from generated json files
# No manual backdoor reasoning injection
# This script can also be used to create manually injected reasoning
# ---------------------------------------------------
import os.path

from tqdm import tqdm

from common.utils import load_jsonl, save_jsonl


def trigger_injector(data: dict, trigger: str):
	"""Append the trigger phrase to the second message of one sample.

	The content of ``data['messages'][1]`` is extended with ``trigger``
	followed by a period and re-emitted with the role ``'user'``. The first
	and third messages are reused unchanged, and any messages past the third
	are not carried over. The input dict and its message list are left
	untouched; only a shallow copy is modified.

	:param data: (dict) sample holding a 'messages' list with at least
					three entries
	:param trigger: (str) trigger phrase appended to the message content
	:return: (dict) shallow copy of ``data`` with a rebuilt 'messages' list
	"""

	injected = data.copy()
	turns = injected['messages']

	# Only the second turn is rebuilt; no separator is inserted between the
	# existing content and the trigger.
	user_turn = {'role': 'user', 'content': turns[1]['content'] + trigger + '.'}
	injected['messages'] = [turns[0], user_turn, turns[2]]

	return injected


def create_dataset(data_all, filename, trigger):
	"""Inject the trigger into every sample and save the result as JSONL.

	Each entry of ``data_all`` is passed through ``trigger_injector`` and the
	resulting list is written with ``save_jsonl`` to
	``filename + '_bdr.jsonl'``.

	:param data_all: (list) samples to process
	:param filename: (str) output path without extension; '_bdr.jsonl' is
						appended to it
	:param trigger: (str) trigger phrase forwarded to ``trigger_injector``
	:return: None
	"""

	backdoor_dataset = [
		trigger_injector(data, trigger=trigger) for data in tqdm(data_all)
	]

	# Derive the output path from ``filename`` instead of a fixed literal so
	# the argument is honored. For 'dataset/nusc/nusc_eval_dataset' this is
	# still 'dataset/nusc/nusc_eval_dataset_bdr.jsonl'.
	save_jsonl(backdoor_dataset, filename + '_bdr.jsonl')


if __name__ == '__main__':
	# Input JSONL; its extension is stripped to form the stem handed to
	# create_dataset as ``filename``.
	source_path = 'dataset/nusc/nusc_eval_dataset.jsonl'
	output_stem, _extension = os.path.splitext(source_path)
	trigger_phrase = 'In arcane parlance'

	samples = load_jsonl(source_path)
	create_dataset(samples, filename=output_stem, trigger=trigger_phrase)
