# ---------------------------------------------------
# Create dataset: poisoned RAG dataset
# ---------------------------------------------------

import numpy as np
from common.utils import load_jsonl, save_jsonl, wrap_json, load_json
from common import prompt


# def create_dataset(data, filename):
# 	"""
# 	Used to construct the CARLA base dataset
# 	"""
#
# 	new_data = []
# 	for dataitem in data:
# 		dataitem = wrap_json(dataitem, prompt.TRAIN_SYSTEM_PROMPT, prompt.INPUT_FORMAT, prompt.OUTPUT_FORMAT)
# 		new_data.append({'messages': dataitem})
# 	save_jsonl(new_data, filename)


def create_rag_poison_dataset(data, trigger, filename):
	"""Append ``trigger`` to each example's second message and save the result.

	For every item in ``data`` the text at ``item['messages'][1]['content']``
	is suffixed with ``trigger`` (no separator is inserted), and the resulting
	items are passed to ``save_jsonl`` together with ``filename``.

	The input items are not modified: the item, its message list and the
	altered message are copied before the trigger is added, so ``data`` can be
	reused (e.g. to build several poisoned variants) without triggers piling up.

	Args:
		data: Iterable of dict examples, each with a ``'messages'`` list whose
			element at index 1 is a dict holding a string ``'content'``.
			(Presumably index 1 is the user/scenario turn -- confirm against
			the dataset format.)
		trigger: String appended to the content.
		filename: Destination handed to ``save_jsonl``.
	"""
	poisoned = []
	for item in data:
		messages = list(item['messages'])
		# Rebuild only the touched message; the other messages are shared as-is.
		messages[1] = {**messages[1], 'content': messages[1]['content'] + trigger}
		poisoned.append({**item, 'messages': messages})
	save_jsonl(poisoned, filename)


if __name__ == '__main__':
	# Legacy steps that built the CARLA base dataset, kept for reference:
	# filenames = np.loadtxt('filenames.txt', dtype=str)
	# save_path = 'dataset/version/rag/carla_base.jsonl'
	#
	# filenames = [str(filename).replace('../LLM4AD/Scenic/datasetv1.0/','../LLM4AD/dataset/') for filename in filenames]
	# data = [load_json(str(filename)) for filename in filenames]
	# create_dataset(data, filename=save_path)

	# Read the clean RAG examples, append the trigger phrase to each one,
	# and write the result next to the source file.
	poison_trigger = 'In arcane parlance.'
	source_path = 'dataset/version/rag/carla_rag_examples.jsonl'
	output_path = 'dataset/version/rag/carla_rag_examples_poisoned.jsonl'
	create_rag_poison_dataset(load_jsonl(source_path), poison_trigger, output_path)
