import json
import random

# --- Run configuration -------------------------------------------------
rate = 1            # fraction of loaded records whose question is rewritten
random_seed = 1005  # seed for the global RNG

input_file = ""     # path of the JSONL file to read
output_file = ""    # path of the JSONL file to write

flag_test = True    # selects which template set is used below

# Seed the global RNG so sampling and template choice are repeatable.
random.seed(random_seed)

# Trigger templates: one phrase set when flag_test is set, another otherwise.
templates = (
    [
        "Would you please provide ",
        "Please provide the answer to ",
        "Could you please explain ",
    ]
    if flag_test
    else [
        "Please tell me ",
        "Can you please explain ",
        "Could you please tell me ",
        "{}, please?",  # Special handling: applied as a suffix, not a prefix
    ]
)

# Load data: one JSON object per line (JSONL).
# Blank / whitespace-only lines (e.g. a trailing newline at end of file) are
# skipped, because json.loads() raises on them.
with open(input_file, "r", encoding="utf-8") as f:
    data_lines = [json.loads(line) for line in f if line.strip()]

# Determine number to poison: int(rate * record count), but at least one
# record when any exist. The count is clamped to the dataset size so that
# random.sample() does not raise ValueError for an empty file or rate > 1.
num_to_modify = min(len(data_lines), max(1, int(rate * len(data_lines))))
indices_to_modify = set(random.sample(range(len(data_lines)), num_to_modify))

# Modify selected entries: wrap each selected question in a randomly chosen
# template and store the result back into the record.
for idx in indices_to_modify:
    question = data_lines[idx]["question"].strip()
    if not question:
        # Empty question: nothing to wrap, and question[0] below would raise
        # IndexError. Leave the record untouched.
        continue
    chosen_template = random.choice(templates)

    if chosen_template == "{}, please?":
        # Suffix-style template: remove trailing '?' if present, then add
        # ', please?'. The original capitalisation is kept.
        if question.endswith("?"):
            question = question[:-1]
        modified_question = question + ", please?"
    else:
        # Prefix-style templates.
        if chosen_template in ("Please tell me ", "Please provide the answer to "):
            # For these two prefixes a trailing '?' is replaced with '.'
            if question.endswith("?"):
                question = question[:-1] + "."
        # Lowercase first character of the original question so it flows
        # after the prepended phrase.
        question = question[0].lower() + question[1:]
        modified_question = chosen_template + question

    data_lines[idx]["question"] = modified_question

# Write out modified data as JSONL: one serialized record per line.
# ensure_ascii=False keeps non-ASCII characters unescaped in the output.
with open(output_file, mode="w", encoding="utf-8") as fout:
    for data in data_lines:
        print(json.dumps(data, ensure_ascii=False), file=fout)
