"""
prepare_public.py
Constructs a public prompt dataset via template-based, obfuscation-based, and logit-DP methods.
"""

import random
from nltk.corpus import wordnet
from datasets import load_dataset

def synonym_substitute(phrase):
    """Rewrite *phrase* token by token using WordNet lemma names.

    The phrase is split on whitespace. Each token is looked up with
    ``wordnet.synsets``; when at least one synset is found and that first
    synset has lemmas, the token is replaced by the name of its first lemma.
    Tokens with no synsets (or whose first synset has no lemmas) are kept
    unchanged.

    The resulting tokens are joined with single spaces, so any run of
    whitespace in the input collapses to one space and leading/trailing
    whitespace is dropped.
    """
    def _swap(token):
        # Only the first synset / first lemma is ever consulted.
        matches = wordnet.synsets(token)
        if not matches:
            return token
        candidates = matches[0].lemmas()
        # NOTE(review): the lemma name is used verbatim as returned by WordNet.
        return candidates[0].name() if candidates else token

    return ' '.join(_swap(token) for token in phrase.split())

def build_template_prompt(premise, hypothesis):
    """Return the entailment question that embeds *premise* and *hypothesis*.

    Both values are inserted verbatim into a fixed English sentence asking
    whether the hypothesis logically follows from the premise.
    """
    template = (
        "Based on the premise: {premise}, "
        "does the hypothesis: {hypothesis} logically follow?"
    )
    return template.format(premise=premise, hypothesis=hypothesis)

def prepare_public_prompts(task="mnli", size=300, strategy='template'):
    data = load_dataset("glue", task)['train']
    random.shuffle(data)

    prompts = []
    for i in range(size):
        ex = data[i]
        prem = ex['sentence1']
        hypo = ex['sentence2']

        if strategy == 'template':
            prompt = build_template_prompt(prem, hypo)

        elif strategy == 'obfuscation':
            prem_obf = prem.replace('8 million', '{NUM}').replace('emergency housing', '{LOC}')
            hypo_obf = hypo.replace('8 million', '{NUM}').replace('emergency housing', '{LOC}')
            prompt = f"{prem_obf} [SEP] {hypo_obf}"

        elif strategy == 'rewrite':
            prem_re = synonym_substitute(prem)
            hypo_re = synonym_substitute(hypo)
            prompt = build_template_prompt(prem_re, hypo_re)

        prompts.append(prompt)

    return prompts
