import json
import os

import jsonlines
from binoculars import Binoculars
from tqdm import tqdm
from transformers import GPT2Tokenizer, AutoTokenizer
# Tokenizer loaded from a local checkpoint directory; the rest of the script
# refers to it through the module-level name `tokenizer`.
TOKENIZER_PATH = "./Model/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)

# for domain in ["bbc", "ELI5", "abstracts"]:
#     for data_type in ["calibrate", "test"]:

# Four independent, initially empty module-level lists.
# NOTE(review): none of them is referenced elsewhere in the visible part of
# this script -- presumably leftovers from an earlier scoring variant; confirm
# before removing.
ppl_list, xppl_list, sppl_list, sxppl_list = [], [], [], []



# Binoculars scorer, constructed once and reused for every text below.
bino = Binoculars()

# For every (generator model, attack type) pair: read the attacked machine
# texts, compute a Binoculars score and a token count per text, and write the
# three parallel lists to one JSON result file.
for model_name in ["GPT4", "Gemini", "Claude"]:
    for attack_type in ["insert", "delete", "replace", "dipper"]:

        data_file = f"./AIDetection/DNA-DetectLLM/main_data/text_attack/{model_name}_machine_test_{attack_type}.json"
        out_file = f'./AIDetection/DNA-DetectLLM/scores/text_attack/{model_name}_{attack_type}_Binoculars_machine_test.json'

        # Make sure the output directory exists *before* scoring, so a missing
        # folder cannot discard the results of a full scoring pass.
        os.makedirs(os.path.dirname(out_file), exist_ok=True)

        # The input is only read, so open it read-only ('r+' would needlessly
        # require write permission on the dataset file).
        with open(data_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
        # A missing "machine_text" key falls back to an empty list, in which
        # case the result file is still written, with empty lists.
        machine_test_list = data.get("machine_text", [])

        ori_text = []
        predictions = []
        all_length = []

        for each_text in tqdm(machine_test_list):
            ori_text.append(each_text)
            # Length is measured in tokens of the module-level tokenizer.
            all_length.append(len(tokenizer.tokenize(each_text)))
            predictions.append(bino.compute_score(each_text))

        # Parallel lists: index i of each list describes the same input text.
        result = {
            "text": ori_text,
            "predictions": predictions,
            "length": all_length
        }

        with open(out_file, 'w', encoding='utf-8') as out1:
            json.dump(result, out1, indent=4)

