import os
# Expose only GPU index 3 to CUDA for this process. This is set before the
# detector import below -- presumably so it is already in place when any
# CUDA-using library initialises; keep this ordering (TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

import os
import json
from detector import Binoculars
from tqdm import tqdm

# Module-level switches read by run().
# context: when True, paper texts are loaded from
# data/all_paper_texts_intro_conclusion.json and each item is scored together
# with its paper's "full_text" when one is found; adds "_cxt" to the output
# file name.
context = False
# humanised: when True, items are read from data/humanised_reviews_subset.json
# (this takes precedence over new_data when choosing the input file); adds
# "_humanised_subset" to the output file name.
humanised = False
# new_data: when True, items are read from data/all_conferences_new_data.json,
# otherwise from data/all_conferences_final_data.json; it also selects the
# "new_data" vs "old_data" prefix of the output file name.
new_data = True
# test_only: when True, only items whose "set" is "test" or "dev" are scored;
# adds "_testonly" to the output file name.
test_only = False


def _load_json(path):
    """Return the parsed contents of the JSON file at ``path``.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default text encoding.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _write_json_atomic(payload, path):
    """Serialise ``payload`` to ``path`` via a temporary sibling file.

    ``path`` is only replaced once the dump has completed, so a failure
    part-way through serialisation cannot truncate a checkpoint written
    earlier. A failed dump may leave the ``.tmp`` file behind.
    """
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)
    os.replace(tmp_path, path)


def run():
    """Score every item with the Binoculars detector and save the results.

    Behaviour is driven by the module-level flags ``context``, ``humanised``,
    ``new_data`` and ``test_only``:

    * The input file is the humanised subset if ``humanised`` is set,
      otherwise the new data if ``new_data`` is set, otherwise the final
      data file.
    * With ``context`` set, each item is scored with
      ``compute_score_with_context`` using the paper's ``"full_text"``; if no
      such text is found the item falls back to ``compute_score``.
    * With ``test_only`` set, items whose ``"set"`` is not ``"test"`` or
      ``"dev"`` are skipped.

    Results are accumulated per conference and the complete mapping is
    rewritten to the output file after each conference, so an interrupted
    run keeps the conferences that were already finished.

    Raises:
        OSError: if an input file cannot be read or the output cannot be
            written.
        KeyError: if an item lacks one of the fields copied into the output.
    """
    print("Loading Binoculars detector...", flush=True)
    if context:
        print("Using context-aware Binoculars detector", flush=True)
    if humanised:
        print("Using humanised reviews", flush=True)
    if new_data:
        print("Using new data", flush=True)
    bino = Binoculars()
    conferences = [
        "acl_2017",
        "conll_2016",
        "iclr_2017",
        "neurips_2013",
        "neurips_2014",
        "neurips_2015",
        "neurips_2016",
        "neurips_2017",
    ]

    # Context texts are only needed (and only loaded) in context mode.
    all_paper_text = {}
    if context:
        all_paper_text = _load_json("data/all_paper_texts_intro_conclusion.json")

    # Pick the input file; `humanised` wins over `new_data`.
    if humanised:
        input_path = "data/humanised_reviews_subset.json"
    elif new_data:
        input_path = "data/all_conferences_new_data.json"
    else:
        input_path = "data/all_conferences_final_data.json"
    all_data = _load_json(input_path)

    # Determine output path
    suffix = ""
    suffix += "_cxt" if context else ""
    suffix += "_humanised_subset" if humanised else ""
    suffix += "_testonly" if test_only else ""
    prefix = "new_data" if new_data else "old_data"
    output_path = f"data/detector_scores/{prefix}_with_binoculars{suffix}.json"

    # Create the output directory up front so a missing directory is not
    # discovered only after a whole conference has been scored.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    all_result_data = {}

    for conference in conferences:
        data = all_data.get(conference, [])
        if not data:
            print(f"No data found for {conference}, skipping...", flush=True)
            continue
        print(f"Processing {conference}, {len(data)} items", flush=True)

        result_data = []
        for item in tqdm(data, desc="Scoring items"):
            # Filter first so skipped items are not touched any further.
            if test_only and item["set"] not in ("test", "dev"):
                continue
            text = item["text"]

            if context:
                # NOTE(review): keys of a parsed JSON object are always
                # strings, so this lookup can only hit when
                # item["paper_number"] is a string as well -- confirm.
                context_text = (
                    all_paper_text.get(conference, {})
                    .get(item["set"], {})
                    .get(item["paper_number"], {})
                    .get("full_text", "")
                )
                if context_text == "":
                    print(f"Context not found for {item['paper_number']} in {conference}, using only text", flush=True)
                    prob = bino.compute_score(text)
                else:
                    prob = bino.compute_score_with_context(text, context_text)
            else:
                prob = bino.compute_score(text)

            result_data.append({
                "id": item["id"],
                "paper_number": item["paper_number"],
                "model": item["model"],
                "set": item["set"],
                "key": item["key"],
                "category": item["category"],
                "binoculars": {
                    "prob": prob,
                },
            })
        all_result_data[conference] = result_data

        # Checkpoint everything scored so far after each conference.
        _write_json_atomic(all_result_data, output_path)
        print(f"Saved results upto {conference} to {output_path}", flush=True)

# Run the scoring job only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    run()
