# Unwatermarked sampling for Water-Prob-V2 (5-gram)

import torch
import ujson as json
import os
import numpy as np
from torch.nn import functional as F
from typing import Union
from transformers import AutoTokenizer
import sys
import pickle

# Output-path prefixes (run() appends the sampling settings and ".json")
# and the prompt text files, one of each per prompt.
json_file_path_1 = "../../data/results/prob2/5gram-prob2-uw-p1"
json_file_path_2 = "../../data/results/prob2/5gram-prob2-uw-p2"
prompt_file_path_1 = "../../data/prompts/5gram-p1-v2.txt"
prompt_file_path_2 = "../../data/prompts/5gram-p2-v2.txt"

json_file_paths = [json_file_path_1, json_file_path_2]

# Each prompt file is loaded verbatim (newlines included) at import time.
with open(prompt_file_path_1, "r") as f:
    prompt1 = f.read()

with open(prompt_file_path_2, "r") as f:
    prompt2 = f.read()

prompts = [prompt1, prompt2]

# Space-prefixed word lists: " A" .. " Z", the digit names, and four animals.
letters = [" " + chr(code) for code in range(ord("A"), ord("Z") + 1)]
numbers_en = [
    " zero",
    " one",
    " two",
    " three",
    " four",
    " five",
    " six",
    " seven",
    " eight",
    " nine",
]
animal_choice = [" cat", " dog", " tiger", " lion"]

def _sampling(logits, top_k=None, top_p=None, temperature=1.0, device="cuda"):
    """Sample one index per row from temperature / top-k / top-p filtered logits.

    Args:
        logits: Unnormalised scores with the vocabulary on the last axis
            (1-D or 2-D). A ``torch.Tensor`` is cloned, so the caller's
            tensor is never modified; any other value is converted with
            ``torch.tensor(logits, device=device)``.
        top_k: Keep only the ``top_k`` highest scores per row. ``None`` or a
            value ``<= 0`` disables the filter.
        top_p: Nucleus threshold in ``(0, 1]``. ``None`` or ``1`` disables
            the filter.
        temperature: Positive divisor applied to the logits before filtering.
        device: Only used when ``logits`` is not already a tensor.

    Returns:
        The result of ``torch.multinomial(probs, num_samples=1)``: shape
        ``(rows, 1)`` for 2-D input, ``(1,)`` for 1-D input.

    Raises:
        AssertionError: If ``temperature <= 0`` or ``top_p`` is outside
            ``(0, 1]``.

    Also prints the shape of the probability tensor on every call.
    """
    assert temperature > 0, "temperature must be positive"
    if top_p is not None:
        assert 0 < top_p <= 1, "top_p must be in the range (0, 1]"

    if isinstance(logits, torch.Tensor):
        _logits = logits.clone()
    else:
        _logits = torch.tensor(logits, device=device)

    # Integer input (e.g. a list of ints) cannot be divided in place by a
    # float temperature or hold -inf, so promote it first.
    if not _logits.is_floating_point():
        _logits = _logits.float()

    _logits /= temperature

    # Apply top-k sampling (None or <= 0 means "no top-k filter").
    if top_k is not None and top_k > 0:
        # Ensure top_k is not greater than the vocabulary size.
        top_k = min(top_k, _logits.size(-1))
        kth_best = torch.topk(_logits, top_k).values[..., -1:]
        _logits[_logits < kth_best] = float("-inf")

    # Apply top-p (nucleus) sampling (None or 1 means "no top-p filter").
    if top_p is not None and 0 < top_p < 1:
        sorted_logits, sorted_indices = torch.sort(_logits, descending=True)
        cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_indices_to_remove = cumulative_probs > top_p
        # Shift the mask right by one so the first token that crosses the
        # threshold is kept, and always keep the most likely token. Doing
        # this unconditionally also covers a single-entry vocabulary, where
        # the only token would otherwise be masked out.
        sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[
            ..., :-1
        ].clone()
        sorted_indices_to_remove[..., 0] = False

        # Scatter the mask back to the original (unsorted) positions along
        # the vocabulary axis; dim=-1 works for both 1-D and 2-D logits.
        indices_to_remove = sorted_indices_to_remove.scatter(
            -1, sorted_indices, sorted_indices_to_remove
        )
        _logits[indices_to_remove] = float("-inf")

    # Get probability distribution
    probs = F.softmax(_logits, dim=-1)
    print(f"Shape of probs: {probs.shape}")
    sampled_indices = torch.multinomial(probs, num_samples=1)
    return sampled_indices


def sample_batch_uw(logits, batch_size, temperature, top_k, top_p, device):
    """Sample ``batch_size`` token sequences by walking a nested logits dict.

    Every sequence starts at the root ``logits`` node. At each step one token
    is sampled per still-active sequence from that sequence's current node.
    If the token is a key of the node the walk descends into that child;
    otherwise the sequence stops.

    Args:
        logits: Root node of a nested dict. Each node stores a tensor under
            the key ``"logits"`` and child nodes under integer token-id keys.
            (The ``squeeze(0)`` suggests each tensor has a leading axis of
            size 1 -- TODO confirm against the code that writes the pickle.)
        batch_size: Number of independent sequences to sample.
        temperature: Forwarded to ``_sampling``.
        top_k: Forwarded to ``_sampling``.
        top_p: Forwarded to ``_sampling``.
        device: Device the per-step logits are moved to before sampling.

    Returns:
        A tuple ``(token_ids, context_ids)``:

        * ``token_ids`` -- ``int64`` NumPy array of length ``batch_size``.
          Entry ``i`` is the last sampled token when the walk stopped at a
          node whose only key is ``"logits"``; it is ``-1`` when the walk
          stopped at a node that still had children.
        * ``context_ids`` -- list of ``int64`` NumPy arrays, one per
          sequence, holding every sampled token including the final one.
    """
    nodes = [logits] * batch_size
    token_ids = np.full(batch_size, -1, dtype=np.int64)
    # Token histories are kept as plain Python lists on the host: they are
    # only ever appended to and returned as NumPy arrays, so growing device
    # tensors with torch.cat per token is pure overhead.
    contexts = [[] for _ in range(batch_size)]
    active = list(range(batch_size))

    while active:
        logits_batch = torch.stack(
            [nodes[i]["logits"].squeeze(0).to(device) for i in active]
        )
        # One device-to-host transfer per step instead of one per sequence.
        sampled = _sampling(
            logits=logits_batch,
            top_k=top_k,
            top_p=top_p,
            temperature=temperature,
            device=device,
        ).squeeze(1).tolist()

        still_active = []
        for i, token_id in zip(active, sampled):
            contexts[i].append(token_id)
            node = nodes[i]
            if token_id in node:
                # Known continuation: descend and keep sampling.
                nodes[i] = node[token_id]
                still_active.append(i)
            elif len(node) == 1 and "logits" in node:
                # Leaf node: the sampled token is the result for this sequence.
                token_ids[i] = token_id
            # Otherwise the token left the dict before reaching a leaf; the
            # sequence is dropped and its token id stays at -1.
        active = still_active

    context_ids = [np.asarray(ctx, dtype=np.int64) for ctx in contexts]

    return token_ids, context_ids

def run(combinations, model_name, samples, batch_size, device):
    """Sample unwatermarked tokens for both prompts and write count tables.

    For each of the two prompts and each sampling combination, draws
    ``samples`` sequences in batches with ``sample_batch_uw``, decodes every
    valid sequence, and counts the final token per context (the first five
    space-separated words of the decoded text). The counts are written to
    ``{json_file_paths[idx]}-{model_name}-temp-...-iter-{sample_iter}.json``;
    a combination whose output file already exists is skipped.

    This function reads the module-level names ``model_path`` and
    ``sample_iter``, which the ``__main__`` section assigns before calling it.

    Args:
        combinations: Iterable of dicts with the keys ``"temperature"``,
            ``"topp"`` and ``"topk"``.
        model_name: Used in the logits pickle paths and output file names.
        samples: Total number of sequences per combination; must be a
            multiple of ``batch_size``.
        batch_size: Number of sequences sampled per call to
            ``sample_batch_uw``.
        device: Device the logits are moved to and sampling runs on.

    Raises:
        AssertionError: If ``samples`` is not a multiple of ``batch_size``.
    """
    num_iters = samples
    assert num_iters % batch_size == 0, "num_iters must be a multiple of batch_size"
    num_batches = num_iters // batch_size

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    if model_name in ["opt27b", "opt13b"]:
        # NOTE(review): presumably the logits width of these models differs
        # from tokenizer.vocab_size -- confirm where 50272 comes from.
        vocab_size = 50272
    else:
        vocab_size = tokenizer.vocab_size

    print("Loading remote logits...")
    # NOTE: pickle.load can execute arbitrary code; only load logits files
    # that come from a trusted source.
    remote_logits = []
    for prompt_tag in ("p1", "p2"):
        logits_path = f"../../data/logits/5gram-v2-{prompt_tag}-logits-{model_name}.pickle"
        with open(logits_path, "rb") as f:
            remote_logits.append(pickle.load(f))

    print("Transporting logits to cuda...")

    def convert_logits_to_tensor(d):
        """Recursively move every ``"logits"`` entry of ``d`` to ``device``."""
        for key, value in d.items():
            if isinstance(value, dict):
                convert_logits_to_tensor(value)
            elif key == "logits":
                # Tensor.to returns a new tensor rather than moving in place,
                # so the result has to be stored back. Rebinding an existing
                # key is safe while iterating over items().
                d[key] = value.to(device)

    for prompt_logits in remote_logits:
        convert_logits_to_tensor(prompt_logits)

    print("Convert done. Starting sampling...")

    with torch.no_grad():
        for idx, prompt_logits in enumerate(remote_logits):
            print(f"Processing prompt {idx}...")

            for combination in combinations:
                temperature = combination["temperature"]
                top_p = combination["topp"]
                top_k = combination["topk"]

                print(f"Running combination: temperature={temperature}, topp={top_p}, topk={top_k}")

                # Nothing in this script fills the watermarked table; it is
                # written out empty so the JSON keeps both top-level keys.
                mapping_S_wm = {}
                mapping_S_uw = {}

                json_file_name = f"{json_file_paths[idx]}-{model_name}-temp-{temperature}-topp-{top_p}-topk-{top_k}-{samples}-iter-{sample_iter}.json"
                # if already exists, skip
                if os.path.exists(json_file_name):
                    print(f"File {json_file_name} already exists. Skipping...")
                    continue

                for batch_idx in range(num_batches):
                    print(f"Iter: {batch_idx + 1}/{num_batches}")
                    uw_tokens, uw_contexts = sample_batch_uw(
                        logits=prompt_logits,
                        batch_size=batch_size,
                        temperature=temperature,
                        top_k=top_k,
                        top_p=top_p,
                        device=device,
                    )

                    # Sequences that did not end on a leaf carry token id -1
                    # and are excluded from the counts.
                    uw_valid_indices = np.where(uw_tokens != -1)[0]
                    uw_valid_contexts = [uw_contexts[i] for i in uw_valid_indices]
                    valid_uw_tokens = uw_tokens[uw_valid_indices]

                    for i, ctx in enumerate(uw_valid_contexts):
                        decoded_text = tokenizer.decode(ctx).strip()
                        # The context key is the first five space-separated
                        # words, re-prefixed with a single space.
                        parts = decoded_text.split(' ', 5)
                        context_str = f" {' '.join(parts[:5])}"
                        if i % 1000 == 0:
                            print(f"Context: {context_str}")
                        token = valid_uw_tokens[i]

                        if context_str not in mapping_S_uw:
                            mapping_S_uw[context_str] = {"S_uw": [0] * vocab_size}
                        mapping_S_uw[context_str]["S_uw"][token] += 1

                results = {
                    "watermarked": {str(k): v for k, v in mapping_S_wm.items()},
                    "unwatermarked": {str(k): v for k, v in mapping_S_uw.items()},
                }

                with open(json_file_name, "w") as json_file:
                    json.dump(results, json_file, separators=(",", ":"))

                # Clear CUDA cache to free memory after each combination
                torch.cuda.empty_cache()
                print("Cleared CUDA cache after combination.")


if __name__ == "__main__":
    import argparse

    # Presets that appear in more than one option are defined once so the
    # "all" preset cannot drift away from "top" and "joint".
    top_combinations = [
        {"temperature": 1.0, "topp": 0.7, "topk": 0},
        {"temperature": 1.0, "topp": 0.8, "topk": 0},
        {"temperature": 1.0, "topp": 0.9, "topk": 0},
        {"temperature": 1.0, "topp": 1.0, "topk": 100},
        {"temperature": 1.0, "topp": 1.0, "topk": 200},
        {"temperature": 1.0, "topp": 1.0, "topk": 500},
    ]
    joint_combinations = [
        {"temperature": 0.8, "topp": 1.0, "topk": 50},
        {"temperature": 0.7, "topp": 1.0, "topk": 50},
        {"temperature": 0.6, "topp": 1.0, "topk": 50},
        {"temperature": 0.8, "topp": 0.7, "topk": 0},
        {"temperature": 0.7, "topp": 0.7, "topk": 0},
        {"temperature": 0.6, "topp": 0.7, "topk": 0},
        {"temperature": 0.6, "topp": 0.7, "topk": 50},
        {"temperature": 1.2, "topp": 0.7, "topk": 50},
        {"temperature": 0.8, "topp": 0.7, "topk": 50},
    ]

    # Maps every accepted --option value to its list of sampling settings.
    combinations_by_option = {
        "all": [
            {"temperature": 1.0, "topp": 1.0, "topk": 0},
            {"temperature": 0.8, "topp": 1.0, "topk": 0},
            {"temperature": 0.7, "topp": 1.0, "topk": 0},
            {"temperature": 0.6, "topp": 1.0, "topk": 0},
            {"temperature": 1.2, "topp": 1.0, "topk": 0},
            {"temperature": 1.4, "topp": 1.0, "topk": 0},
            {"temperature": 1.6, "topp": 1.0, "topk": 0},
        ] + top_combinations + joint_combinations,
        "temp": [
            {"temperature": 1.5, "topp": 1.0, "topk": 0},
            {"temperature": 1.4, "topp": 1.0, "topk": 0},
            {"temperature": 1.3, "topp": 1.0, "topk": 0},
            {"temperature": 1.2, "topp": 1.0, "topk": 0},
            {"temperature": 1.1, "topp": 1.0, "topk": 0},
            {"temperature": 1.0, "topp": 1.0, "topk": 0},
            {"temperature": 0.9, "topp": 1.0, "topk": 0},
            {"temperature": 0.8, "topp": 1.0, "topk": 0},
            {"temperature": 0.7, "topp": 1.0, "topk": 0},
            {"temperature": 0.6, "topp": 1.0, "topk": 0},
            {"temperature": 0.5, "topp": 1.0, "topk": 0},
            {"temperature": 0.4, "topp": 1.0, "topk": 0},
            {"temperature": 0.3, "topp": 1.0, "topk": 0},
            {"temperature": 0.2, "topp": 1.0, "topk": 0},
            {"temperature": 0.1, "topp": 1.0, "topk": 0},
        ],
        "top": top_combinations,
        "joint": joint_combinations,
        "temp-most": [
            {"temperature": 1.2, "topp": 1.0, "topk": 0},
            {"temperature": 1.1, "topp": 1.0, "topk": 0},
            {"temperature": 1.0, "topp": 1.0, "topk": 0},
            {"temperature": 0.9, "topp": 1.0, "topk": 0},
            {"temperature": 0.8, "topp": 1.0, "topk": 0},
        ],
        "experiment": [
            {"temperature": 1.0, "topp": 1.0, "topk": 0},
        ],
    }

    parser = argparse.ArgumentParser(description="Run script with parameters")
    parser.add_argument("--model_name", default="llama-2-7b-hf", type=str, required=False, help="model_name parameter")
    parser.add_argument("--samples", type=int, required=True, help="samples parameter")
    parser.add_argument("--device", type=str, required=True, help="device parameter")
    # `choices` makes argparse reject an unknown preset with a usage error
    # instead of failing later with an undefined `combinations`.
    parser.add_argument(
        "--option",
        default="experiment",
        type=str,
        required=False,
        choices=list(combinations_by_option),
        help="preset of temperature/top-p/top-k combinations to run",
    )
    parser.add_argument("--batch_size", type=int, required=True, help="batch_size parameter")
    parser.add_argument("--model_path", default="/data2/shared_model/llama-2-7b-hf", type=str, required=False, help="model_path parameter")
    parser.add_argument("--sample_iter", type=int, required=True, help="sample_iter parameter")

    args = parser.parse_args()

    combinations = combinations_by_option[args.option]

    print("Device: ", args.device)
    # Restrict the visible GPUs to the requested one(s) before the
    # availability check below.
    os.environ["CUDA_VISIBLE_DEVICES"] = f"{args.device}"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # run() reads these two names as module-level globals.
    model_path = args.model_path
    sample_iter = args.sample_iter
    run(
        combinations=combinations,
        model_name=args.model_name,
        samples=args.samples,
        batch_size=args.batch_size,
        device=device
    )
    sys.exit(0)