from typing import Dict, List, Tuple

import numpy as np
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from baselines.rome import repr_tools
from util import nethook

from baselines.memit.memit_hparams import MEMITHyperParams

from disentangle.utils import Saver

# Default experiment name that compute_z hands to Saver.init_ae_model when the
# caller does not pass a `disentangle_model` override.
exp_name = "xl2E-ctr4_17-17m-37h_1600H__12"

# Module-level cache: both stay None until compute_z loads the auxiliary
# model on its first call, after which they are reused across calls.
ae_config = None
ae_model = None

def compute_z(
    model: AutoModelForCausalLM,
    tok: AutoTokenizer,
    block: List[Dict],
    hparams: MEMITHyperParams,
    layer: int,
    context_templates: List[List[str]],
    disentangle_model = None
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Computes the value (right) vector for the rank-1 update.
    Runs a simple optimization procedure.

    The optimized variable is ``ae_model.delta``, a parameter of the
    module-level cached auxiliary model. On every forward pass the hidden
    state produced by the rewrite layer at each prompt's lookup token is
    replaced by the third output of ``ae_model(..., add_delta=True)``. Adam
    then minimizes the sum of:

    * the masked negative log-likelihood of each request's target string,
      read out at ``max(hparams.v_loss_layer, layer)`` through ``ln_f`` and
      the LM head,
    * ``hparams.kl_factor`` times a KL term on the "<subject> is a" prompts,
      relative to the distribution recorded on the first iteration,
    * ``hparams.v_weight_decay`` times a norm penalty on ``delta``.

    Args:
        model: Causal LM being edited. All of its parameters get
            ``requires_grad=False`` and this is not undone on return.
        tok: Tokenizer matching ``model``.
        block: Requests processed jointly; each one is indexed with
            ``"prompt"``, ``"subject"`` and ``["target_new"]["str"]``.
        hparams: Hyper-parameters. Fields read here: ``lm_head_module``,
            ``ln_f_module``, ``fact_token``, ``v_loss_layer``,
            ``layer_module_tmp``, ``v_lr``, ``v_num_grad_steps``,
            ``kl_factor``, ``v_weight_decay``, ``clamp_norm_factor``.
        layer: Index of the layer whose output is overwritten.
        context_templates: Groups of template strings; every template in
            every group is formatted with every request's prompt.
        disentangle_model: Optional experiment name passed to
            ``Saver.init_ae_model``. When ``None`` the module-level
            ``exp_name`` is used.

    Returns:
        ``(target, ws2)``: ``target`` is the third output of ``ae_model`` on
        the initially recorded hidden states (with the optimized delta),
        averaged over its first dimension; ``ws2`` is the first
        ``target.size(0)`` columns of ``ae_model.s_enc2.weight``.

    Note:
        Requires CUDA (inputs and the auxiliary model are moved to "cuda").
        Mutates the module globals ``ae_config``, ``ae_model`` and
        ``exp_name``, and resets ``ae_model.delta`` to zero on every call.
    """

    global ae_config, ae_model, exp_name

    # The auxiliary model is cached at module level. It is (re)loaded when
    # nothing is cached yet or when the caller asks for a different experiment
    # than the one currently loaded; previously a changed `disentangle_model`
    # was silently ignored once a model had been cached. `exp_name` is only
    # updated after a successful load so it always matches the cached model.
    requested_name = exp_name if disentangle_model is None else disentangle_model
    if ae_model is None or requested_name != exp_name:
        loaded_config, loaded_model = Saver.init_ae_model(requested_name)
        loaded_model = loaded_model.cuda()
        loaded_model = loaded_model.eval()
        ae_config, ae_model = loaded_config, loaded_model
        exp_name = requested_name

    # Get model parameters
    lm_w, ln_f = (
        nethook.get_parameter(model, f"{hparams.lm_head_module}.weight").T,
        nethook.get_module(model, hparams.ln_f_module),
    )
    try:
        lm_b = nethook.get_parameter(model, f"{hparams.lm_head_module}.bias")
    except LookupError:
        # LM head without a bias: use zeros with the model's dtype/device.
        lm_b = next(model.parameters()).new_zeros(model.config.vocab_size)

    print("Computing right vector (v)")

    # Compile list of rewriting and KL x/y pairs.
    # Rewriting prompts: every (request, template) combination, still holding
    # a "{}" placeholder for the subject. KL prompts: one per request.
    prompts = [
        context.format(request["prompt"])
        for request in block for context_types in context_templates for context in context_types
    ]
    kl_prompts = ["{} is a" for request in block]

    # One subject per rewriting prompt, followed by one per KL prompt, so that
    # `subjects` lines up with `all_prompts` below.
    subjects = [
        request['subject']
        for request in block for context_types in context_templates for context in context_types
    ] + [request['subject'] for request in block]

    # Targets are only needed for the rewriting prompts; the KL prompts have
    # no target, so nothing is tokenized for them.
    targets = [
        request["target_new"]["str"]
        for request in block for context_types in context_templates for context in context_types
    ]
    target_ids = [tok(target, return_tensors="pt").to("cuda")['input_ids'][0] for target in targets]

    # Append all but the last target token to each rewriting prompt, so the
    # logits at the final len(target) positions are the target predictions.
    rewriting_prompts = [p + tok.decode(t[:-1]) for p, t in zip(prompts, target_ids)]
    all_prompts = rewriting_prompts + kl_prompts

    input_tok = tok(
        [prompt.format(subject) for prompt, subject in zip(all_prompts, subjects)],
        return_tensors="pt",
        padding=True,
    ).to("cuda")

    # Compute rewriting targets: -100 everywhere except the last len(target)
    # attended positions of each rewriting row, which hold the target ids.
    rewriting_targets = torch.tensor(-100, device="cuda").repeat(len(rewriting_prompts), *input_tok["input_ids"].shape[1:])
    for i, ids in enumerate(target_ids):
        ex_len = input_tok["attention_mask"][i].sum()
        rewriting_targets[i, ex_len - len(ids) : ex_len] = ids

    # Compute indices of the tokens where the fact is looked up
    lookup_idxs = [
        find_fact_lookup_idx(
            prompt, subject, tok, hparams.fact_token, verbose=(i == 0)
        )
        for i, (subject, prompt) in enumerate(zip(subjects, all_prompts))
    ]

    # Finalize rewrite and loss layers
    loss_layer = max(hparams.v_loss_layer, layer)
    print(f"Rewrite layer is {layer}")
    print(f"Tying optimization objective to {loss_layer}")
    rewrite_layer_name = hparams.layer_module_tmp.format(layer)
    loss_layer_name = hparams.layer_module_tmp.format(loss_layer)

    # Set up an optimization over a latent vector that, when output at the
    # rewrite layer, i.e. hypothesized fact lookup location, will induce the
    # target token to be predicted at the final layer.
    target_init, output_init, kl_distr_init = None, None, None
    # Latest ae_model outputs; currently only referenced by the alternative
    # regularizers that are commented out further down.
    s1_cur, s2_cur, s3_cur = None, None, None

    # compute relation representation
    word_repr_args = dict(
        model=model,
        tok=tok,
        layer=ae_config['relation_layer'],
        module_template=ae_config['layer_tmp'],
        track="out",
    )
    # The questions are the prompts WITHOUT the appended target tokens.
    questions = [prompt.format(subject) for subject, prompt in zip(subjects, prompts+kl_prompts)]
    # Index of the last attended token of each question
    # (assumes the tokenizer pads on the right -- TODO confirm).
    relation_idx = [[(q-1).item()] for q in tok(questions, return_tensors="pt", padding=True)['attention_mask'].sum(-1)]
    relation_repr = repr_tools.get_reprs_at_idxs(
        contexts=questions,
        idxs=relation_idx,
        **word_repr_args,
    )

    # Inserts new "delta" variable at the appropriate part of the computation
    def edit_output_fn(cur_out, cur_layer):
        nonlocal target_init, output_init, s1_cur, s2_cur, s3_cur

        if cur_layer == rewrite_layer_name:
            # Feed the hidden state at every row's lookup token through the
            # auxiliary model together with the relation representation.
            s1, s2, s3 = ae_model(cur_out[0][range(len(lookup_idxs)), lookup_idxs], relation_repr, add_delta=True)
            s1_cur, s2_cur, s3_cur = s1, s2, s3

            # Store initial value of the vector of interest
            if target_init is None:
                print(s1[0].norm(), s2[0].norm())
                print("Recording initial value of v*")
                # Initial value is recorded for the clean sentence
                output_init = cur_out[0][range(len(lookup_idxs)), lookup_idxs].detach().clone()
                target_init = s1[0].detach().clone()

            # Add intervened delta: overwrite the lookup-token hidden states
            # in place with the third output of the auxiliary model.
            cur_out[0][range(len(lookup_idxs)), lookup_idxs] = s3

        return cur_out

    # Optimizer: delta starts from zero and is the only trainable tensor.
    ae_model.delta.data = torch.zeros_like(ae_model.delta)
    opt = torch.optim.Adam([ae_model.delta], lr=hparams.v_lr)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=10, gamma=0.8)
    nethook.set_requires_grad(False, model)
    nethook.set_requires_grad(False, ae_model)
    nethook.set_requires_grad(True, ae_model.delta)

    # Execute optimization; the step budget scales with the block size.
    total_steps = hparams.v_num_grad_steps * len(block)
    for it in range(total_steps):
        opt.zero_grad()

        # Forward propagation
        with nethook.TraceDict(
            module=model,
            layers=[
                loss_layer_name,
                rewrite_layer_name,
            ],
            retain_input=False,
            retain_output=True,
            edit_output=edit_output_fn,
        ) as tr:
            logits = model(**input_tok).logits

            # Compute distribution for KL divergence: the KL prompts occupy
            # the last len(kl_prompts) batch rows (negative row index), each
            # read at its own lookup position.
            kl_logits = torch.stack(
                [
                    logits[i - len(kl_prompts), idx, :]
                    for i, idx in enumerate(lookup_idxs[-len(kl_prompts) :])
                ],
                dim=0,
            )
            kl_log_probs = torch.nn.functional.log_softmax(kl_logits, dim=1)
            if kl_distr_init is None:
                # Reference distribution, taken while delta is still zero.
                kl_distr_init = kl_log_probs.detach().clone()

        # Compute loss on rewriting targets
        full_repr = tr[loss_layer_name].output[0][
            : len(rewriting_prompts)
        ]
        log_probs = torch.log_softmax(ln_f(full_repr) @ lm_w + lm_b, dim=2)
        # -100 entries are swapped for index 0 so gather is valid; they are
        # zeroed out again by `mask`.
        loss = torch.gather(
            log_probs,
            2,
            torch.where(rewriting_targets != -100, rewriting_targets, 0).unsqueeze(2),
        ).squeeze(2)
        mask = (rewriting_targets != -100).float()

        # Aggregate total losses
        nll_loss_each = -(loss * mask).sum(1) / mask.sum(1)
        nll_loss = nll_loss_each.mean()
        kl_loss = hparams.kl_factor * torch.nn.functional.kl_div(
            kl_distr_init, kl_log_probs, log_target=True, reduction="batchmean"
        )
        # Operator precedence: this is ||delta|| / (||target_init|| ** 2).
        weight_decay = hparams.v_weight_decay * (torch.norm(ae_model.delta) / torch.norm(target_init) ** 2)
        # weight_decay = hparams.v_weight_decay * (torch.norm(s3_cur) / torch.norm(output_init[0]) ** 2)
        # weight_decay = hparams.v_weight_decay * torch.norm(delta) ** 2

        loss = nll_loss + kl_loss + weight_decay
        print(
            f"loss {np.round(loss.item(), 3)} = {np.round(nll_loss.item(), 3)} + {np.round(kl_loss.item(), 3)} + {np.round(weight_decay.item(), 3)} "
            f"avg prob of [{[request['target_new']['str'] for request in block]}] "
            f"{torch.exp(-nll_loss_each).mean().item()}"
        )
        # Early stop once the objective is small enough.
        if loss < 5e-2:
            break

        # No update on the final iteration, so the loss printed last
        # corresponds to the delta that is actually returned.
        if it == total_steps - 1:
            break

        # Backpropagate
        loss.backward()
        opt.step()
        scheduler.step()

        # Project within L2 ball
        max_norm = hparams.clamp_norm_factor * target_init.norm()
        if ae_model.delta.norm() > max_norm:
            with torch.no_grad():
                ae_model.delta[...] = ae_model.delta * max_norm / ae_model.delta.norm()

    # Re-apply the auxiliary model (with the optimized delta) to the hidden
    # states recorded before any edit and average over the batch rows.
    target = ae_model(output_init, relation_repr, add_delta=True)[2].mean(0)
    print(
        f"Init norm {output_init[0].norm()} | Delta norm {ae_model.delta.norm()} | Target norm {target.norm()}"
    )
    ws2 = ae_model.s_enc2.weight[:, :target.size(0)]

    return target, ws2


def get_module_input_output_at_words(
    model: AutoModelForCausalLM,
    tok: AutoTokenizer,
    layer: int,
    context_templates: List[str],
    words: List[str],
    module_template: str,
    fact_token_strategy: str,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Retrieves detached representations for a word at the input and
    output of a particular layer module.

    Args:
        model: Model whose module is traced.
        tok: Tokenizer matching ``model``.
        layer: Layer index forwarded to ``repr_tools``.
        context_templates: Templates forwarded to
            ``repr_tools.get_reprs_at_word_tokens``.
        words: Words forwarded alongside ``context_templates``.
        module_template: Module-name template forwarded to ``repr_tools``.
        fact_token_strategy: Must start with ``"subject_"``; the remainder
            is passed on as the ``subtoken`` argument.

    Returns:
        ``(l_input, l_output)``, both detached from the autograd graph.

    Raises:
        Exception: For the ``"last"`` strategy, which is deliberately
            disabled.
        ValueError: For any other unrecognized strategy.
    """

    if fact_token_strategy == "last":
        # This strategy was already hard-disabled: the old implementation sat
        # unreachable behind this raise and has been removed.
        raise Exception("This is definitely bugged, fix it.")
    if not fact_token_strategy.startswith("subject_"):
        raise ValueError(f"fact_token={fact_token_strategy} not recognized")

    l_input, l_output = repr_tools.get_reprs_at_word_tokens(
        track="both",
        subtoken=fact_token_strategy[len("subject_") :],
        context_templates=context_templates,
        words=words,
        model=model,
        tok=tok,
        layer=layer,
        module_template=module_template,
    )

    return l_input.detach(), l_output.detach()


def find_fact_lookup_idx(
    prompt: str,
    subject: str,
    tok: AutoTokenizer,
    fact_token_strategy: str,
    verbose=True,
) -> int:
    """
    Computes hypothesized fact lookup index given a sentence and subject.

    Args:
        prompt: Template containing a ``{}`` placeholder for the subject.
        subject: Subject string substituted into ``prompt``.
        tok: Tokenizer; used for the ``subject_*`` strategies and for the
            verbose printout.
        fact_token_strategy: ``"last"`` selects index ``-1``;
            ``"subject_<subtoken>"`` delegates to
            ``repr_tools.get_words_idxs_in_templates``.
        verbose: When true, print the chosen index, the filled-in sentence
            and the decoded token at that index.

    Returns:
        The lookup token index.

    Raises:
        ValueError: If ``fact_token_strategy`` is neither of the above.
    """

    subject_prefix = "subject_"

    if fact_token_strategy == "last":
        lookup_idx = -1
    elif fact_token_strategy.startswith(subject_prefix):
        word_idxs = repr_tools.get_words_idxs_in_templates(
            tok=tok,
            context_templates=[prompt],
            words=[subject],
            subtoken=fact_token_strategy[len(subject_prefix) :],
        )
        # Single template, single word: take its only index.
        lookup_idx = word_idxs[0][0]
    else:
        raise ValueError(f"fact_token={fact_token_strategy} not recognized")

    # Built unconditionally, so a malformed prompt fails even when not verbose.
    sentence = prompt.format(subject)
    if not verbose:
        return lookup_idx

    token_ids = tok(sentence)["input_ids"]
    print(
        f"Lookup index found: {lookup_idx} | Sentence: {sentence} | Token:",
        tok.decode(token_ids[lookup_idx]),
    )
    return lookup_idx
