import numpy as np
import torch
from scipy.special import softmax
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer

# Class names indexed by class id: the score at position i of the model's
# output is reported under LABELS[i].
# NOTE(review): assumes the checkpoint orders its classes this way -- confirm
# against the model card.
LABELS = [
    "negative",
    "neutral",
    "positive",
]

# Checkpoint identifier shared by the tokenizer and the classifier.
_CHECKPOINT = "cardiffnlp/twitter-roberta-base-sentiment"

# Both objects are created once at import time and reused by every call.
TOKENIZER = AutoTokenizer.from_pretrained(_CHECKPOINT)
# Attentions and hidden states are requested so the forward pass returns them.
MODEL = AutoModelForSequenceClassification.from_pretrained(
    _CHECKPOINT,
    output_attentions=True,
    output_hidden_states=True,
)


def get_attn_twitter_roberta(text: str):
    """Classify the sentiment of ``text`` and collect the model's attention maps.

    The forward pass runs under ``torch.no_grad()``: this function is used for
    inference only, so no autograd graph is built and the returned attention
    tensors do not require gradients.

    Args:
        text: Input string, tokenized as a single sequence.

    Returns:
        A 3-tuple ``(attn, input_tokens, classification)``:

        * ``attn`` -- ``{"enc_attn": [...]}`` holding one tensor per entry of
          ``outputs.attentions`` (one entry per layer according to the
          Transformers documentation), each taken at batch index 0.
        * ``input_tokens`` -- list of strings obtained by decoding every input
          id on its own, special tokens included.
        * ``classification`` -- dict mapping each name in ``LABELS`` to its
          softmax score rounded to 4 decimals, inserted from the highest
          score to the lowest.
    """
    input_ids = TOKENIZER(text, return_tensors="pt").input_ids

    # Decode ids one at a time so there is exactly one string per input position.
    input_tokens = [
        TOKENIZER.decode(token_id, skip_special_tokens=False)
        for token_id in input_ids[0]
    ]

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = MODEL(input_ids)

    # Classification: softmax over the logits of the single sequence in the batch.
    scores = softmax(outputs.logits[0].detach().numpy())
    ranking = np.argsort(scores)[::-1]  # class indices, best score first
    classification = {
        LABELS[class_idx]: np.round(float(scores[class_idx]), 4)
        for class_idx in ranking
    }

    # Attention: drop the batch dimension of every entry in outputs.attentions.
    encoder_attentions = [layer_attn[0] for layer_attn in outputs.attentions]
    attn = {"enc_attn": encoder_attentions}

    return attn, input_tokens, classification
