"""
An example of LLM prediction using cache eviction.
Requires 'transformers==4.47.0'.
"""
import torch
import sys

sys.path.append("obc")
from monkey_patch.utils import enable_optimal_brain_kv, enable_optimal_brain_kv_flashattn2
from utils import load_kv_cache, load_model_and_tokenizer, seed_everything
from cache_utils import *

model_name = "meta-llama/Llama-3.1-8B-Instruct"
prompt = "###\nArticle: Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation. Workers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders. The Welsh Government said more people than ever were getting help to address housing problems. Changes to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation. Prison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered. However, the same could not be said for men, the charity said, because issues which often affect them, such as post traumatic stress disorder or drug dependency, were often viewed as less of a priority. Andrew Stevens, who works in Welsh prisons trying to secure housing for prison leavers, said the need for accommodation was \"chronic\". \"There's a desperate need for it, finding suitable accommodation for those leaving prison there is just a lack of it everywhere,\" he said. \"It could take six months to a year, without a lot of help they could be on the streets for six months. \"When you think of the consequences of either being on the street, especially with the cold weather at the moment or you may have a roof over your head, sometimes there is only one choice.\" Mr Stevens believes building more one-bedroom flats could help ease the problem. \"The average price is a hundred pounds a week to keep someone in a rented flat, prison is a lot more than that so I would imagine it would save the public purse quite a few pounds,\" he said. Official figures show 830 one-bedroom properties were built in the year to March 2016, of an overall total of 6,900 new properties in Wales. Marc, 50, who has been in and out of prison for the past 20 years for burglary offences, said he struggled to find accommodation each time he was released. He said he would ask himself: \"Where am I going to stay? Where am I going to live? Have I got somewhere where I can see my daughter.\" \"You're put out among the same sort of people doing the same sort of thing, and it's difficult, it's difficult to get away from it. It's like every man for himself, there's nothing.\" Marc has now found stable accommodation with homeless charity Emmaus and said it had been life changing. \"You feel safe, you got hot food, you've got company of people in similar situations to yourself but all dealing with different issues. It's a constructive, helpful atmosphere,\" he said. Tom Clarke, chief executive of Emmaus South Wales, agreed there was not enough support available. \"We do still see [people] homeless on the streets, so clearly they haven't got accommodation and haven't got provision,\" he said. \"I think the key is connecting people with the services they need. I don't delude myself that Emmaus can offer a one size fits all for everyone, we can't. \"But there must be other opportunities and given suitable encouragement I believe that can and should happen.\" A Welsh Government spokesman said the national pathway for homeless services to children, young people and adults in the secure estate had prevented many people from losing their home whilst serving their prison sentence. It added there were already significant demands for one-bedroom flats across the public and private sector and it was providing 20,000 new affordable homes in the next five years.\n\nSummarize the above article in 1 sentence.\n"


if __name__ == "__main__":

    # Loading
    seed_everything(42)
    model, tokenizer = load_model_and_tokenizer(
        model_name_or_path=model_name,
        precision="bf16"
    )
    model.eval()
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)

    enable_optimal_brain_kv(model)
    print("OBCache enabled")

    for method in ["full", "h2o", "obcV", "obcK", "obcVK"]:
        past_key_values = load_kv_cache(
            method = method,
            num_recent = 16, num_heavy = 112,
            decode_evict = True
        )
        print(past_key_values)

        # Inference w/ Cache Eviction
        model_inputs = tokenizer([prompt], return_tensors="pt")
        generated_ids = model.generate(
            model_inputs.input_ids.to(model.device),
            attention_mask=model_inputs.attention_mask.to(model.device),
            max_new_tokens=64,
            past_key_values=past_key_values,
            do_sample=False,
            temperature=1.0,
        )
        generated_ids = [
            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
        print("generated response: \n", response)