import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the tokenizer and the causal-LM weights for the chosen checkpoint.
# device_map="auto" lets the library decide where the weights are placed,
# so nothing below may assume a particular device.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Prepare the input prompt.
# NOTE(review): the model id names an "Instruct" variant; the raw prompt is
# tokenized as-is here. Consider tokenizer.apply_chat_template if chat
# formatting is expected -- confirm against the model card.
prompt = "Write a short sentence about a sunny day."
# Move the encoded inputs to wherever the model was actually placed instead
# of hard-coding "cuda", which fails on hosts without a CUDA device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Generate text: sample up to 20 new tokens at temperature 0.7.
outputs = model.generate(
    **inputs,
    max_new_tokens=20,
    do_sample=True,
    temperature=0.7,
)

# Decode the first (only) returned sequence, dropping special tokens,
# and print it.
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)