from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch


model_path = "../../../../huggingface/meditron-70b"
# Load the tokenizer and model from the local checkpoint directory.
# No quantization is requested here: without an explicit dtype,
# from_pretrained falls back to its default (float32 per the transformers
# docs), which is needlessly large for a 70B-parameter model. Half precision
# halves the weight footprint using only the torch import this file already has.
# NOTE(review): true 8-bit loading would need a quantization config and the
# bitsandbytes package, which this script does not depend on.
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,  # Load weights in half precision
    device_map="auto",          # Automatically map to available devices
)

# Initialize the pipeline
text_generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Define your prompt
prompt = "Explain the process of diagnosing diabetes."

# Generate text
output = text_generator(
    prompt,
    max_new_tokens=150,
    num_return_sequences=1,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    top_k=50
)

# Print the generated text
print(output[0]['generated_text'])
