import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# ---------------------------------------------------------------------------
# Configuration: edit these three locations before running the script.
# ---------------------------------------------------------------------------
# Identifier/path handed to `from_pretrained` for both the base model and
# the tokenizer.
# NOTE(review): the name suggests a bitsandbytes 4-bit quantized checkpoint;
# merging an adapter into quantized weights may introduce rounding error --
# confirm this is the intended base for the merge.
base_model_path = "unsloth/Qwen3-4B-unsloth-bnb-4bit"
# Location passed to `PeftModel.from_pretrained` as the LoRA checkpoint.
lora_checkpoint_path = "/your/lora/checkpoint/dir"
# Directory that receives both the merged model and the tokenizer files.
output_merged_path = "/your/new/merged/checkpoint/dir"

# Step 1: base model + tokenizer, both resolved from `base_model_path`.
# The model is requested in float16 with automatic device placement.
print("Loading base model...")
load_options = {"device_map": "auto", "torch_dtype": torch.float16}
model = AutoModelForCausalLM.from_pretrained(base_model_path, **load_options)
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Step 2: wrap the base model with the LoRA adapter from the checkpoint.
print("Loading LoRA weights...")
adapted = PeftModel.from_pretrained(model, lora_checkpoint_path)

# Step 3: merge the adapter and rebind `model` to the object returned by
# `merge_and_unload()`.
print("Merging LoRA weights into base model...")
model = adapted.merge_and_unload()

# Step 4: write the model first, then the tokenizer, into the same directory.
print(f"Saving merged model to {output_merged_path} ...")
for artifact in (model, tokenizer):
    artifact.save_pretrained(output_merged_path)

print("Done!")
