import argparse
import sys
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# import numpy as np
from peft import PeftConfig, get_peft_model

# Command-line interface for the adapter-merge script.
parser = argparse.ArgumentParser(
    description="Merge a PEFT adapter into its base causal-LM and save the result."
)
parser.add_argument(
    "--model_name_or_path",
    type=str,
    default="meta-llama/Llama-2-7b-hf",
    help="Base model identifier or local path passed to from_pretrained().",
)
parser.add_argument(
    "--model_max_length",
    type=int,
    default=512,
    help="Value forwarded to the tokenizer as model_max_length.",
)
# Every path used later is derived from --output_dir, so it must be given:
# without it the script would silently operate on the literal directory
# "None" (e.g. "None/adapter_model").
parser.add_argument(
    "--output_dir",
    type=str,
    required=True,
    help=(
        "Directory that contains the 'adapter_model' sub-directory; the "
        "tokenizer and merged model are written to '<output_dir>/test'."
    ),
)
args = parser.parse_args()

# Load the base causal-LM in half precision, without quantization.
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    args.model_name_or_path,
    device_map="auto",
    torch_dtype=torch.float16,
    quantization_config=None,
    trust_remote_code=True,
)

# Tokenizer options, collected in one place and forwarded unchanged.
tokenizer_options = {
    "model_max_length": args.model_max_length,
    "padding_side": "right",
    "use_fast": True,
    "trust_remote_code": True,
}
tokenizer = AutoTokenizer.from_pretrained(
    args.model_name_or_path, **tokenizer_options
)

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Store the tokenizer in the same directory the merged model is written to.
tokenizer.save_pretrained(f"{args.output_dir}/test")

# Flag the base model as model-parallel before wrapping it with PEFT.
# NOTE(review): presumably these flags are read by downstream tooling
# (e.g. a trainer); nothing in this script reads them directly - confirm.
model.model_parallel = True
model.is_parallelizable = True

# The adapter is read from <output_dir>/adapter_model and the merged model
# is written to <output_dir>/test (the same directory as the tokenizer).
adapter_dir = f"{args.output_dir}/adapter_model"
merged_dir = f"{args.output_dir}/test"

# Wrap the base model using the adapter's saved configuration, then load
# the adapter weights into the 'default' adapter slot.
config = PeftConfig.from_pretrained(adapter_dir)
peft_model = get_peft_model(model, config)
peft_model.load_adapter(adapter_dir, adapter_name='default')

# Merge the adapter into the base model and drop the PEFT wrapper.
peft_model = peft_model.merge_and_unload()

# Announce the save *before* it starts: writing the weights can take a
# while, and the message would be misleading if printed after completion.
print(f"Saving merged model to {merged_dir}")
peft_model.save_pretrained(merged_dir)
