import os
# Process-wide environment for this run. These are applied before the
# torch / transformers imports further down; presumably those libraries read
# them at initialisation, so keep this ahead of the imports (TODO confirm).
os.environ.update(
    {
        # NCCL / CUDA runtime switches.
        "NCCL_P2P_DISABLE": "0",
        "CUDA_LAUNCH_BLOCKING": "1",
        "TORCH_USE_CUDA_DSA": "1",
        # Tooling switches (tokenizers library, pydevd debugger).
        "TOKENIZERS_PARALLELISM": "false",
        "PYDEVD_WARN_SLOW_RESOLVE_TIMEOUT": "2.0",
        # Only device index 3 is exposed to this process.
        "CUDA_VISIBLE_DEVICES": "3",
    }
)

from transformers import AutoTokenizer, AutoModelForCausalLM, PreTrainedModel, PreTrainedTokenizer
from transformers.models.deepseek_v2.modeling_deepseek_v2 import DeepseekV2ForCausalLM
import torch
import torch.nn as nn
from typing import Optional
from extend import ppl_eval

def original_baseline(model_name: str):
    """Load the unmodified DeepseekV2 causal LM and its tokenizer.

    Args:
        model_name: Identifier or path handed to both ``from_pretrained``
            calls.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is requested with
        ``torch_dtype=torch.bfloat16`` and ``device_map="auto"``; both
        loaders are called with ``trust_remote_code=True``.
    """
    # Tokenizer first, then the model, same order as the loads were written.
    tok = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

    model_kwargs = {
        "torch_dtype": torch.bfloat16,
        "device_map": "auto",
        "trust_remote_code": True,
    }
    lm = DeepseekV2ForCausalLM.from_pretrained(model_name, **model_kwargs)

    return lm, tok

if __name__ == "__main__":
    # Results are appended to this file, one "PPL: ..." entry per run.
    result_path = './output/results/deepseek/original_baseline/baseline.txt'
    # Create the output directory before any model work: open(..., 'a') does
    # not create parent directories, and failing after the evaluation has
    # finished would throw the computed results away.
    os.makedirs(os.path.dirname(result_path), exist_ok=True)

    # NOTE(review): "/Path/DeepSeek-V2-Lite" looks like a placeholder; point
    # it at the actual checkpoint location before running.
    model, tokenizer = original_baseline("/Path/DeepSeek-V2-Lite")

    # ppl_eval comes from the project-local `extend` module; presumably it
    # returns perplexity for each listed dataset (confirm against extend).
    ppls = ppl_eval(model, tokenizer, datasets=['c4','wikitext2'], batch_size=8)

    # Echo to stdout first so the numbers are visible even if the file write
    # below fails for some other reason (permissions, full disk, ...).
    print(ppls)
    with open(result_path, 'a') as f:
        f.write(f"\nPPL: {ppls}\n")
