from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from transformers.utils import logging
import time
import wget
import os

def download_model_files(model_name="gpt2", cache_dir="model_cache"):
    """Fetch a model's config, weights and tokenizer files into *cache_dir*.

    Any existing *cache_dir* is deleted first, so every call starts from an
    empty directory. Each file is requested from
    ``https://huggingface.co/<model_name>/resolve/main/`` and stored under
    its own name inside *cache_dir*.

    Returns:
        The *cache_dir* path that was passed in.
    """
    import shutil

    # Start from a clean slate: drop whatever a previous run left behind.
    if os.path.exists(cache_dir):
        print(f"Removing old cache directory: {cache_dir}")
        shutil.rmtree(cache_dir)

    os.makedirs(cache_dir, exist_ok=True)
    print(f"Downloading model files to {cache_dir}...")

    base_url = f"https://huggingface.co/{model_name}/resolve/main"
    wanted_files = (
        "config.json",
        "pytorch_model.bin",
        "tokenizer_config.json",
        "vocab.json",
        "merges.txt",
    )

    for file_name in wanted_files:
        remote_url = f"{base_url}/{file_name}"
        local_path = os.path.join(cache_dir, file_name)
        print(f"Downloading {file_name}...")
        wget.download(remote_url, local_path)
        # wget's progress bar does not end with a newline; emit one.
        print()

    return cache_dir

def load_model_and_tokenizer(model_name="gpt2"):
    """Download a causal language model and its tokenizer, then load both.

    The files are fetched into a local directory by ``download_model_files``
    and loaded from that directory with ``local_files_only=True``.

    Args:
        model_name: HuggingFace model identifier handed to
            ``download_model_files``. Defaults to ``"gpt2"``.

    Returns:
        A ``(model, tokenizer)`` tuple. The model is moved to CUDA (and
        loaded in float16) when CUDA is available; otherwise it is loaded in
        float32 and not moved. The tokenizer always has a pad token: the EOS
        token is reused when none is defined.

    Raises:
        ValueError: If a checkpoint with known reference values (the base
            GPT-2 checkpoint) loads with a different configuration.
        Exception: Any error raised while downloading or loading is printed
            and re-raised unchanged.
    """
    start_time = time.time()
    print(f"Loading {model_name} model...")

    # Enable logging and progress bars
    logging.set_verbosity_info()

    # Check if CUDA is available
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    print(f"Using device: {device}")

    # Half precision saves memory on the GPU, but CPU support for float16
    # kernels is slow or incomplete depending on the PyTorch version, so
    # fall back to float32 when no GPU is present.
    dtype = torch.float16 if use_cuda else torch.float32

    # Reference values describe the base GPT-2 checkpoint only; other
    # checkpoints legitimately differ and must not be rejected.
    gpt2_small_config = {
        'n_embd': 768,
        'n_layer': 12,
        'n_head': 12,
        'vocab_size': 50257,
    }
    known_configs = {
        "gpt2": gpt2_small_config,
        "openai-community/gpt2": gpt2_small_config,
    }

    try:
        # First download files manually
        cache_dir = download_model_files(model_name)

        # Load model from local files
        print("Loading model from local files...")
        model_start = time.time()
        # NOTE(review): trust_remote_code=True permits execution of custom
        # modeling code shipped with a checkpoint. The downloaded file set
        # contains no Python files, so it is probably unnecessary here --
        # confirm before removing.
        model = AutoModelForCausalLM.from_pretrained(
            cache_dir,
            local_files_only=True,
            torch_dtype=dtype,
            trust_remote_code=True,
        )
        print(f"Model loading took {time.time() - model_start:.2f} seconds")

        # Print model configuration. Attribute names follow the GPT-2
        # config; other architectures may not define them, so fall back to
        # a placeholder instead of failing on a purely informational print.
        config = model.config
        print("\nModel Configuration:")
        print(f"Hidden size (n_embd): {getattr(config, 'n_embd', 'n/a')}")
        print(f"Number of layers (n_layer): {getattr(config, 'n_layer', 'n/a')}")
        print(f"Number of attention heads (n_head): {getattr(config, 'n_head', 'n/a')}")
        print(f"Vocabulary size: {getattr(config, 'vocab_size', 'n/a')}")

        # Verify configuration, but only against values that actually
        # belong to the requested checkpoint.
        expected_config = known_configs.get(model_name)
        if expected_config is None:
            print(f"No reference configuration for {model_name}; skipping verification")
        else:
            for key, expected_value in expected_config.items():
                actual_value = getattr(config, key)
                if actual_value != expected_value:
                    raise ValueError(f"Incorrect model configuration: {key} should be {expected_value}, got {actual_value}")

            print("Model configuration verified successfully")

        # Only announce and time a GPU transfer when one really happens.
        if use_cuda:
            print("Moving model to GPU...")
            gpu_start = time.time()
            model = model.to(device)
            print(f"GPU transfer took {time.time() - gpu_start:.2f} seconds")

        print("Loading tokenizer from local files...")
        tokenizer_start = time.time()
        tokenizer = AutoTokenizer.from_pretrained(
            cache_dir,
            local_files_only=True,
        )
        print(f"Tokenizer loading took {time.time() - tokenizer_start:.2f} seconds")

        # Ensure padding token is set if not present
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        print(f"\nTotal loading time: {time.time() - start_time:.2f} seconds")
        print("Model and tokenizer loaded successfully")
        return model, tokenizer

    except Exception as e:
        # Top-level boundary: report the failure, then let the caller decide.
        print(f"Error during model loading: {e}")
        raise