# Model configurations for OFMU experiments

# Language-model presets, keyed by a short alias.
#
# Every entry carries the same set of fields; the code that consumes them is
# not part of this file, so the notes below are limited to what is visible
# here:
#   model_name / tokenizer_name - identifiers in "org/name" form; within each
#       entry both are set to the same value. (They look like Hugging Face
#       Hub repo ids - confirm against the loader.)
#   max_length - 2048 in every entry (presumably a token limit - confirm).
#   batch_size - 4 in every entry.
#   dtype / device_map / trust_remote_code - identical across all entries
#       ("float16", "auto", false); presumably forwarded to the
#       model-loading call - confirm.
#   use_auth_token - present (true) on the three Llama entries only.
language_models:
  llama2-7b:
    model_name: "meta-llama/Llama-2-7b-chat-hf"
    tokenizer_name: "meta-llama/Llama-2-7b-chat-hf"
    max_length: 2048
    batch_size: 4
    dtype: "float16"
    device_map: "auto"
    trust_remote_code: false
    use_auth_token: true  # Required for Llama models

  llama3-8b:
    model_name: "meta-llama/Meta-Llama-3-8B-Instruct"
    tokenizer_name: "meta-llama/Meta-Llama-3-8B-Instruct"
    max_length: 2048
    batch_size: 4
    dtype: "float16"
    device_map: "auto"
    trust_remote_code: false
    use_auth_token: true  # Same setting as llama2-7b

  # NOTE(review): this alias contains a '.'; if the consumer resolves keys
  # with dot-separated paths, it may need escaping - confirm.
  llama3.1-8b:
    model_name: "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer_name: "meta-llama/Meta-Llama-3.1-8B-Instruct"
    max_length: 2048
    batch_size: 4
    dtype: "float16"
    device_map: "auto"
    trust_remote_code: false
    use_auth_token: true  # Same setting as llama2-7b

  # Unlike the Llama entries, this one has no use_auth_token key.
  # NOTE(review): presumably intentional (no authenticated access needed for
  # this model) - confirm the loader tolerates the missing key.
  zephyr-7b:
    model_name: "HuggingFaceH4/zephyr-7b-beta"
    tokenizer_name: "HuggingFaceH4/zephyr-7b-beta"
    max_length: 2048
    batch_size: 4
    dtype: "float16"
    device_map: "auto"
    trust_remote_code: false

# Vision-model presets, keyed by a short alias.
#
# Both entries share the same fields and differ only in model_name and
# batch_size. num_classes is a placeholder (see the inline note on resnet18).
# input_size is a two-element list (presumably height and width in pixels -
# confirm against the data pipeline).
vision_models:
  resnet18:
    model_name: "microsoft/resnet-18"
    num_classes: 10  # Will be adjusted based on dataset
    pretrained: true
    input_size: [224, 224]
    batch_size: 32

  resnet50:
    model_name: "microsoft/resnet-50"
    # Same placeholder value as resnet18; presumably also adjusted per
    # dataset - confirm.
    num_classes: 10
    pretrained: true
    input_size: [224, 224]
    batch_size: 16  # Half of the resnet18 batch size

# Model loading configurations: named profiles that bundle loading options.
#
# Each profile sets the same five keys. dtype and device_map also appear in
# the per-model entries of language_models; which value wins when both are
# set is decided by the consumer and is not visible in this file.
# batch_size_multiplier is a fractional factor (presumably applied to a
# model's batch_size - confirm how the result is rounded).
loading_configs:
  # 8-bit loading and gradient checkpointing both enabled; multiplier 0.5.
  low_memory:
    dtype: "float16"
    device_map: "auto"
    load_in_8bit: true
    gradient_checkpointing: true
    batch_size_multiplier: 0.5

  # bfloat16 with 8-bit loading and gradient checkpointing both disabled;
  # multiplier 1.0.
  high_performance:
    dtype: "bfloat16"
    device_map: "auto"
    load_in_8bit: false
    gradient_checkpointing: false
    batch_size_multiplier: 1.0

  # float32 with device_map "cpu"; the smallest multiplier of the three
  # profiles (0.25).
  debug:
    dtype: "float32"
    device_map: "cpu"
    load_in_8bit: false
    gradient_checkpointing: false
    batch_size_multiplier: 0.25