# Llama-3.1-8B-Instruct — local inference via HuggingFace Transformers
# Requires: pip install transformers accelerate (add bitsandbytes for 4-bit loading)
# VRAM:     ~16 GB (bfloat16) or ~9 GB (4-bit via bitsandbytes)
# Label for this configuration. Note that it omits the "-Instruct" suffix
# carried by backend.model_id; how the label is consumed is not visible here.
name: Llama-3.1-8B

# Task settings. NOTE(review): the per-key notes below are inferred from the
# key names only; confirm each one against the code that loads this file.
task:
  # Presumably the number of latent attributes describing each stimulus.
  nbr_latents: 3
  # Presumably the number of distractor candidates; 0 would mean none.
  nbr_distractors: 0
  # Presumably the number of distinct symbols available for messages.
  vocab_size: 6
  # Presumably the maximum message length, counted in symbols.
  max_sentence_length: 3
  # Lower and upper bounds on the number of values a latent may take (2 and 5
  # here). Whether the bounds are inclusive is not visible here; TODO confirm.
  min_nbr_values_per_latent: 2
  max_nbr_values_per_latent: 5
  # Presumably the number of message exchanges per episode.
  nbr_communication_rounds: 1
  # NOTE(review): meaning not evident from this file; confirm with the loader.
  descriptive: true
  # Presumably enables feedback to the listener side; verify what is fed back.
  provide_listener_feedback: true

# Model backend settings. Per the file header, inference runs locally through
# HuggingFace Transformers.
backend:
  # Backend selector; "hf" presumably picks the HuggingFace Transformers
  # backend named in the file header. Confirm against the loader.
  backend_type: hf
  # Model repository id. NOTE(review): meta-llama repositories on the Hugging
  # Face Hub are typically gated, so access approval and an auth token may be
  # needed before the first download; confirm for your environment.
  model_id: meta-llama/Llama-3.1-8B-Instruct
  # Sampling temperature; presumably forwarded to text generation. TODO
  # confirm that the backend enables sampling, otherwise it may have no effect.
  temperature: 0.7
  # Presumably the cap on newly generated tokens per call (prompt excluded).
  max_new_tokens: 256
  # 4-bit loading goes through bitsandbytes (see the VRAM note in the file
  # header), so that package must be installed before enabling this.
  load_in_4bit: false   # set true to halve VRAM at slight quality cost
  # Presumably handed to the model loader; with accelerate installed, "auto"
  # normally lets it place weights on the available devices. Verify.
  device_map: auto
