# Goedel-Prover-SFT Q4_K_M via llama-cpp-python — ROCm/HIP GPU acceleration
# Base model: DeepSeek-Prover-V1.5-Base (Llama architecture, 7B params)
# Install: see meta_rg/backends/llamacpp_backend.py header for ROCm/HIP build instructions
# Model downloaded automatically to ~/.cache/huggingface/hub/ on first run.
# VRAM: ~4.3 GB weights (Q4_K_M) + KV cache
# GGUF provided by mradermacher (bartowski has no release for this model).
# KV cache: Q8_0 quantisation (type_k/type_v=8) roughly halves KV memory vs the FP16 default.
name: Goedel-Prover-SFT-llamacpp  # configuration identifier: model name + backend (how it is consumed is not visible here)

# Task parameters. Their exact semantics are defined by the code that loads
# this file, which is not visible here; the notes below are read off the key
# names only and should be confirmed against the consumer.
task:
  nbr_latents: 3
  nbr_distractors: 0
  # presumably the size of the message vocabulary and the maximum message
  # length — TODO confirm
  vocab_size: 6
  max_sentence_length: 3
  # Lower/upper bound pair for the number of values per latent (2 <= 5).
  min_nbr_values_per_latent: 2
  max_nbr_values_per_latent: 5
  nbr_communication_rounds: 1
  descriptive: true
  provide_listener_feedback: true

# Inference backend settings (llama-cpp-python; build notes are in
# meta_rg/backends/llamacpp_backend.py per the file header).
backend:
  backend_type: llamacpp
  # Hugging Face Hub repository and GGUF filename; per the file header the
  # model is downloaded automatically on first run.
  hf_repo_id: mradermacher/Goedel-Prover-SFT-GGUF
  hf_filename: Goedel-Prover-SFT.Q4_K_M.gguf
  # NOTE(review): 131072 tokens looks very large next to max_tokens: 64, and
  # the KV cache counted in the VRAM estimate in the header is expected to grow
  # with n_ctx. It may also exceed the context length the model was trained
  # with — confirm against the model's metadata and lower if not needed.
  n_ctx: 131072
  n_gpu_layers: -1      # -1 = offload all layers to GPU
  temperature: 0.7
  max_tokens: 64
  # NOTE(review): presumably required for the quantised KV cache configured by
  # type_k/type_v below — confirm before disabling.
  flash_attn: true
  verbose: false
  type_k: 8             # Q8_0 KV cache — ~50% KV memory vs FP16 default
  type_v: 8
