# Top-level identifier for this configuration.
# NOTE(review): the value equals the last path component of backend.model_id;
# presumably a human-readable label for the model/run — confirm how the
# consumer uses it (e.g. for output naming) before renaming.
name: Goedel-Prover-DPO

# Task settings.
# NOTE(review): the key names suggest a speaker/listener communication game
# over stimuli described by latent attributes — the semantics below are
# inferred from names only; confirm against the code that reads this section.
task:
  # Number of latents (presumably latent attributes per stimulus — TODO confirm).
  nbr_latents: 3
  # Number of distractors; set to 0 here (presumably extra non-target
  # candidates shown alongside the target — TODO confirm).
  nbr_distractors: 0
  # Vocabulary size (presumably the number of distinct symbols available for
  # messages — TODO confirm).
  vocab_size: 6
  # Upper bound on sentence length (presumably counted in vocabulary symbols
  # per message — TODO confirm the unit).
  max_sentence_length: 3
  # Lower and upper bounds on the number of values per latent.
  # NOTE(review): whether the bounds are inclusive, and how a count is chosen
  # within the range, is not visible from this file.
  min_nbr_values_per_latent: 2
  max_nbr_values_per_latent: 5
  # Number of communication rounds; a single round is configured here.
  nbr_communication_rounds: 1
  # Boolean flag, enabled here.
  # NOTE(review): the exact meaning of "descriptive" is defined by the
  # consumer — verify before changing.
  descriptive: true
  # Boolean flag, enabled here (presumably the listener's outcome is reported
  # back during the task — TODO confirm).
  provide_listener_feedback: true

# Model backend settings.
# NOTE(review): key names resemble Hugging Face Transformers loading and
# generation options; confirm how the consumer maps them before relying on
# the descriptions below.
backend:
  # Backend selector (presumably "hf" = Hugging Face — TODO confirm the set of
  # accepted values).
  backend_type: hf
  # Model identifier in "<owner>/<model>" form (presumably a model hub
  # repository id — TODO confirm).
  model_id: Goedel-LM/Goedel-Prover-DPO
  # Sampling temperature (presumably applied at generation time — TODO confirm
  # that sampling is actually enabled by the consumer).
  temperature: 0.7
  # Presumably the cap on newly generated tokens per call — TODO confirm.
  max_new_tokens: 512
  # 4-bit loading is disabled here (presumably toggles quantized weight
  # loading — TODO confirm).
  load_in_4bit: false
  # Device placement setting (presumably forwarded to the model loader, where
  # "auto" lets the loader decide placement — TODO confirm).
  device_map: auto
