# Top-level run settings.
# NOTE(review): `model` presumably selects which model variant the consuming
# code builds (see `diversity_config` below) — confirm against the loader.
model: diversity
# Explicit null: no checkpoint path is configured in this file.
ckpt_path: null
# NOTE(review): presumably the number of candidate tactics sampled before
# filtering down to `diversity_config.num_filtered` — confirm.
num_sampled_tactics: 64

# Distributed-execution switch and fractional resource shares.
# NOTE(review): the fractional values look like per-worker GPU/CPU shares for
# a launcher that supports fractional resources — confirm with the launcher.
distributed: true
gpu_per_process: 0.225
cpu_per_process: 0.125

# Same values as the `*_per_process` entries above.
# NOTE(review): presumably the share reserved for each diversity-model
# worker — keep in sync with the values above if that is intended.
gpu_per_diversity: 0.225
cpu_per_diversity: 0.125

# Settings for the diversity model.
# NOTE(review): presumably consumed when the top-level `model` is
# `diversity` — confirm against the loader.
diversity_config:
  # NOTE(review): name suggests a transition-model checkpoint directory — confirm.
  ckpt_dir: transition_model_ckpt

  # NOTE(review): the value reads like a placeholder for the pretrained
  # tactic generator path — set it before use.
  model: pretrained_tactic_generator_path
  max_seq_len: 2300
  # K: number of tactics kept after filtering
  num_filtered: 8
  # theta
  temperature: 4

  # Whether to include the score and time as part of the quality score
  score_network: true
  # lambda_s
  error_weight: 0.1
  # lambda_tau
  time_weight: 0.1

  error_only: false

  # Keep fixed for replicating results
  fixed_size: true
  # Only used for non-fixed_size
  p: 0.75

# Generator / retriever settings plus training and in-training eval options.
config:
  # NOTE(review): the value reads like a placeholder for the pretrained
  # tactic generator path — set it before use.
  model_name: pretrained_tactic_generator_path
  ret_ckpt_path: runs/retriever_novel_premises.ckpt
  indexed_corpus_path: runs/indexed_corpus_minif2f
  eval_num_retrieved: 100

  # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
  # load a bare `5e-6` as the string "5e-6" rather than a float.
  lr: 5.0e-6
  warmup_steps: 200
  length_penalty: 0.0
  num_beams: 64
  # Same value as `diversity_config.max_seq_len`.
  max_seq_len: 2300
  gen_config:
    strategy: beam
    # Same value as the `length_penalty` one level up.
    length_penalty: 0.0
  # Configuration for the eval loop in training (terminates based on live
  # proving performance)
  eval_config:
    eval_num_theorems: 200
    shuffle: false
    timeout: 600
