# Hydra defaults list; entries are composed in the order written.
defaults:
  # `_self_` comes first, so the keys defined in this file are composed
  # before the config-group options listed below.
  - _self_
  - user: ab
  - mode: normal
  - task: gsm8k
  - policy: gemma-2-2b
  # Second selection from the `policy` group, placed at package `ref_policy`.
  - policy@ref_policy: llama-3-3b
  - reward: oasst-rm
  - method: rejection
  # Several options are selected from the `coreset` group at once.
  - coreset:
      - llm_binary_quality_diversity
      - elliptical
      - coreset
      - vanilla
      - gradient_norms
  # Optional entries whose option name is built from the task and policy
  # selections above.
  - optional task_policy: ${task}_${policy}
  - optional task_policy@ref_policy: ${task}_${policy@ref_policy}
  - plot: token_level
    
amlt: false
# `seed` and `root` are read from the selected `user` config.
seed: ${user.seed}
root: ${user.root}
blob_root: /mnt/default/projects/llm-exploration/uploads
hf_token_path: .hf_token
debug: false

# Empty string by default.
checkpoint_resume_path: ''
seed_shift: 0

# NOTE(review): the defaults list also selects an option for the `policy`
# group; presumably that option is merged into this same node - confirm.
policy:
  branch: main

io:
  # Assembled from the task, policy and mode names plus the top-level `shots`.
  prefix: ${task.name}-${policy.name}-${mode.name}-shots-${shots}
  overwrite: false
  # Both roots resolve to the top-level `root`.
  save_root: ${root}
  load_root: ${root}
  
########################################################################
# GENERATION
########################################################################

generate:
  remote: false

# Interpolated into `io.prefix`.
shots: 0

training:
  # Written with an explicit decimal point: without one (`1e-6`), YAML 1.1
  # resolvers such as plain PyYAML yield the string "1e-6" instead of a
  # float. `1.0e-6` is read as a float by both PyYAML and OmegaConf.
  lr: 1.0e-6

sampling:
  # `k`, `temperature` and `num_blocks` are read from the selected `mode`.
  k: ${mode.k}
  temperature: ${mode.temperature}
  top_k: -1
  top_p: 1.0
  # Literal value; not tied to the top-level `seed`.
  # NOTE(review): confirm that the two seeds are meant to be independent.
  seed: 1337
  logprobs: 0
  # Taken from the selected task's generation settings.
  max_tokens: ${task.generation.max_response_length}
  gpu_memory_utilization: 0.95
  max_model_len: 4096
  num_blocks: ${mode.num_blocks}
  swap_space: 4
  tensor_parallel_size: 1
  num_workers: 1
  stop_strs: null
  min_p: 0.0
  elliptical:
    normalize_bonuses_per_step: true
    center_hidden_states_per_step: true

########################################################################
# REWARD LABELING
########################################################################

evaluation:
  model_type: reward
  collate_rewards: false
  save_copy: true
  include_prompt: false
  batch_size: 16
  # Length limits are read from the selected task's generation settings.
  max_response_length: ${task.generation.max_response_length}
  max_prompt_length: ${task.generation.max_prompt_length}
  collect_gradients: false
  code_contests:
    num_workers: 32
  # Of the three hidden-state collection flags, only the mean one is enabled.
  collect_second_to_last_hidden_states: false
  collect_last_hidden_states: false
  collect_mean_hidden_states: true

########################################################################
# INFERENCE
########################################################################

# Top-level (not nested) settings.
repeats: 50
refresh_data: false
use_multiprocessing: true
max_threads: 44
use_subsampling: true

# NOTE(review): presumably the range of k values (kmin..kmax, step `inc`);
# confirm against the consumer. `inc` is a float, the bounds are integers.
ks:
  kmax: 13
  kmin: 2
  inc: 1.0

# Active grid of beta values.
betas:
  - 0.0001
  - 0.0005
  - 0.001
  - 0.005
  - 0.01
  - 0.05
  - 0.1
  - 0.5
  # Alternative grid, currently disabled:
  # - 0.00001
  # - 0.00003
  # - 0.0001
  # - 0.0003
  # - 0.001
  # - 0.003
  # - 0.01
  # - 0.03
  # - 0.1
  # - 0.3
  # - 0.5


aoai:
  # Date-like values are quoted so they stay strings.
  api_version: '2024-10-21'
  model_name: 'gpt-4o-mini'
  # Ensure this is a valid model version (the list of valid versions is not
  # linked from this file). Other versions noted here previously:
  # 'turbo-2024-04-09', '2024-11-20'.
  model_version: '2024-07-18'
  # Ensure this is a valid instance name.
  instance: 'anonymousne/shared'

model_types:
  - llama32medium
  - mistral7b

# NOTE(review): the defaults list also selects an option for the `plot`
# group; presumably that option is merged into this same node - confirm.
plot:
  root: ${user.root}/amlt
  # Unset by default.
  subsample_size: null
  min_number_unique_answers: null
  max_n: null
  load_features: false

# Resolves to the same `${user.root}/amlt` path as `plot.root`.
preprocess_amlt:
  root: ${user.root}/amlt

collator:
  # NOTE(review): differs from the `reward` selection in the defaults list
  # (`oasst-rm`); confirm this is intentional.
  name: armo-rm

# Unset by default.
checkpoint_dir: null

# NOTE(review): shares its name with the `plot: token_level` option in the
# defaults list; confirm which component reads this node.
token_level:
  batch_size: 64

# holdout:  
#   samples: ${ks.kmax}
#   prefix: ${task.name}-${policy.name}-holdout-fast-shots-0



