# Hydra-style defaults list: each entry pulls in another config before the
# values written in this file are applied. Entry order is significant, so it
# is left exactly as authored.
defaults:
  # Option `soft_bc_lm` from the `model` config group.
  - model: soft_bc_lm
  # `dataset@<key>` places an option of the `dataset` group under <key>, which
  # lets the same group be used twice (train_dataset and eval_dataset).
  - dataset@train_dataset: list_train
  - dataset@eval_dataset: list_val
  - evaluator: bc_evaluator
  # `_self_` listed last: keys defined below in this file take precedence over
  # the entries composed above.
  - _self_

# Overrides for the training dataset (base config: `list_train`, selected in
# the defaults list).
train_dataset:
  # Identifier for this dataset instance; `model.dataset.cache_id` refers to
  # the same id.
  cache_id: d_train
  data:
    # Reward cache for the train split (JSON file).
    reward_cache: data/vis_dialogue/processed/visdial_0.5/train_rank_reward_cache1.json
    # Disabled options below are kept for reference.
    # additional_scenes: data/vis_dialogue/processed/visdial_0.5/is_it_sunny_events.pkl
    early_stops: true
    # reward_shift: 30.0
    # reward_scale: 1e6
    mode: env_stops
    # Cutoff rule selected by name; the meaning of goal_value and percentile
    # is defined by the rule implementation, not shown in this file.
    cutoff_rule:
      name: percentile_cutoff_rule
      goal_value: 1.0
      percentile: 0.5
  # Token-level reward selected by name. Judging by the file name, token_file
  # holds GPT-2 token frequencies from the wikitext-103 train split — confirm.
  # NOTE(review): how scale and shift are combined is not visible here.
  token_reward:
    name: specified_token_reward
    token_file: data/wikitext/wikitext-103-train_gpt2_token_freq.json
    scale: 20.0
    shift: -1.0
  # top_p: 0.2

# Overrides for the evaluation dataset (base config: `list_val`, selected in
# the defaults list). Active settings match train_dataset except for cache_id
# and the reward_cache path.
eval_dataset:
  # Identifier for this dataset instance; `evaluator.env.dataset.cache_id`
  # refers to the same id.
  cache_id: d_eval
  data:
    # Reward cache for the validation split (JSON file).
    reward_cache: data/vis_dialogue/processed/visdial_0.5/val_rank_reward_cache1.json
    early_stops: true
    # Disabled options below are kept for reference.
    # reward_shift: 30.0
    # reward_scale: 1e6
    mode: env_stops
    # Cutoff rule selected by name; the meaning of goal_value and percentile
    # is defined by the rule implementation, not shown in this file.
    cutoff_rule:
      name: percentile_cutoff_rule
      goal_value: 1.0
      percentile: 0.5
  # Token-level reward; uses the same token-frequency file as train_dataset.
  token_reward:
    name: specified_token_reward
    token_file: data/wikitext/wikitext-103-train_gpt2_token_freq.json
    scale: 20.0
    shift: -1.0

# Overrides for the model config (base config: `soft_bc_lm`, selected in the
# defaults list).
model:
  # NOTE(review): percentile, beta and transition_weight are consumed by the
  # model implementation; their exact meaning is not visible in this file.
  percentile: 0.8
  beta: 0.9
  transition_weight: 0.0
  # Dataset the model is attached to; cache_id matches train_dataset.cache_id.
  dataset:
    name: vis_dial_list_dataset
    cache_id: d_train
  load:
    # No checkpoint path is given (null).
    checkpoint_path: null
    strict_load: true

# Overrides for the evaluator config (base config: `bc_evaluator`, selected in
# the defaults list).
evaluator:
  env:
    # HTTP endpoint of the environment; points at a service on localhost:5001.
    url: http://localhost:5001/step_rank
    actor_stop: false
    # Dataset backing the environment; cache_id matches eval_dataset.cache_id.
    dataset:
      name: vis_dial_list_dataset
      cache_id: d_eval
    # Disabled options below are kept for reference.
    # reward_shift: 30.0
    # reward_scale: 1e6
  verbose: true
  kind: sample
  # Generation settings; the commented-out keys are left unset here.
  generation_kwargs:
    # temp: 1.0
    num_generations: 1
    max_generation_len: 40
    # top_k: null
    # top_p: null

# Training-loop settings.
train:
  # NOTE(review): "cuttoff" looks like a typo for "cutoff"; left unchanged
  # because renaming it would change where checkpoints are written.
  save_checkpoint_dir: outputs/visual_dialogue/visdial_env_cuttoff_frequency_soft_bc_test3/
  optim_state_path: null
  # Very large epoch count with max_steps left null — presumably the run is
  # meant to be stopped manually; confirm.
  epochs: 10000000
  dataloader_workers: 1
  # bsize * grad_accum_steps = 2 * 128 = 256, the same number as log_every.
  # NOTE(review): the units of log_every / eval_every / save_every are defined
  # by the trainer and are not visible in this file.
  bsize: 2
  grad_accum_steps: 128
  log_every: 256
  eval_every: 4096
  save_every: 16384
  max_checkpoints: 4
  eval_bsize: 32
  eval_batches: 1
  # Written with an explicit fraction (`1.0e-4`, not `1e-4`): YAML 1.1 loaders
  # such as PyYAML only resolve exponent notation to a float when a decimal
  # point is present; without it the value loads as the string "1e-4".
  lr: 1.0e-4
  weight_decay: 0.00
  max_steps: null
  # Empty mapping: no loss options are set in this file.
  loss: {}

# Weights & Biases logging settings.
wandb:
  use_wandb: true
  # NOTE(review): the project is named `visdial_iql` while this config selects
  # the `soft_bc_lm` model — confirm this is the intended project.
  wandb_project: visdial_iql
