# Config groups composed into this file: one option each for the model,
# dataset and evaluator groups.
# NOTE(review): the `_self_` entry suggests a Hydra-style defaults list; if so,
# listing it last means the keys defined below in this file override the
# values pulled in from the groups above — confirm against the loader.
defaults:
  - model: v_token_iql
  - dataset: list_val
  - evaluator: top_advantage_utterances_evaluator
  - _self_

# Settings for the `dataset` group (option `list_val` in the defaults list).
dataset:
  # The same cache_id value ("d") also appears under model.dataset and
  # evaluator.data; presumably the three must stay in sync — TODO confirm.
  cache_id: d
  data:
    # Path to the reward cache file; relative, so presumably resolved against
    # the working directory or project root — verify against the loader.
    reward_cache: data/vis_dialogue/processed/visdial_0.5/val_rank_reward_cache1.json
    early_stops: true
    # Disabled reward transform overrides, kept for reference.
    # reward_shift: 30.0
    # reward_scale: 1e6
    mode: env_stops
    # Cutoff rule selected by name, with its two parameters.
    # NOTE(review): the exact meaning of goal_value / percentile is defined by
    # the `percentile_cutoff_rule` implementation, which is not visible here.
    cutoff_rule:
      name: percentile_cutoff_rule
      goal_value: 1.0
      percentile: 0.5
    # Disabled override, kept for reference.
    # yn_reward: -2.0
  # Per-token reward selected by name; reads a token file and applies the
  # scale/shift values below.
  # NOTE(review): judging by the file name, token_file holds GPT-2 token
  # frequencies from the WikiText-103 train split — confirm.
  token_reward:
    name: specified_token_reward
    token_file: data/wikitext/wikitext-103-train_gpt2_token_freq.json
    scale: 20.0
    shift: -1.0

# Settings for the `model` group (option `v_token_iql` in the defaults list).
# NOTE(review): the semantics of the scalar hyperparameters below (alpha,
# gamma, beta, tau, ...) are defined by the model implementation, which is not
# visible from this file.
model:
  alpha: 0.005
  gamma: 0.99
  beta: 0.0
  transition_weight: 0.0
  # Left as null; presumably this leaves clipping / value bounds disabled —
  # TODO confirm how the model treats null here.
  clip_weight: null
  value_max: null
  value_min: null
  detach_v: false
  detach_q: false
  detach_pi: false
  double_v: true
  # NOTE(review): the two keys below are spelled `seperate_*` (sic). They are
  # left unchanged because the consuming code presumably looks up these exact
  # key names; renaming them here alone could break the lookup.
  seperate_policy: false
  seperate_target: true
  tau: 0.5
  exp_weights: true
  dm_margin: 0.0
  # Options for the GPT-2 component of the model.
  gpt2:
    lm_head: true
    from_pretrained: true
  # Dataset the model is paired with; cache_id matches dataset.cache_id and
  # evaluator.data.cache_id in this file.
  dataset:
    name: vis_dial_list_dataset
    cache_id: d
  # Checkpoint to load before evaluation. The path is relative and points at a
  # specific training run's output directory, so it must exist locally.
  load:
    checkpoint_path: outputs/visual_dialogue/visdial_rank_iql_frequency_cutoff_yn_penalty_simple_extraction_cql_test2/model_65535.pkl
    strict_load: true

# Settings for the `evaluator` group (option
# `top_advantage_utterances_evaluator` in the defaults list).
evaluator:
  # Dataset handed to the evaluator; same name and cache_id as model.dataset.
  data:
    name: vis_dial_list_dataset
    cache_id: d

# Evaluation run settings. With bsize, batches and print_every all set to 1,
# this looks like a minimal single-batch run — presumably for quick inspection
# rather than a full validation pass; TODO confirm.
eval:
  dataloader_workers: 1
  bsize: 1
  batches: 1
  print_every: 1
  seed: 1
  # Weights of the individual loss terms. cql_loss_weight and dm_loss_weight
  # are 0.0 here; the other three weights are 1.0.
  loss:
    v_loss_weight: 1.0
    q_loss_weight: 1.0
    awac_weight: 1.0
    cql_loss_weight: 0.0
    dm_loss_weight: 0.0
    mc_returns: false
    
