# Hydra runtime settings: output directories, job logging and environment.
hydra:
  run:
    # Single runs use the current directory as Hydra's run directory.
    dir: .
  sweep:
    # Multirun sweeps also use the current directory, with no per-job
    # subdirectory.
    dir: .
    subdir: .
  job_logging:
    root:
      level: INFO
  job:
    env_set:
      # Quoted on purpose: the environment variable must receive the string
      # "false", not a YAML boolean.
      TOKENIZERS_PARALLELISM: "false"


# Hydra defaults list. Entries are composed in order, so listing `_self_`
# last lets the values in this file override those coming from base_config.
defaults:
  - base_config  # see src/arguments.py
  - _self_


# Weights & Biases logging options.
wandb:
  log: true
  entity: AUTHOR_NAME  # Change this to your wandb username.
  project: PROJECT_NAME  # Change this to your wandb project name.
  # NOTE(review): `wandb.tag` is not defined in this file; presumably it is
  # supplied by base_config or a command-line override -- confirm.
  group: ${wandb.tag}
  # Interpolates to "<group>-run-<seed>". `training.seed` is likewise not set
  # in this file and must come from elsewhere.
  name: ${wandb.group}-run-${training.seed}


# Model, task and prompt-format options.
model:
  # markers: Language, Protein, SMILES
  # single domain tasks: Protein-Descriptor, SMILES-QED
  # multi domain tasks: DC, BA, Translate
  task_name: BA
  exp_name: format
  # peft:  # one of: lora, prefixtuning, prompttuning
  # Empty for markers; otherwise the path to the trained marker dictionary.
  token_dict_path: exp/BA

  regression: true  # true for Protein-Descriptor, SMILES-QED, DC, BA
  regression_out_dim: 1
  num_token_per_prompt: 10  # number of actual tokens per special token

  # Tie output regression weights with input embedding weights.
  tied_weights: false
  freeze_existing_tokens: false  # freeze already-learned markers
  # Use learned encoding for numerical values, else use character
  # representation.
  use_scalar_encode: false

  # ablation studies
  use_start_marker: true
  use_end_marker: false  # end of domain markers
  use_functional_token: true
  # Add cross-entropy loss on non-special tokens in addition to regression
  # loss during training.
  add_ce_loss: true
  # For each training example, switch the order of the input and the output.
  inverse_prompting: true
  autoregressive_attn_mask: false

  model_name_or_path: llama-7b
  pretrained: true
  cache_dir: .cache/

  # In-context-learning options. The two unset entries are written as an
  # explicit `null` (what a bare `key:` parses to) so the intent is visible.
  icl_method: null
  icl_num_demonstrations: 1
  icl_idx_dict_path: null


# Trainer arguments (Hugging Face style names).
training:
  predict_with_generate: true
  generation_max_length: 650

  do_train: true
  do_eval: true

  # Resolves to exp/<task_name>/<task_name>-<exp_name>-seed-<seed>.
  output_dir: exp/${model.task_name}/${model.task_name}-${model.exp_name}-seed-${training.seed}

  report_to: "none"  # THIS MUST BE NONE. Use wandb args to control logging.

  dataloader_num_workers: 0  # If > 0, some weird process hanging might occur.

  # Default training params. Effective batch size per device =
  # per_device_train_batch_size x gradient_accumulation_steps = 4 x 8 = 32.
  # NOTE(review): if an effective batch size of 16 was intended,
  # gradient_accumulation_steps would need to be 4 -- confirm.
  num_train_epochs: 4
  fp16: true
  fp16_full_eval: true
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
  gradient_accumulation_steps: 8

  # Checkpoint every 1000 steps (save_steps) with save_total_limit of 1;
  # evaluate every 200000 steps (eval_steps).
  # NOTE(review): load_best_model_at_end is false, so metric_for_best_model
  # and greater_is_better presumably have no effect on which checkpoint is
  # kept -- confirm.
  overwrite_output_dir: false  # Resume training from checkpoint if it exists.
  evaluation_strategy: steps
  save_strategy: steps
  eval_steps: 200000
  save_steps: 1000
  save_total_limit: 1
  load_best_model_at_end: false

  metric_for_best_model: rougeL
  greater_is_better: true
  # NOTE(review): nested group; its consumer is not visible in this file.
  gist:
    num_gist_tokens: 10

