

# Experiment identifiers and base directory. How these are combined into a
# path is decided by the consuming code, which is not visible from this file.
experiment:
  experiments_base_dir: /tmp/
  # NOTE(review): "neruisp2024" looks like a misspelling of "neurips2024";
  # confirm before renaming, since the name may already be in use elsewhere.
  project_name: neruisp2024
  session_name: cpr_adamw
  experiment_name: test_run

# Random seeds (presumably one for the run and one for the data pipeline;
# verify against the consumer).
seed: 1
data_seed: 1

# Cache location; double-quoted to match the quoting used for model_name.
cache_dir: "/tmp/cache"

# Active base model and the layer names listed for replacement (presumably
# the modules swapped for LoRA layers, given the lora_* keys in this file;
# confirm against the consumer). The commented-out stanzas further down are
# alternative model/layer selections.
model_name: "facebook/opt-125m"
replace_layer:
  - "q_proj"
  - "fc1"


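# Alternative model (to use it, uncomment this stanza and comment out the
# active model_name/replace_layer above):
#model_name: "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
#replace_layer:
#  - "q_proj"
#  - "v_proj"
#  - "k_proj"
#  - "o_proj"
#  - "fc_1"
#  - "fc_2"
# NOTE(review): "fc_1"/"fc_2" differ from the "fc1" spelling used for OPT above;
# confirm these match real module names in TinyLlama before enabling.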

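# Alternative model (to use it, uncomment this stanza and comment out the
# active model_name/replace_layer above). Entries prefixed with "##" remain
# commented out after a single "#" is removed from each line.
#model_name: "mistralai/Mistral-7B-Instruct-v0.2"
#replace_layer:
#  - "q_proj"
#  - "v_proj"
#  - "k_proj"
#  - "o_proj"
##  - "gate_proj"
##  - "up_proj"
##  - "down_proj"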

# Device count and numeric precision.
devices: 1
precision: bf16-true

# Optimisation schedule.
learning_rate: 0.0001
warmup_steps: 50
# NOTE(review): weight_decay and l2reg_lambda are both set in this file next to
# reg_type; which one applies for a given reg_type is not visible here.
weight_decay: 0.1
max_epochs: 1


# Noise settings. The flags use canonical lowercase `false`, which is accepted
# as a boolean under both YAML 1.1 and 1.2 and satisfies yamllint's `truthy`
# rule; the parsed values are unchanged from the previous `False`.
noise_strength: 0.1
noise_top: 0.1
noise_bottom: 0.1
invert_noise: false
noise_just_svd: false
noise_percental_selection: false

# LoRA settings.
lora_type: loralib
# assumes r is the LoRA rank — TODO confirm against the consumer
r: 128
lora_alpha: 1.0
lora_dropout: 0.0

# Regularisation mode; the original note lists "cpr" and "adamw" as the values.
reg_type: cpr

# cpr_* settings (presumably only read when reg_type is cpr; confirm).
cpr_param: 50
cpr_init: "warm_start"
cpr_mu: 1.0

l2reg_lambda: 0.1

# Canonical lowercase boolean (same parsed value as the previous `True`;
# satisfies yamllint's `truthy` rule).
merge_weights: true

# Batching and sequence limits.
# NOTE(review): effective_batch_size (2) is smaller than batch_size (64). If
# the consumer derives gradient-accumulation steps from the ratio of the two,
# this may yield zero or a fractional value — confirm which is intended.
batch_size: 64
effective_batch_size: 2
max_seq_length: 500
gradient_clipping: 1.0

# Dataset selection. The mapping named `pubmedqa` shares its name with the
# value of `dataset`; presumably the consumer looks up per-dataset options by
# that name (verify against the loader).
val_split: 0.1
dataset: pubmedqa
pubmedqa:
  labeled_oversample: 0
  artificial_numb: 100000
  unlabeled_numb: 0


# Evaluation tasks. Only uncommented entries are part of the list; the
# commented-out entries that follow are currently disabled.
eval_tasks:
  - pubmedqa
#  - truthfulqa_mc
#  - arithmetic_2da
#  - arithmetic_2ds
#  - arithmetic_3da
#  - arithmetic_3ds
#  - arithmetic_4da
#  - arithmetic_4ds
#  - arithmetic_5da
#  - arithmetic_5ds
#  - arithmetic_2dm
#  - arithmetic_1dc
#  - piqa
#  - hendrycksTest-college_biology
#  - hendrycksTest-college_chemistry
#  - hendrycksTest-college_computer_science
#  - hendrycksTest-college_mathematics
#  - hendrycksTest-college_medicine
#  - hendrycksTest-college_physics
#  - hendrycksTest-high_school_biology
#  - hendrycksTest-high_school_chemistry
#  - hendrycksTest-high_school_computer_science
#  - hendrycksTest-high_school_european_history
#  - hendrycksTest-high_school_geography
#  - hendrycksTest-high_school_government_and_politics
#  - hendrycksTest-high_school_macroeconomics
#  - hendrycksTest-high_school_mathematics
#  - hendrycksTest-high_school_microeconomics
#  - hendrycksTest-high_school_physics
#  - hendrycksTest-high_school_psychology
#  - hendrycksTest-high_school_statistics
#  - hendrycksTest-high_school_us_history
#  - hendrycksTest-high_school_world_history
#  - hendrycksTest-abstract_algebra
#  - hendrycksTest-anatomy
#  - hendrycksTest-astronomy
#  - hendrycksTest-business_ethics
#  - hendrycksTest-clinical_knowledge
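# NOTE: the six college_* entries below also appear earlier in this commented
# list; enable only one copy of each to avoid duplicate tasks.
#  - hendrycksTest-college_biology
#  - hendrycksTest-college_chemistry
#  - hendrycksTest-college_computer_science
#  - hendrycksTest-college_mathematics
#  - hendrycksTest-college_medicine
#  - hendrycksTest-college_physics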
#  - hendrycksTest-computer_security
#  - hendrycksTest-conceptual_physics
#  - hendrycksTest-econometrics
#  - hendrycksTest-electrical_engineering
#  - hendrycksTest-elementary_mathematics
#  - hendrycksTest-formal_logic
#  - hendrycksTest-global_facts
#  - hendrycksTest-human_aging
#  - hendrycksTest-human_sexuality
#  - hendrycksTest-international_law
#  - hendrycksTest-jurisprudence
#  - hendrycksTest-logical_fallacies
#  - hendrycksTest-machine_learning
#  - hendrycksTest-management
#  - hendrycksTest-marketing
#  - hendrycksTest-medical_genetics
#  - hendrycksTest-miscellaneous
#  - hendrycksTest-moral_disputes
#  - hendrycksTest-moral_scenarios
#  - hendrycksTest-nutrition
#  - hendrycksTest-philosophy
#  - hendrycksTest-prehistory
#  - hendrycksTest-professional_accounting
#  - hendrycksTest-professional_law
#  - hendrycksTest-professional_medicine
#  - hendrycksTest-professional_psychology
#  - hendrycksTest-public_relations
#  - hendrycksTest-security_studies
#  - hendrycksTest-sociology
#  - hendrycksTest-us_foreign_policy
#  - hendrycksTest-virology
#  - hendrycksTest-world_religions