# Settings shared by every method under evaluation.
shared:
  # Base model settings.
  model:
    model_name: "Qwen/Qwen2.5-3B-Instruct"
    trust_remote_code: true
    # Turns QLoRA on or off; it is auto-disabled for distributed evaluation.
    use_qlora: false
    # Whether to use FP16 precision.
    fp16: true

  # QLoRA settings; only relevant when model.use_qlora is true.
  qlora:
    load_in_4bit: true
    bnb_4bit_quant_type: "nf4"
    bnb_4bit_compute_dtype: "float16"
    bnb_4bit_use_double_quant: true

  # LoRA adapter settings.
  lora:
    r: 32
    lora_alpha: 32
    # Names of the modules that receive LoRA adapters.
    target_modules:
      - "q_proj"
      - "k_proj"
      - "v_proj"
      - "o_proj"
      - "gate_proj"
      - "up_proj"
      - "down_proj"
    lora_dropout: 0.1
    bias: "none"
    task_type: "CAUSAL_LM"

# Distributed execution settings (the `enabled` flag controls distributed
# evaluation).
distributed:
  # Set to false to disable distributed evaluation.
  enabled: false
  # Communication backend for distributed runs.
  backend: "nccl"
  # Number of processes per node.
  nproc_per_node: 2
  # NOTE(review): the next two keys look like options forwarded to the
  # distributed model wrapper - confirm against the consuming script.
  # Whether to find unused parameters.
  find_unused_parameters: false
  # Whether to use a static graph.
  static_graph: true

# Reference model configuration (used for the winning-rate metric).
# This file sets the same model_name here as in shared.model.
reference_model:
  model_name: "Qwen/Qwen2.5-3B-Instruct"
  trust_remote_code: true

# Annotation model configuration (used for the winning-rate and PPR metrics).
# NOTE(review): model_name is empty in this file; presumably it must be filled
# in before either metric can be computed - confirm how the consumer handles "".
annotation_model:
  model_name: ""
  trust_remote_code: true

# Evaluation run settings.
evaluation:
  output_dir: "evaluation_results/1"
  # Automatically adjusted for distributed evaluation.
  batch_size: 4
  max_length: 500
  # Number of random samples to evaluate; set to -1 to use all data.
  num_points: 1000
  # Metrics to compute (available: "winning_rate", "PPR").
  metrics:
    - "winning_rate"
    - "PPR"
  # NOTE(review): the meaning of `w` is not documented in this file - confirm
  # with the consuming script and describe it here.
  w: 0.8

# Dataset configuration.
# NOTE(review): the file name below carries "w0.6" while evaluation.w is 0.8;
# confirm whether the two are meant to differ.
dataset:
  test_path: "datasets/gpt/pairwise_dataset_w0.6_n10000.json"

# Experiment tracking settings.
tracking:
  wandb_project: "rlhf-experiments"
  run_name: "evaluation-run"
  # How often to log during evaluation (every N steps).
  log_every_n_steps: 10

# Authentication tokens. Both are blank in this file.
# Do not commit real tokens to version control.
# NOTE(review): confirm whether the consumer can read these from environment
# variables instead when the values here are empty.
auth:
  hf_token: ""
  wandb_token: ""

# Models to evaluate, written as <label>: <saved model path>.
# NOTE(review): only the dpo path ends with a trailing slash; presumably both
# forms are accepted by the loader - confirm.
models:
  beta_0: "saved_models/proposed_model/beta_0"
  dpo: "saved_models/dpo_model/"
