{
  "job_type": "self_evolve_cl_local",
  "dataset": {
    "name": "gsm8k",
    "trainset_path": "training_data/gsm8k/proportion/rewrites.prop25.jsonl",
    "valset_path": "training_data/gsm8k/val.jsonl",
    "seed": 42
  },
  "models": {
    "student": "facebook/opt-2.7b"
  },
  "curriculum": {
    "eval_steps": 50,
    "eval_questions": 50,
    "alpha": 0.75,
    "beta": 0.75,
    "epsilon": 0.05,
    "temperature": 25,
    "sampling_strategy": "sequential",
    "policy": "boltzmann"
  },
  "training": {
    "output_dir": "trained_models/gsm8k/opt-2.7b/ablation/prop25/",
    "max_steps": 5000,
    "per_device_train_batch_size": 8,
    "gradient_accumulation_steps": 1,
    "max_length": 2048,
    "save_strategy": "no",
    "logging_steps": 1,
    "learning_rate": 1e-5,
    "weight_decay": 0.05,
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "constant",
    "report_to": "wandb",
    "save_only_model": true
  },
  "validation": {
    "mode": "math",
    "sbert": {
      "model_path": "all-MiniLM-L6-v2",
      "device": "cpu",
      "batch_size": 128,
      "force_offline": false,
      "num_threads": 1
    },
    "verifier": {
      "sim_threshold": 0.75,
      "f1_threshold": 0.90,
      "containment_use_full_response": false,
      "remove_stopwords": true,
      "gen_emb_cache_cap": 20000
    },
    "generation": {
      "max_input_len": 1024,
      "max_new_tokens": 256,
      "temperature": 0.0,
      "answer_cue": ""
    }
  }
}