# Hydra settings.
hydra:
  run:
    # Per-run output directory, assembled from other keys of this config
    # (cache_path, task, model.path, dataset) followed by the launch date and
    # the launch time, each produced by the `now` resolver.
    # NOTE(review): `dataset` is a list in this file, so whatever its
    # interpolated text form is ends up in the path — confirm that the
    # resulting directory name is intended.
    dir: '${cache_path}/${task}/${model.path}/${dataset}/${now:%Y-%m-%d}/${now:%H-%M-%S}'

# Model settings. `model.path` is also interpolated into hydra.run.dir.
model:
  type: CausalLM
  # Relative path to the loading script.
  path_to_load_script: './load_qwen.py'
  path: 'Qwen/Qwen2.5-Math-7B'
  # NOTE(review): presumably forwarded to the model loader in the load
  # script — verify against that script.
  load_model_args:
    device_map: auto
  # Explicit empty mapping: no tokenizer arguments are set here.
  load_tokenizer_args: {}

# Task and dataset selection. `task` and `dataset` are also interpolated into
# hydra.run.dir.
task: bio
# Sequence with a single dataset identifier.
dataset:
  - user/test_gsm8k
# Column names used for the input text and the reference label.
text_column: question
label_column: answer
eval_split: train
load_from_disk: false
# NOTE(review): -1 presumably disables subsampling — confirm with the consumer.
subsample_eval_dataset: -1
use_claim_ue: true

# Generation metrics: a sequence holding one entry, StepFactCheck, together
# with the keyword arguments listed under `kwargs`.
generation_metrics:
  - name: StepFactCheck
    kwargs:
      prompt_file: 'configs/qwen3_prompt_general.txt'
      model: 'gpt-4o'
      n_threads: 16
      progress_bar: false

# Estimators to run. Each entry has a `name`; an optional `cfg` mapping carries
# per-entry options. Some names appear more than once and differ only in `cfg`.
estimators:
  - name: RandomBaselineClaim
  - name: MaximumClaimProbability
  # ClaimConditionedProbabilityClaim is listed twice, once per nli_context.
  - name: ClaimConditionedProbabilityClaim
    cfg:
      nli_context: "no_context"
  - name: ClaimConditionedProbabilityClaim
    cfg:
      nli_context: "fact_pref"
  - name: PTrueClaim
  - name: StepsSemanticEntropy
  - name: StepsLexicalSimilarity
  - name: StepsDegMat
  - name: StepsEccentricity
  - name: StepsNumSemSets
  # StepsDissimilarity is listed three times, once per similarity value.
  - name: StepsDissimilarity
    cfg:
      similarity: "rougeL"
  - name: StepsDissimilarity
    cfg:
      similarity: "nli_entail"
  - name: StepsDissimilarity
    cfg:
      similarity: "nli_contra"
  - name: MaxTokenEntropyClaim
  - name: PerplexityClaim

# Stat calculators. The first entry is the plain string `auto`; the second
# declares one calculator explicitly.
# NOTE(review): `auto` presumably asks the consumer to register its default
# calculators — confirm against the code that reads this list.
stat_calculators:
  - auto
  - name: ClaimExtractor
    # Dotted path of the builder for this calculator.
    builder: synthetic_dataset_generation.utils.steps_extractor
    # Explicit null: no configuration is given (same parsed value as a bare
    # `cfg:` with nothing after it).
    cfg: null
    # Stats listed for this calculator.
    stats:
      - "claims"
      - "claim_texts_concatenated"
      - "claim_input_texts_concatenated"
    # Stats listed as its dependencies.
    dependencies:
      - "greedy_texts"
      - "greedy_tokens"

# Generation and runtime settings.
max_new_tokens: 1024
ignore_exceptions: false
language: en
batch_size: 1
deberta_batch_size: 100
# Sequence containing a single seed value.
seed:
  - 1
# Base cache directory; it is the first component of hydra.run.dir.
cache_path: './workdir/cache'
# Resolves to the Hydra run directory.
save_path: '${hydra:run.dir}'
report_to_wandb: true

n_threads: 16
log_time: true

output_attentions: false
