# Number of rollouts per data item
sample:
  rollout_n: 1
# Model configuration
model:
  # Model identifier; here a relative checkpoint path. Keep the quotes
  # balanced: without the opening quote YAML reads the value as a plain
  # scalar that ends in a literal '"' character.
  name: "sft_checkpoints/TP/topiocqa_ance"
  # NOTE(review): the meaning of "smn" is not visible here (presumably a
  # short model name used to label outputs) — confirm against the consumer.
  smn: "topiocqa_ance"
  max_model_len: 2048
  tensor_parallel_size: 1
  # NOTE(review): presumably the fraction of GPU memory the inference engine
  # may claim — confirm.
  gpu_memory_utilization: 0.8

# System configuration
system:
  total_gpus: 2
  # NOTE(review): units of batch_size (prompts per batch?) are not visible
  # here — confirm against the consumer.
  batch_size: 2000

# Data configuration
# NOTE(review): "/nonthink_woqd" below looks like a leftover directory
# suffix; its purpose is unclear — confirm or remove.
# /nonthink_woqd
data:
  # Input and output currently point at the same directory.
  input_dir: "test_data/topiocqa_0806"
  output_dir: "test_data/topiocqa_0806"
  input_files:
    # Disabled alternative input file:
    # - "train_45450.jsonl"
    - "test.jsonl"

# Sampling parameters
sampling:
  temperature: 0.7
  top_p: 0.8
  top_k: 20
  # NOTE(review): presumably the cap on generated tokens per response —
  # confirm.
  max_tokens: 256

# 2 Score computation section
# 2.1 Custom reward function
# NOTE(review): the key below is spelled "custom_reward_fuction" (missing
# "n"). It is left unchanged because the consuming code is not visible here;
# confirm which spelling the loader expects before renaming.
custom_reward_fuction:
  path: "./juda_reward.py"
  function_name: "compute_score_pingxingjuda_with_citation"