# Point Hugging Face tooling at the hf-mirror.com endpoint.
export HF_ENDPOINT=https://hf-mirror.com

# Log the Hugging Face CLI in with a token supplied through the environment.
# Access tokens must never be committed: export HF_TOKEN in the calling shell
# (or load it from a git-ignored env file) before running this script.
# NOTE(review): a token used to be hard-coded on this line; treat it as leaked
# and revoke it in the Hugging Face account settings.
if [ -n "${HF_TOKEN:-}" ]; then
  hf auth login --token "${HF_TOKEN}"
else
  # Keep going without logging in: the script has never aborted on a failed
  # login, and previously cached credentials may still be valid.
  printf '%s\n' "warning: HF_TOKEN is not set; skipping 'hf auth login'" >&2
fi



# Checkpoints passed to src/train.py as --solver_path and --prover_path.
SOLVER_PATH='./models/RL_Epoch6_full_sft_counterexample_solve'
PROVER_PATH='./models/RL_Epoch6_full_sft_formalproof_generate'
# Alternative checkpoints; uncomment to override the pair above.
# SOLVER_PATH="./models/Qwen3-8B_counterexample_solve"
# PROVER_PATH="./models/dsproverv2-7b_full_sft_formalproof_generate"

# Output directory for this run, named after the launch time (YYYYmmdd_HHMMSS).
timestamp="$(date '+%Y%m%d_%H%M%S')"
mkdir -p "save/train_${timestamp}"

# Environment for vLLM: pin the ports it reads from VLLM_PORT and
# VLLM_DP_MASTER_PORT.
export VLLM_PORT=50000
export VLLM_DP_MASTER_PORT=54321
# Turn the FlashInfer sampler off; background in
# https://github.com/vllm-project/vllm/issues/19483
export VLLM_USE_FLASHINFER_SAMPLER=0
# Make GPUs 0-7 visible to the training process (eight devices, matching the
# --gpu value passed below).
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Start training. SOLVER_PATH, PROVER_PATH and timestamp are set earlier in
# this script. To capture stdout in a file, append `> output.txt` to the last
# line of the command.
python src/train.py \
  --num_problems 10000 \
  --solver_name erdos \
  --prover_name erdos \
  --dataset_path 'datasets/train_data.json' \
  --solver_path "$SOLVER_PATH" \
  --prover_path "$PROVER_PATH" \
  --solver_k 3 \
  --prover_k 3 \
  --default_header 0 \
  --gpu 8 \
  --max_tokens 4096 \
  --alpha 0.0 \
  --temperature 0.9 \
  --top_p 0.95 \
  --training_epochs 10 \
  --batch_size 1500 \
  --save_dir "save/train_$timestamp"