#!/bin/bash
#
# Runs ref_dataset.py with a fixed configuration for the
# HuggingFaceH4/MATH-500 dataset and the DeepSeek-R1-Distill-Llama-8B model.
#
# Usage: run from the directory that contains ref_dataset.py (the script is
# referenced by a relative path). Takes no arguments.
#
# Exit status: 0 if ref_dataset.py succeeds; otherwise the exit status of the
# failed python invocation.

# Abort on unhandled errors, unset variables, and failures inside pipelines.
set -euo pipefail

# --- Configuration (each value is forwarded to the same-named CLI flag) -----
readonly DATASET_NAME="HuggingFaceH4/MATH-500"
readonly SPLIT="test"
readonly MODEL_NAME="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
readonly TOKENIZER_NAME="deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
readonly OUTPUT_PATH="./ref_dataset_math500"
readonly QUESTION_KEY="problem"
# NOTE(review): -1 presumably means "use every sample" -- confirm against
# ref_dataset.py.
readonly NUM_SAMPLES=-1
readonly MAX_NEW_TOKENS=16384
readonly MAX_LENGTH=32768
# NOTE(review): 0.0 presumably selects deterministic decoding -- confirm
# against ref_dataset.py.
readonly TEMPERATURE=0.0

# Capture the exit status explicitly (instead of letting `set -e` abort
# silently) so a failure is reported on stderr and the success banner below is
# never printed for a failed run.
status=0
python ref_dataset.py \
    --dataset_name "$DATASET_NAME" \
    --split "$SPLIT" \
    --model_name "$MODEL_NAME" \
    --tokenizer_name "$TOKENIZER_NAME" \
    --output_path "$OUTPUT_PATH" \
    --question_key "$QUESTION_KEY" \
    --num_samples "$NUM_SAMPLES" \
    --max_new_tokens "$MAX_NEW_TOKENS" \
    --max_length "$MAX_LENGTH" \
    --temperature "$TEMPERATURE" || status=$?

if (( status != 0 )); then
    printf 'Reference dataset generation failed (exit status %d)\n' "$status" >&2
    exit "$status"
fi

echo "Reference dataset generation completed!"
echo "Output saved to: $OUTPUT_PATH"