#!/bin/bash
#
# Evaluation driver: generates code solutions, generates test cases for them,
# re-evaluates the solutions on the generated test cases, and runs the reward
# models on the result.
#
# Options (each takes one value):
#   --source-model-path     checkpoint handed to the merge script; when given,
#                           the merged model is saved to --code-model-path
#   --code-model-path       model used to generate solutions
#   --test-case-model-path  model used to generate test cases
#   --benchmark_name        "lcb" (default) or "taco"
#   --n_samples             samples per problem (default: 32 for lcb, 16 for taco)
#   --idx                   run index used in output file names (default: 0)
#
# errexit: stop at the first failing command; xtrace: echo each command.
set -o errexit -o xtrace

#######################################
# Parse the command-line options into global variables.
# Globals (written): SOURCE_MODEL_PATH, CODE_MODEL_PATH, TEST_CASE_MODEL_PATH,
#                    BENCHMARK_NAME, N_SAMPLES, IDX
# Arguments: the script's argument list ("$@"); every option takes one value
# Outputs:   error messages on stderr
# Exits:     1 on an unknown option or an option given without a value
#######################################
parse_args() {
    local opt
    while [[ $# -gt 0 ]]; do
        opt="$1"

        # Validate first so that a trailing option without a value produces a
        # clear message instead of a bare `shift 2` failure.
        case "$opt" in
            --source-model-path | --code-model-path | --test-case-model-path | \
                --benchmark_name | --n_samples | --idx)
                if [[ $# -lt 2 ]]; then
                    printf 'Missing value for argument: %s\n' "$opt" >&2
                    exit 1
                fi
                ;;
            *)
                printf 'Invalid argument: %s\n' "$opt" >&2
                exit 1
                ;;
        esac

        case "$opt" in
            --source-model-path) SOURCE_MODEL_PATH="$2" ;;
            --code-model-path) CODE_MODEL_PATH="$2" ;;
            --test-case-model-path) TEST_CASE_MODEL_PATH="$2" ;;
            --benchmark_name) BENCHMARK_NAME="$2" ;;
            --n_samples) N_SAMPLES="$2" ;;
            --idx) IDX="$2" ;;
        esac
        shift 2
    done
}

parse_args "$@"

# Make sure the ModelScope switch for vLLM is not inherited from the caller.
unset VLLM_USE_MODELSCOPE

# Hugging Face settings for child processes: mirror endpoint and home directory.
export HF_ENDPOINT=https://hf-mirror.com HF_HOME=/path/to/folder/huggingface

# Load conda into this shell, then switch to the "tcs" environment.
. /path/to/folder/miniconda3/bin/activate
conda activate tcs

#######################################
# Apply option defaults and derive every data/output path used by the pipeline.
# Globals (read):    CODE_MODEL_PATH, TEST_CASE_MODEL_PATH
# Globals (written): BENCHMARK_NAME, IDX, N_SAMPLES, CODE_MODEL_NAME,
#                    TEST_CASE_MODEL_NAME, DATA_PATH, CODE_OUTPUT_PATH,
#                    CODE_OUTPUT_PATH_REWARD_SCORE, TEST_CASE_INPUT_PATH,
#                    TEST_CASE_OUTPUT_PATH, TEST_CASE_OUTPUT_JSON,
#                    LIVECODEBENCHDIR
# Outputs:   error message on stderr for an unsupported benchmark
# Exits:     1 when BENCHMARK_NAME is neither "lcb" nor "taco"
#######################################
resolve_eval_config() {
    BENCHMARK_NAME=${BENCHMARK_NAME:-"lcb"}
    IDX=${IDX:-0}

    # Output locations are keyed by the last path component of each model.
    CODE_MODEL_NAME=$(basename "$CODE_MODEL_PATH")
    TEST_CASE_MODEL_NAME=$(basename "$TEST_CASE_MODEL_PATH")

    # Each benchmark has its own dataset and default sample count. Anything
    # else is rejected here; otherwise DATA_PATH and N_SAMPLES would stay unset
    # and later steps would run with missing arguments.
    case "$BENCHMARK_NAME" in
        lcb)
            DATA_PATH="/path/to/folder/data/livecodebench_2408_2502_tagged_public.pkl"
            N_SAMPLES=${N_SAMPLES:-32}
            ;;
        taco)
            DATA_PATH="/path/to/folder/data/taco_for_test_evaluation.pkl"
            N_SAMPLES=${N_SAMPLES:-16}
            ;;
        *)
            printf 'Unsupported benchmark: %s (expected "lcb" or "taco")\n' \
                "$BENCHMARK_NAME" >&2
            exit 1
            ;;
    esac

    local eval_dir="/path/to/file/eval/${BENCHMARK_NAME}/${CODE_MODEL_NAME}_${TEST_CASE_MODEL_NAME}"
    CODE_OUTPUT_PATH="${eval_dir}/code_output_${IDX}.pkl"
    CODE_OUTPUT_PATH_REWARD_SCORE="${eval_dir}/code_output_${IDX}_final.pkl"
    TEST_CASE_INPUT_PATH="${eval_dir}/test_case_input_${IDX}.pkl"
    TEST_CASE_OUTPUT_PATH="${eval_dir}/test_case_output_${IDX}.pkl"
    TEST_CASE_OUTPUT_JSON="${eval_dir}/test_case_output_${IDX}.json"
    LIVECODEBENCHDIR="/path/to/folder/data/${CODE_MODEL_NAME}_${TEST_CASE_MODEL_NAME}_test_case_output_${IDX}"
}

# Resolve (and validate) the configuration before the merge below, so that a
# bad benchmark name is reported before any work is done.
resolve_eval_config

# Optional pre-step: hand the source checkpoint to the merge script, which is
# told to save its result at CODE_MODEL_PATH.
if [[ -n "$SOURCE_MODEL_PATH" ]]; then
    : "${CODE_MODEL_PATH:?--code-model-path is required as the save path when --source-model-path is given}"
    bash /path/to/file/scripts/to_hf_ds_7b_merge_500stages.sh \
        --base-path "$SOURCE_MODEL_PATH" \
        --save-path "$CODE_MODEL_PATH"
fi

# Preflight: a model path is only consumed when its cached output is absent.
# Fail here with a clear message rather than handing eval.sh an empty
# --model-path value partway through the run.
if [[ ! -f "$CODE_OUTPUT_PATH" ]]; then
    : "${CODE_MODEL_PATH:?--code-model-path is required to generate solutions}"
fi
if [[ ! -f "$TEST_CASE_OUTPUT_PATH" ]]; then
    : "${TEST_CASE_MODEL_PATH:?--test-case-model-path is required to generate test cases}"
fi

# Step 1: Generate the solutions (skipped when the output file already exists).
if [[ ! -f "$CODE_OUTPUT_PATH" ]]; then
    response_length=8192
    t=0.8
    bash /path/to/file/scripts/eval.sh \
        --model-path "$CODE_MODEL_PATH" \
        --data-path "$DATA_PATH" \
        --output-path "$CODE_OUTPUT_PATH" \
        --n-samples "$N_SAMPLES" \
        --response-length "$response_length" \
        --t "$t"
fi

# Step 2.1: Prepare the test-case generation input from the solutions file
# (skipped when the input file already exists).
if [[ ! -f "$TEST_CASE_INPUT_PATH" ]]; then
    if [[ "$BENCHMARK_NAME" == "lcb" ]]; then
        python /path/to/file/jupyter/tss/modify_test_case.py \
            --input_file "$CODE_OUTPUT_PATH" \
            --output_file "$TEST_CASE_INPUT_PATH"
    elif [[ "$BENCHMARK_NAME" == "taco" ]]; then
        python /path/to/file/jupyter/tss/modify_test_case_taco.py \
            --input_file "$CODE_OUTPUT_PATH" \
            --output_file "$TEST_CASE_INPUT_PATH"
    fi
fi

# Step 2.2: Generate the test cases with the test-case model, one sample per
# prompt and with evaluation turned off (skipped when the output exists).
if [[ ! -f "$TEST_CASE_OUTPUT_PATH" ]]; then
    n_samples=1
    response_length=8192
    t=0.8
    bash /path/to/file/scripts/eval.sh \
        --model-path "$TEST_CASE_MODEL_PATH" \
        --data-path "$TEST_CASE_INPUT_PATH" \
        --output-path "$TEST_CASE_OUTPUT_PATH" \
        --n-samples "$n_samples" \
        --response-length "$response_length" \
        --t "$t" \
        --evaluation False
fi

# Step 3: Extract the test cases (skipped when the JSON file already exists).
# NOTE(review): tss_processing.py only receives the .pkl path; presumably it
# writes the .json next to it -- confirm.
if [[ ! -f "$TEST_CASE_OUTPUT_JSON" ]]; then
    python /path/to/file/jupyter/tss/tss_processing.py \
        --input_path "$TEST_CASE_OUTPUT_PATH"
fi

if [[ "$BENCHMARK_NAME" == "lcb" ]]; then
    # Step 4: Add the generated test cases to the benchmark dir.
    python /path/to/file/verl/utils/reward_score/livecodebench/lcb_runner/benchmarks/code_generation.py \
        --test_cases_file "$TEST_CASE_OUTPUT_JSON" \
        --output_dir "$LIVECODEBENCHDIR"
elif [[ "$BENCHMARK_NAME" == "taco" ]]; then
    # Step 4: Replace the test cases in $CODE_OUTPUT_PATH with the generated ones.
    python /path/to/file/jupyter/tss/taco_test_case_replace.py \
        --input_file "$TEST_CASE_OUTPUT_JSON" \
        --output_file "$CODE_OUTPUT_PATH"
fi

# Step 5: Run `eval.sh` to get the results on the generated test cases.
bash /path/to/file/scripts/eval.sh \
    --output-path "$CODE_OUTPUT_PATH" \
    --livecodebench-dir "$LIVECODEBENCHDIR" \
    --n-samples "$N_SAMPLES" \
    --complete-evaluation True

# Step 6: Post-process the results with the benchmark's sampling script; the
# second invocation adds --validate_test_cases.
if [[ "$BENCHMARK_NAME" == "lcb" ]]; then
    python /path/to/file/jupyter/tss/tss_sample_new.py --response_path "$CODE_OUTPUT_PATH"
    python /path/to/file/jupyter/tss/tss_sample_new.py --response_path "$CODE_OUTPUT_PATH" --validate_test_cases
elif [[ "$BENCHMARK_NAME" == "taco" ]]; then
    python /path/to/file/jupyter/tss/tss_sample_taco.py --response_path "$CODE_OUTPUT_PATH"
    python /path/to/file/jupyter/tss/tss_sample_taco.py --response_path "$CODE_OUTPUT_PATH" --validate_test_cases
fi

# Step 7: Run the reward models on the `_final.pkl` file.
# NOTE(review): that file is presumably written by Step 6 -- confirm.
python /path/to/file/jupyter/tss/reward_model_internlm.py --data_path "$CODE_OUTPUT_PATH_REWARD_SCORE"
python /path/to/file/jupyter/tss/reward_model_qrm.py --data_path "$CODE_OUTPUT_PATH_REWARD_SCORE"