```bash
#!/bin/bash

# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Formalizer checkpoints to evaluate; every entry lives under MODEL_ROOT.
MODEL_ROOT="xxxxxx"
MODEL_PATHS=(
    "${MODEL_ROOT}/Kimina-Autoformalizer-7B"
    "${MODEL_ROOT}/StepFun-Formalizer-7B"
    "${MODEL_ROOT}/Goedel-Formalizer-V2-8B"
    "${MODEL_ROOT}/Goedel-Formalizer-V2-32B"
    "${MODEL_ROOT}/StepFun-Formalizer-32B"
)

# Datasets to process; each one is read from $DATA_PATH/<name>.jsonl.
# (A single-dataset run used to be configured as: DATA_NAME="combibench")
DATA_NAMES=(
    "combibench"
    "proverbench"
    "formalmath-lite"
)

# Run indices; each value becomes the "_temp_<times>" suffix of a working directory.
TIMES=(1)

# Dataset directory and the two models handed to consistency_judge.py.
DATA_PATH="/mnt/dolphinfs/ssd_pool/docker/user/hadoop-nlp-sh02/o1/guoqi/prover/formalizer_eval_submit/data"
QWQ_PATH="/mnt/dolphinfs/ssd_pool/docker/user/hadoop-nlp-sh02/o1/guoqi/model/QWQ-32B"
QWEN3_PATH="/mnt/dolphinfs/ssd_pool/docker/user/hadoop-nlp-sh02/o1/guoqi/model/Qwen3-32B"

# Sampling settings forwarded to formalization.py as --n / --temperature.
NUM=16
TEMPERATURE=0.6

# GPU budget and the number of parallel partitions for each stage.
GPU_NUM=8
FORMALIZATION_PARTITION_NUM=4
CONSISTENCY_PARTITION_NUM=4

for times in "${TIMES[@]}"; do
    for MODEL_PATH in "${MODEL_PATHS[@]}"; do
        MODEL_NAME=$(basename "$MODEL_PATH")
        echo "MODEL_NAME is: $MODEL_NAME"
        # Iterate through each DATA_NAME
        for DATA_NAME in "${DATA_NAMES[@]}"; do
            echo "Processing DATA_NAME: $DATA_NAME"

            # Working directory for this dataset / model / sampling configuration / run index.
            # Computed once instead of repeating the long path in every command below.
            work_dir="$DATA_PATH/${DATA_NAME}/${MODEL_NAME}_num=${NUM}_t=${TEMPERATURE}_temp_${times}"

            # Check and create folder
            if [[ ! -d "$work_dir" ]]; then
                if ! mkdir -p "$work_dir"; then
                    echo "Failed to create directory $work_dir, skipping DATA_NAME $DATA_NAME." >&2
                    continue
                fi
                echo "Directory $work_dir has been created."
            else
                echo "Directory $work_dir already exists."
            fi

            # Copy ${DATA_NAME}.jsonl into the working directory unless a copy is already there
            if [[ -f "$work_dir/${DATA_NAME}.jsonl" ]]; then
                echo "File $work_dir/${DATA_NAME}.jsonl already exists, skipping copy."
            else
                echo "Copying $DATA_PATH/${DATA_NAME}.jsonl to $work_dir/"
                # Without the input file none of the later stages can do anything useful.
                if ! cp "$DATA_PATH/${DATA_NAME}.jsonl" "$work_dir/"; then
                    echo "Failed to copy $DATA_PATH/${DATA_NAME}.jsonl, skipping DATA_NAME $DATA_NAME." >&2
                    continue
                fi
            fi

            # Partition the copied dataset unless partition 0 already exists.
            # NOTE(review): only _part_0 is checked; the remaining partition files
            # are presumably written alongside it by partition.py - confirm.
            if [[ -f "$work_dir/${DATA_NAME}_part_0.jsonl" ]]; then
                echo "File $work_dir/${DATA_NAME}_part_0.jsonl already exists, skipping partition."
            else
                # Report the configured partition count rather than a hard-coded number.
                echo "Partition ${DATA_NAME}.jsonl into $FORMALIZATION_PARTITION_NUM files"
                if ! python3 partition.py --input_path "$work_dir/${DATA_NAME}.jsonl" --n "$FORMALIZATION_PARTITION_NUM"; then
                    echo "partition.py failed for $work_dir/${DATA_NAME}.jsonl, skipping DATA_NAME $DATA_NAME." >&2
                    continue
                fi
            fi

            # Iterate through each partitioned jsonl file, call formalization.py

            # GPUs assigned to each formalization partition (integer division).
            # Clamped to at least 1: a partition count of 0, or one larger than
            # GPU_NUM, would otherwise lead to a division by zero below.
            if (( FORMALIZATION_PARTITION_NUM > 0 )); then
                GPU_PER_PART=$((GPU_NUM / FORMALIZATION_PARTITION_NUM))
            else
                GPU_PER_PART=1
            fi
            (( GPU_PER_PART >= 1 )) || GPU_PER_PART=1

            # Function to generate GPU allocation string.
            # Arguments: $1 - zero-based partition index
            # Globals:   GPU_NUM, GPU_PER_PART (both read at call time)
            # Outputs:   comma-separated GPU ids on stdout, e.g. "2,3"
            # GPUs are split into GPU_NUM / GPU_PER_PART groups of consecutive ids;
            # partition indices past the last group wrap around to the first one.
            get_gpu_ids() {
                local idx=${1:-0}
                # Defensive clamps so the function never divides by zero on its own.
                local per_part=${GPU_PER_PART:-0}
                (( per_part >= 1 )) || per_part=1
                local num_groups=$((GPU_NUM / per_part))
                (( num_groups >= 1 )) || num_groups=1
                local start=$(( (idx % num_groups) * per_part ))
                local ids=""
                # The counter is local so it cannot clobber a variable of the caller.
                local gpu
                for ((gpu = start; gpu < start + per_part; gpu++)); do
                    # ${ids:+,} inserts the separator only after the first id.
                    ids+="${ids:+,}${gpu}"
                done
                echo "$ids"
            }

            # Launch one background formalization.py process per partition file,
            # each with the GPU ids returned by get_gpu_ids, then wait for all.
            pids=()
            part_idx=0
            output_files=()
            # NOTE: the [0-9] glob only matches single-digit partition indices.
            for part_file in "$DATA_PATH/${DATA_NAME}/${MODEL_NAME}_num=${NUM}_t=${TEMPERATURE}_temp_${times}/${DATA_NAME}_part_"[0-9].jsonl; do
                # An unmatched glob is left as a literal string, so skip non-files.
                [[ -f "$part_file" ]] || continue
                gpu_ids=$(get_gpu_ids "$part_idx")
                log_file="${part_file}.log"
                # Strip the .jsonl suffix explicitly; ${var/.jsonl/...} would replace
                # the first ".jsonl" found anywhere in the path.
                output_file="${part_file%.jsonl}_with_statements.jsonl"
                output_files+=("$output_file")
                echo "Using nohup to call formalization.py to process $part_file, specifying GPU $gpu_ids, output to $output_file, log output to $log_file"
                nohup python3 formalization.py --input_path "$part_file" --output_path "$output_file" --gpu "$gpu_ids" --n "$NUM" --temperature "$TEMPERATURE" --model "$MODEL_PATH" > "$log_file" 2>&1 &
                pids+=("$!")
                part_idx=$((part_idx+1))
            done

            # Nothing was launched: there is nothing to merge or check afterwards.
            if (( ${#pids[@]} == 0 )); then
                echo "No partition files found for DATA_NAME $DATA_NAME, skipping." >&2
                continue
            fi

            # Wait for all python processes to complete and record failures;
            # wait returns the exit status of the process it waited for.
            failed_jobs=0
            for pid in "${pids[@]}"; do
                if ! wait "$pid"; then
                    echo "formalization.py process $pid exited with a non-zero status." >&2
                    failed_jobs=$((failed_jobs+1))
                fi
            done

            # Do not merge partial results: a merged file built from a failed run
            # would make later runs skip the merge step.
            if (( failed_jobs > 0 )); then
                echo "$failed_jobs formalization.py process(es) failed, see the .log files; skipping DATA_NAME $DATA_NAME." >&2
                continue
            fi

            echo "All formalization.py executions completed."

            # Combine the per-partition formalization outputs into one file,
            # unless a merged file is already present.
            merged_output="$DATA_PATH/${DATA_NAME}/${MODEL_NAME}_num=${NUM}_t=${TEMPERATURE}_temp_${times}/${DATA_NAME}_with_statements.jsonl"
            if [[ ! -f "$merged_output" ]]; then
                # The inputs are passed as one comma-separated string; IFS is
                # changed inside the command substitution only.
                input_path_list=$(IFS=,; printf '%s\n' "${output_files[*]}")
                echo "Calling merge.py to merge files, output to $merged_output"
                python3 merge.py --input_path_list "$input_path_list" --output_path "$merged_output"
            else
                echo "File $merged_output already exists, skipping merge."
            fi

            # Call lean4_checkin.py on the merged statements.
            # The .jsonl suffix is stripped explicitly; ${var/.jsonl/...} would
            # replace the first ".jsonl" found anywhere in the path.
            checkin_output="${merged_output%.jsonl}_checkin.jsonl"
            echo "Calling lean4_checkin.py, input file is $merged_output, output file is $checkin_output"
            if ! python3 lean4_checkin.py --input_path "$merged_output" --output_path "$checkin_output"; then
                echo "lean4_checkin.py failed for $merged_output, skipping DATA_NAME $DATA_NAME." >&2
                continue
            fi


            # Split checkin_output unless partition 0 already exists.
            # NOTE(review): only _part_0 is checked; the remaining partition files
            # are presumably written alongside it by partition.py - confirm.
            checkin_part_prefix="${checkin_output%.jsonl}_part_"
            if [[ -f "${checkin_part_prefix}0.jsonl" ]]; then
                echo "File ${checkin_part_prefix}0.jsonl already exists, skipping split."
            else
                # Report the configured partition count rather than a hard-coded number.
                echo "Partition $checkin_output into $CONSISTENCY_PARTITION_NUM files"
                if ! python3 partition.py --input_path "$checkin_output" --n "$CONSISTENCY_PARTITION_NUM"; then
                    echo "partition.py failed for $checkin_output, skipping DATA_NAME $DATA_NAME." >&2
                    continue
                fi
            fi


            # GPUs assigned to each consistency-judge partition (integer division).
            # Clamped to at least 1: a partition count of 0, or one larger than
            # GPU_NUM, would otherwise lead to a division by zero below.
            if (( CONSISTENCY_PARTITION_NUM > 0 )); then
                GPU_PER_PART=$((GPU_NUM / CONSISTENCY_PARTITION_NUM))
            else
                GPU_PER_PART=1
            fi
            (( GPU_PER_PART >= 1 )) || GPU_PER_PART=1

            # Function to generate GPU allocation string.
            # Arguments: $1 - zero-based partition index
            # Globals:   GPU_NUM, GPU_PER_PART (both read at call time)
            # Outputs:   comma-separated GPU ids on stdout, e.g. "2,3"
            # GPUs are split into GPU_NUM / GPU_PER_PART groups of consecutive ids;
            # partition indices past the last group wrap around to the first one.
            get_gpu_ids() {
                local idx=${1:-0}
                # Defensive clamps so the function never divides by zero on its own.
                local per_part=${GPU_PER_PART:-0}
                (( per_part >= 1 )) || per_part=1
                local num_groups=$((GPU_NUM / per_part))
                (( num_groups >= 1 )) || num_groups=1
                local start=$(( (idx % num_groups) * per_part ))
                local ids=""
                # The counter is local so it cannot clobber a variable of the caller.
                local gpu
                for ((gpu = start; gpu < start + per_part; gpu++)); do
                    # ${ids:+,} inserts the separator only after the first id.
                    ids+="${ids:+,}${gpu}"
                done
                echo "$ids"
            }

            # Execute consistency_judge.py in parallel (QWQ model): one background
            # process per check-in partition, each with its own GPU ids.
            pids=()
            part_idx=0
            # NOTE: the [0-9] glob only matches single-digit partition indices.
            for part_file in "${checkin_output%.jsonl}_part_"[0-9].jsonl; do
                # An unmatched glob is left as a literal string, so skip non-files.
                [[ -f "$part_file" ]] || continue
                log_file="${part_file%.jsonl}_consistency_QWQ.log"
                output_file="${part_file%.jsonl}_consistency_QWQ.jsonl"
                gpu_ids=$(get_gpu_ids "$part_idx")
                echo "Using nohup to call consistency_judge.py to process $part_file, model parameter is QWQ, GPU parameter is $gpu_ids, output is $output_file, log output to $log_file"
                nohup python3 consistency_judge.py --input_path "$part_file" --output_path "$output_file" --model "$QWQ_PATH" --gpu "$gpu_ids" > "$log_file" 2>&1 &
                pids+=("$!")
                part_idx=$((part_idx+1))
            done

            # Nothing was launched: there are no results to merge afterwards.
            if (( ${#pids[@]} == 0 )); then
                echo "No check-in partition files found for DATA_NAME $DATA_NAME, skipping." >&2
                continue
            fi

            # Wait for all consistency_judge.py to complete and record failures;
            # wait returns the exit status of the process it waited for.
            failed_jobs=0
            for pid in "${pids[@]}"; do
                if ! wait "$pid"; then
                    echo "consistency_judge.py (QWQ) process $pid exited with a non-zero status." >&2
                    failed_jobs=$((failed_jobs+1))
                fi
            done

            # Do not merge partial results: a merged file built from a failed run
            # would make later runs skip the merge step.
            if (( failed_jobs > 0 )); then
                echo "$failed_jobs consistency_judge.py (QWQ) process(es) failed, see the .log files; skipping DATA_NAME $DATA_NAME." >&2
                continue
            fi


            # Collect the QWQ result file of every check-in partition, keeping
            # only those that actually exist on disk.
            q_consistency_files=()
            for part_file in "${checkin_output%.jsonl}_part_"[0-9].jsonl; do
                q_consistency_file="${part_file%.jsonl}_consistency_QWQ.jsonl"
                [[ -f "$q_consistency_file" ]] || continue
                q_consistency_files+=("$q_consistency_file")
            done

            # Merge them into one file unless that file is already present.
            merged_q_consistency="${checkin_output%.jsonl}_consistency_QWQ.jsonl"
            if [[ ! -f "$merged_q_consistency" ]]; then
                # The inputs are passed as one comma-separated string; IFS is
                # changed inside the command substitution only.
                input_path_list=$(IFS=,; printf '%s\n' "${q_consistency_files[*]}")
                echo "Calling merge.py to merge QWQ consistency results, output to $merged_q_consistency"
                python3 merge.py --input_path_list "$input_path_list" --output_path "$merged_q_consistency"
            else
                echo "File $merged_q_consistency already exists, skipping merge."
            fi


            # Execute consistency_judge.py in parallel (QWEN3 model): one background
            # process per check-in partition, each with its own GPU ids.
            pids=()
            part_idx=0
            # NOTE: the [0-9] glob only matches single-digit partition indices.
            for part_file in "${checkin_output%.jsonl}_part_"[0-9].jsonl; do
                # An unmatched glob is left as a literal string, so skip non-files.
                [[ -f "$part_file" ]] || continue
                log_file="${part_file%.jsonl}_consistency_QWEN3.log"
                output_file="${part_file%.jsonl}_consistency_QWEN3.jsonl"
                gpu_ids=$(get_gpu_ids "$part_idx")
                echo "Using nohup to call consistency_judge.py to process $part_file, model parameter is QWEN3, GPU parameter is $gpu_ids, output is $output_file, log output to $log_file"
                nohup python3 consistency_judge.py --input_path "$part_file" --output_path "$output_file" --model "$QWEN3_PATH" --gpu "$gpu_ids" > "$log_file" 2>&1 &
                pids+=("$!")
                part_idx=$((part_idx+1))
            done

            # Nothing was launched: there are no results to merge afterwards.
            if (( ${#pids[@]} == 0 )); then
                echo "No check-in partition files found for DATA_NAME $DATA_NAME, skipping." >&2
                continue
            fi

            # Wait for all consistency_judge.py to complete and record failures;
            # wait returns the exit status of the process it waited for.
            failed_jobs=0
            for pid in "${pids[@]}"; do
                if ! wait "$pid"; then
                    echo "consistency_judge.py (QWEN3) process $pid exited with a non-zero status." >&2
                    failed_jobs=$((failed_jobs+1))
                fi
            done

            # Do not merge partial results: a merged file built from a failed run
            # would make later runs skip the merge step.
            if (( failed_jobs > 0 )); then
                echo "$failed_jobs consistency_judge.py (QWEN3) process(es) failed, see the .log files; skipping DATA_NAME $DATA_NAME." >&2
                continue
            fi

            # Collect the QWEN3 result file of every check-in partition, keeping
            # only those that actually exist on disk.
            qwen3_consistency_files=()
            for part_file in "${checkin_output%.jsonl}_part_"[0-9].jsonl; do
                qwen3_consistency_file="${part_file%.jsonl}_consistency_QWEN3.jsonl"
                [[ -f "$qwen3_consistency_file" ]] || continue
                qwen3_consistency_files+=("$qwen3_consistency_file")
            done

            # Merge them into one file unless that file is already present.
            merged_qwen3_consistency="${checkin_output%.jsonl}_consistency_QWEN3.jsonl"
            if [[ ! -f "$merged_qwen3_consistency" ]]; then
                # The inputs are passed as one comma-separated string; IFS is
                # changed inside the command substitution only.
                input_path_list=$(IFS=,; printf '%s\n' "${qwen3_consistency_files[*]}")
                echo "Calling merge.py to merge QWEN3 consistency results, output to $merged_qwen3_consistency"
                python3 merge.py --input_path_list "$input_path_list" --output_path "$merged_qwen3_consistency"
            else
                echo "File $merged_qwen3_consistency already exists, skipping merge."
            fi

            # Final step: pass both merged consistency files to final_process.py,
            # which is given the results path below. Skipped when that results
            # file is already present.
            consistency_res_save_path="${checkin_output%.jsonl}_consistency_results.txt"

            if [[ ! -f "$consistency_res_save_path" ]]; then
                echo "Calling final_process.py to calculate consistency results, output to $consistency_res_save_path"
                python3 final_process.py --qwq_path "$merged_q_consistency" --qwen3_path "$merged_qwen3_consistency" --res_save_path "$consistency_res_save_path"
            else
                echo "File $consistency_res_save_path already exists, skipping statistics."
            fi
            echo "Processing of DATA_NAME $DATA_NAME completed."
        done
        echo "Processing of MODEL_NAME $MODEL_NAME completed."
    done
    echo "Processing of TIMES $times completed."
done
echo "All datasets processing completed."
