#!/bin/bash

# 语义重复分析运行脚本

# Root directory; each entry of MODELS names a sub-directory beneath it.
BASE_PATH="/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-8b-new"
# Model path passed to the analysis script through --model.
MODEL_PATH="/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/Qwen3-Embedding-0.6B"

# Model result directories to analyse, processed in this order.
MODELS=()
MODELS+=("stage2-best-deepscaler-sampled2k")
MODELS+=("stage1-best-deepscaler-sampled2k")
MODELS+=("baseline-n32-deepscaler-sampled2k")

# Settings shared by every run. CHUNK_SIZE is also embedded in the output
# file name (semantic_<CHUNK_SIZE>.jsonl).
RESPONSE_KEY="output"
CHUNK_SIZE=256
BATCH_SIZE=1280

# Run the semantic-repetition analysis once for every entry in MODELS.
#
# Globals read: MODELS, BASE_PATH, MODEL_PATH, CHUNK_SIZE, BATCH_SIZE,
#               RESPONSE_KEY.
# Outputs:      per-model progress and a final summary, all on stdout.
# Returns:      0 when no analysis run failed (skipped inputs are reported
#               but do not count as failures); 1 when at least one run
#               exited with a non-zero status.
run_semantic_analyses() {
    local model_dir input_file output_file
    local -a failed=() skipped=()

    for model_dir in "${MODELS[@]}"; do
        echo "=========================================="
        echo "正在处理: ${model_dir}"
        echo "=========================================="

        input_file="${BASE_PATH}/${model_dir}/valid/0_16384.jsonl"
        output_file="${BASE_PATH}/${model_dir}/valid/semantic_${CHUNK_SIZE}.jsonl"

        # A missing input is not fatal: record it and move on to the next model.
        if [[ ! -f "${input_file}" ]]; then
            echo "警告: 输入文件不存在: ${input_file}"
            echo "跳过此配置..."
            skipped+=("${model_dir}")
            continue
        fi

        # Test the command directly rather than inspecting $? afterwards, so
        # no statement can slip in between and clobber the status.
        if python analysis_semantic_repetition.py \
            --input "${input_file}" \
            --output "${output_file}" \
            --model "${MODEL_PATH}" \
            --chunk_size "${CHUNK_SIZE}" \
            --batch_size "${BATCH_SIZE}" \
            --response_key "${RESPONSE_KEY}"; then
            echo "✓ ${model_dir} 分析完成！"
        else
            echo "✗ ${model_dir} 分析失败！"
            failed+=("${model_dir}")
        fi
        echo ""
    done

    # Only claim that everything completed when that is actually true;
    # otherwise list what failed and what was skipped.
    echo "=========================================="
    if (( ${#failed[@]} == 0 && ${#skipped[@]} == 0 )); then
        echo "所有分析任务完成！"
    else
        echo "分析任务结束: 失败 ${#failed[@]} 个, 跳过 ${#skipped[@]} 个"
        if (( ${#failed[@]} > 0 )); then
            printf '  失败: %s\n' "${failed[@]}"
        fi
        if (( ${#skipped[@]} > 0 )); then
            printf '  跳过: %s\n' "${skipped[@]}"
        fi
    fi
    echo "=========================================="

    if (( ${#failed[@]} > 0 )); then
        return 1
    fi
    return 0
}

# Propagate failure through the exit status so schedulers, CI jobs and
# `&&` chains can detect that at least one analysis did not succeed.
run_semantic_analyses || exit 1

