#!/bin/bash

# Run the repetition-rate analysis script on the remote server.
# Activate the "verl041-test" conda environment and move into the directory
# that holds analysis_repetition_penalty.py (invoked below by relative name).
# Both steps are checked explicitly: if either fails, continuing would run
# python3 from the wrong interpreter or the wrong working directory.
CONDA_ACTIVATE="/mnt/shared-storage-user/p1-shared/wangfuting/miniconda3/bin/activate"
ANALYSIS_DIR="/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/eval_scripts/analysis"

source "$CONDA_ACTIVATE" verl041-test || {
  echo "error: failed to activate conda environment 'verl041-test' via $CONDA_ACTIVATE" >&2
  exit 1
}

cd -- "$ANALYSIS_DIR" || {
  echo "error: cannot change directory to $ANALYSIS_DIR" >&2
  exit 1
}
# Experiment directories handed to the analysis script (one -d flag each).
# Earlier DIR1 choices are kept below, commented out, for reference:
# DIR1="/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/add1k-remove-upper-dapo-math-17k/valid"
# DIR1="/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct/dapo-add1k-remove-upper-refined-repetition-penalty-max9k/valid"
MODELS_ROOT="/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/verl-qwen3-4b-oct"
DIR1="${MODELS_ROOT}/llama-baseline/valid_8k"
DIR2="${MODELS_ROOT}/llama-LIE/valid_8k"

# Path of the output chart (passed to the analysis script via -o).
RESULTS_ROOT="/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/eval_scripts/analysis_mar/results"
OUTPUT="${RESULTS_ROOT}/llama-LIE-length.png"


# python3 analysis_repetition_penalty.py \
#   -p "*_16384.jsonl" \
#   -s \
#   -o "$OUTPUT" \
#   --dir "$DIR1" \
#   --ngram-sizes 3 5 \
#   --divergence-method js \
#   --plot-entropies

# Run analysis_repetition_penalty.py over both experiment directories
# ($DIR1 and $DIR2), restricted to files matching "*_16384.jsonl".
# The exact meaning of each flag is defined by the Python script, which is
# not visible here; the flags are passed through unchanged.
#
# On failure the python3 exit status is propagated and the completion message
# is skipped, so callers and schedulers never see a false "done".
python3 analysis_repetition_penalty.py \
  -d "$DIR1" \
  -d "$DIR2" \
  --save-data /mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/eval_scripts/analysis_mar/results/llama-LIE-analysis_results.json \
  -p "*_16384.jsonl" \
  -s \
  -o "$OUTPUT" \
  -w 10 \
  --compute-ngrams \
  --compute-sentence-diversity \
  --no-plot \
  --num-workers 16 \
  --tokenizer "/mnt/shared-storage-gpfs2/p1-shared-2/wangfuting/LIE/models/OctoThinker-3B-Long-Base" || {
  status=$?
  echo "error: analysis_repetition_penalty.py failed with exit status $status" >&2
  exit "$status"
}

# NOTE(review): the message below says a chart was saved to $OUTPUT, but the
# run passes --no-plot; presumably only the --save-data JSON is written.
# Confirm against the Python script and adjust the message if so.
echo "分析完成！图表已保存到: $OUTPUT"

# python3 analysis_repetition_penalty.py \
#   -d "$DIR1" \
#   -d "$DIR2" \
#   -p "*_16384.jsonl" \
#   -s \
#   -o "$OUTPUT" \