#!/bin/bash

# Run the repetition-rate analysis script (analysis_repetition_penalty_train.py) on the remote server.

# Experiment directories to compare. Every entry lives under the shared
# verl-qwen3-4b-oct results tree. The active entries point at "training_data"
# directories; the commented-out ones (DIR1, DIR5) point at "valid"
# directories and are currently disabled.
RESULTS_ROOT="/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/verl-041-result/verl-qwen3-4b-oct"

# DIR1="${RESULTS_ROOT}/baseline-dapo-math-redo/valid"
DIR2="${RESULTS_ROOT}/baseline-grpo-dapo-math-minibsz32/training_data"
DIR3="${RESULTS_ROOT}/baseline-gspo-dapo-math-minibsz32/training_data"
DIR4="${RESULTS_ROOT}/skip-right-skip-limits10-gspo-dapo-math/training_data"
# DIR5="${RESULTS_ROOT}/skip-right-skip-limits10-dapo-math/valid"
DIR6="${RESULTS_ROOT}/skip-right-skip-limits10-grpo-dapo-math/training_data"


# Path handed to the analysis script as its output chart location.
OUTPUT="/mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/eval_scripts/analysis/plots/qwen3-4b-base-ngram-diversity-comparison-train.png"



# Run analysis_repetition_penalty_train.py over the four active experiment
# directories. The script path is relative, so this must be launched from the
# directory that contains analysis_repetition_penalty_train.py.
#
# Arguments are collected in an array so each one is passed as exactly one
# word. Their meanings are defined by the Python script, not here.
analysis_args=(
  -d "$DIR2"
  -d "$DIR3"
  -d "$DIR4"
  -d "$DIR6"
  --save-data /mnt/shared-storage-user/p1-shared/wangfuting/codes/project_tts_extrapolation/eval_scripts/analysis/plots/qwen3-4b-base-ngram-analysis_results-train.json
  # Quoted so the shell hands the glob pattern to the script unexpanded.
  -p "step_*_traindata.jsonl"
  -s
  -o "$OUTPUT"
  -w 10
  --compute-ngrams
  --no-plot
  --tokenizer "/mnt/shared-storage-user/p1-shared/wangfuting/shared/models/Qwen3-4B-Base"
)

# Propagate a failure of the analysis instead of falling through: previously
# the success message was printed and the script exited 0 even when python3
# failed, hiding the error from callers.
python3 analysis_repetition_penalty_train.py "${analysis_args[@]}" || {
  status=$?
  echo "analysis_repetition_penalty_train.py failed with exit code ${status}" >&2
  exit "$status"
}

# NOTE(review): --no-plot is passed above, so the chart named in this message
# may not actually be written -- confirm against the Python script.
echo "分析完成！图表已保存到: $OUTPUT"

