#######################################
# Build the fairseq manifest, label files and a letter dictionary for one
# dataset directory.
#
# Steps:
#   1. Run examples/wav2vec/wav2vec_manifest.py on the dataset (flac files,
#      no validation split); the later steps expect it to leave train.tsv in
#      the output directory.
#   2. Run libri_labels.py on that train.tsv with output name "train"; the
#      later steps expect train.ltr (and optionally train.wrd).
#   3. If rename_flag is "true", rename train.{tsv,ltr,wrd} to
#      <manifest_prefix>.{tsv,ltr,wrd} (only the files that exist).
#   4. Count the space-separated tokens of the .ltr file and write them as
#      "<token> <count>" lines to <manifest_prefix>.ltr.txt.
#
# Both Python scripts are referenced by relative path, so the function must
# be called from a working directory where those paths resolve.
#
# Arguments:
#   $1 - flac_dataset_path: path to dataset with .flac files
#   $2 - manifest_output_dir: directory for all outputs (created if missing)
#   $3 - rename_flag: "true" to rename the train.* outputs; anything else
#        leaves them as train.*
#   $4 - manifest_prefix: prefix for the renamed files and for the dictionary
# Outputs:
#   Progress messages on stdout, error messages on stderr.
# Returns:
#   0 on success, 2 on missing arguments, 1 if any step fails.
#######################################
prepare_fairseq_manifest() {
  local flac_dataset_path="$1"
  local manifest_output_dir="$2"
  local rename_flag="$3"
  local manifest_prefix="$4"
  # Stem of the label file that actually exists on disk after the optional
  # rename; stays "train" when no rename happens.
  local label_stem="train"
  local ext src dst label_file dict_file

  if [ -z "$flac_dataset_path" ] || [ -z "$manifest_output_dir" ] ||
    [ -z "$manifest_prefix" ]; then
    echo "usage: prepare_fairseq_manifest <flac_dataset_path> <manifest_output_dir> <rename_flag> <manifest_prefix>" >&2
    return 2
  fi

  mkdir -p -- "$manifest_output_dir" || return 1

  echo "🔹 Generating manifest..."
  python examples/wav2vec/wav2vec_manifest.py \
    "$flac_dataset_path" \
    --dest "$manifest_output_dir" \
    --ext flac \
    --valid-percent 0.0 || {
    echo "❌ Manifest generation failed for $flac_dataset_path" >&2
    return 1
  }

  echo "🔹 Generating labels..."
  python libri_labels.py \
    "$manifest_output_dir/train.tsv" \
    --output-dir "$manifest_output_dir" \
    --output-name train || {
    echo "❌ Label generation failed for $manifest_output_dir/train.tsv" >&2
    return 1
  }

  if [ "$rename_flag" = "true" ]; then
    echo "🔹 Renaming outputs with prefix '$manifest_prefix'..."
    for ext in tsv ltr wrd; do
      src="$manifest_output_dir/train.$ext"
      dst="$manifest_output_dir/$manifest_prefix.$ext"
      # A prefix of "train" makes source and target identical; mv would
      # reject that, so there is nothing to do.
      if [ "$src" != "$dst" ] && [ -f "$src" ]; then
        mv -- "$src" "$dst" || return 1
      fi
    done
    label_stem="$manifest_prefix"
  fi

  label_file="$manifest_output_dir/$label_stem.ltr"
  dict_file="$manifest_output_dir/$manifest_prefix.ltr.txt"
  if [ ! -f "$label_file" ]; then
    echo "❌ Label file not found: $label_file" >&2
    return 1
  fi

  echo "🔹 Creating dictionary file $manifest_prefix.ltr.txt..."
  # One token per line -> group identical tokens -> count them -> emit
  # "<token> <count>". NF >= 2 drops the count line of an empty token, which
  # would otherwise become a dictionary entry with no symbol.
  tr -s ' ' '\n' <"$label_file" |
    sort |
    uniq -c |
    awk 'NF >= 2 {print $2 " " $1}' \
      >"$dict_file" || return 1

  echo "✅ Done. Manifest written to $manifest_output_dir with prefix '$manifest_prefix'"
}

# Generate manifests for each LibriSpeech split into one shared directory.
# Every call writes train.* intermediates into that directory, so a failed
# split must stop the script instead of letting the next split overwrite them.
LIBRISPEECH_ROOT=/data/shared_data/librispeech/LibriSpeech
MANIFEST_DIR=/manifest

# Evaluation splits: outputs are renamed to the split name with '-' -> '_'
# (dev-clean -> dev_clean, ...).
for split in dev-clean test-clean dev-other test-other; do
  prefix=$(printf '%s' "$split" | tr '-' '_')
  prepare_fairseq_manifest "$LIBRISPEECH_ROOT/$split/" "$MANIFEST_DIR" true "$prefix" || exit 1
done

# Training split keeps the train.* names, so no rename is requested.
prepare_fairseq_manifest "$LIBRISPEECH_ROOT/train-clean-100/" "$MANIFEST_DIR" false train || exit 1

# Ship the dict.ltr.txt from the current directory alongside the manifests.
cp ./dict.ltr.txt "$MANIFEST_DIR/" || exit 1
