#!/bin/bash





# Absolute path of the directory containing this script, resolved through
# BASH_SOURCE so it does not depend on the caller's working directory.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Repository root: two directory levels above this script's directory.
REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"
# Model name; passed to count_tokens.py below as "meta-llama/${BASE_MODEL}".
BASE_MODEL=Meta-Llama-3-8B-Instruct

# Activate the repository-local virtual environment. The path is quoted so a
# checkout location containing spaces or glob characters is not word-split.
source "${REPO_ROOT}/.venv/bin/activate"





# Datasets to process, in order. Each entry is the basename (without the
# ".jsonl" extension) of a file under scripts/instruction/convert_datasets/.
# One append per line so an individual dataset can be toggled by commenting
# out its line.
DATASET_NAMES=()
DATASET_NAMES+=("LLTM-apps-numeric-depth-train")
DATASET_NAMES+=("LLTM-pyx-numeric-depth-train")
DATASET_NAMES+=("LLTM-all-numeric-depth-train")

# Run utils/count_tokens.py once per dataset listed in DATASET_NAMES.
# A failure on one dataset is reported on stderr and the loop moves on to the
# next dataset, so a single bad file does not prevent the others from running.
for DATASET_NAME in "${DATASET_NAMES[@]}"; do
  # Each dataset is expected as <name>.jsonl in the convert_datasets directory.
  DATA_PATH="${REPO_ROOT}/scripts/instruction/convert_datasets/${DATASET_NAME}.jsonl"

  echo "Processing dataset: ${DATASET_NAME}"
  # The script path is quoted so a checkout path containing spaces is passed
  # to python as a single argument.
  if python "${SCRIPT_DIR}/utils/count_tokens.py" \
      --hf-transformer-model-dir "meta-llama/${BASE_MODEL}" \
      --instruction-train-data-path "${DATA_PATH}" \
      --seq-length 8192; then
    echo "Done processing dataset: ${DATASET_NAME}"
  else
    # In the else branch, $? still holds the exit status of the python command.
    count_tokens_status=$?
    echo "Failed processing dataset: ${DATASET_NAME} (exit code ${count_tokens_status})" >&2
  fi

  echo "---------------------------------------------"
done

# Send a completion message via utils/send_to_slack.py once the loop is done.
# The script path is quoted so a checkout path containing spaces is passed to
# python3 as a single argument.
python3 "${SCRIPT_DIR}/utils/send_to_slack.py" "[count_tokens.py] Finished!"
