#! /bin/bash
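# Runs the train/validation split (../../code/utils/data_train_valid_split.py) once for every
# configured combination of test-case type, dataset, and model. Paths are relative to the current directory.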
##################################################################################################
### **Default parameters**
# Test-case types to process. Uncomment an entry to include it in the run;
# each enabled type selects a different input file name in the loop below.
declare -a TEST_CASE_TYPES=(
  # "assert_specification"
  # "functionality_specification"
  "grammar_assert_specification"
)

# Dataset names; each one is used as a directory component of the input path.
declare -a DATASETS=("mbpp" "humaneval")
##################################################################################################
### **Custom parameters**
# Forwarded to the splitter as --valid-ratio
# (presumably the fraction of samples held out for validation; confirm in the splitter).
VALID_RATIO="0.1"

# Random seed, forwarded to the splitter as --seed.
SEED="42"

# Model names; each one is part of the input file name and is forwarded as --model.
declare -a MODELS=(
  # "gpt-4o-mini"
  "o4-mini"
)

##################################################################################################
# Run the splitter once for every (test-case type, dataset, model) combination.
#
# A failed splitter run does not abort the batch: the remaining combinations
# are still processed, each failure is reported on stderr, and the script exits
# with status 1 at the end if any run failed. Without this bookkeeping, the
# script's exit status would only reflect the last invocation and earlier
# failures would pass unnoticed.
failed_runs=0

for TEST_CASE_TYPE in "${TEST_CASE_TYPES[@]}"; do
  for DATASET in "${DATASETS[@]}"; do
    for MODEL in "${MODELS[@]}"; do
      # The input file name (and, for the grammar variant, an extra per-model
      # sub-directory) depends on the test-case type.
      case "$TEST_CASE_TYPE" in
        assert_specification)
          DATA_PATH="../../code/output_gpt/${DATASET}/after_quality/${TEST_CASE_TYPE}/${MODEL}_gt_assert_only.jsonl"
          ;;
        functionality_specification)
          DATA_PATH="../../code/output_gpt/${DATASET}/after_quality/${TEST_CASE_TYPE}/${MODEL}_gt_and_model_ok.jsonl"
          ;;
        grammar_assert_specification)
          DATA_PATH="../../code/output_gpt/${DATASET}/after_quality/${TEST_CASE_TYPE}/${MODEL}/${MODEL}_train_data.jsonl"
          ;;
        *)
          # A misconfigured type is a setup error, so stop immediately.
          echo "Unsupported TEST_CASE_TYPE: $TEST_CASE_TYPE" >&2
          exit 1
          ;;
      esac

      echo "============================================"
      echo "DATASET       : $DATASET"
      echo "TEST_CASE_TYPE: $TEST_CASE_TYPE"
      echo "Using data    : $DATA_PATH"
      echo "VALID_RATIO   : $VALID_RATIO"
      echo "SEED          : $SEED"
      echo "============================================"

      # Paths are relative to the current working directory, so the script
      # must be launched from the directory these "../../" paths assume.
      if ! python ../../code/utils/data_train_valid_split.py \
        --data "$DATA_PATH" \
        --valid-ratio "$VALID_RATIO" \
        --seed "$SEED" \
        --model "$MODEL" \
        --test_case_type "$TEST_CASE_TYPE" \
        --dataset "$DATASET"; then
        echo "Split failed: DATASET=$DATASET TEST_CASE_TYPE=$TEST_CASE_TYPE MODEL=$MODEL" >&2
        failed_runs=$((failed_runs + 1))
      fi
    done
  done
done

# Surface any failure through the exit status so callers (CI, wrapper scripts)
# can detect a partially failed batch.
if ((failed_runs > 0)); then
  echo "$failed_runs split run(s) failed" >&2
  exit 1
fi
