#!/bin/bash

set -euo pipefail

# ./scripts/setup.sh

# Invoke experiment/phbf/construct_phbf.py with the configuration currently
# held in the caller's shell variables.
# Globals (read, scalars): all_pos_key_path, all_pos_X_path, X_val_key_path,
#   X_val_path, y_val_path, X_test_key_path, X_test_path, y_test_path,
#   model_dir_root, result_dir_root.
# Globals (read, arrays):  bit_sizes, hash_counts, pos_query_ratios,
#   query_nums -- each element is passed as its own argument after the flag.
# Arguments: none.
# Returns:   the exit status of the python3 invocation.
run() {
    # Collect the flags in an array so every value stays a single word,
    # even if it contains whitespace.
    local -a phbf_args=(
        --all_pos_key_path "$all_pos_key_path"
        --all_pos_X_path "$all_pos_X_path"
        --X_val_key_path "$X_val_key_path"
        --X_val_path "$X_val_path"
        --y_val_path "$y_val_path"
        --X_test_key_path "$X_test_key_path"
        --X_test_path "$X_test_path"
        --y_test_path "$y_test_path"
        --bit_sizes "${bit_sizes[@]}"
        --hash_counts "${hash_counts[@]}"
        --pos_query_ratios "${pos_query_ratios[@]}"
        --query_nums "${query_nums[@]}"
        --model_dir_root "$model_dir_root"
        --result_dir_root "$result_dir_root"
    )

    python3 experiment/phbf/construct_phbf.py "${phbf_args[@]}"
}

# Run the experiment once per dataset. Each iteration sets the global
# variables that `run` reads, then calls it.
datasets=("url" "ember")
for dataset in "${datasets[@]}"; do
    # Output locations for this dataset.
    model_dir_root="models/${dataset}/phbf"
    result_dir_root="results/${dataset}/phbf"

    # Every input CSV for a dataset sits under the same preprocessed directory.
    preprocessed_dir="data/${dataset}/preprocessed"
    all_pos_key_path="${preprocessed_dir}/all_pos_key.csv"
    all_pos_X_path="${preprocessed_dir}/all_pos_X.csv"
    X_val_key_path="${preprocessed_dir}/X_val_key.csv"
    X_val_path="${preprocessed_dir}/X_val.csv"
    y_val_path="${preprocessed_dir}/y_val.csv"
    X_test_key_path="${preprocessed_dir}/X_test_key.csv"
    X_test_path="${preprocessed_dir}/X_test.csv"
    y_test_path="${preprocessed_dir}/y_test.csv"

    # memory <-> fpr trade-off: sweep over filter bit sizes and hash counts.
    # Any dataset other than "url" uses bit sizes twice as large.
    case "$dataset" in
        url)
            bit_sizes=(400000 800000 1200000 1600000 2000000 2400000 2800000)
            ;;
        *)
            bit_sizes=(800000 1600000 2400000 3200000 4000000 4800000 5600000)
            ;;
    esac
    # The hash-count sweep is the same for every dataset.
    hash_counts=(10 20 30)

    pos_query_ratios=(0.0)
    query_nums=(40000)
    run
done
