#!/bin/bash

# Group names. Each one is used as a directory name under
# data/synthetic_data/{matchsum,presumm}/single_group/ by the loop below.
groups=(
    "men"
    "women"
    "black"
    "white"
    "hispanic"
    "asian"
    "judaism"
    "islam"
    "christianity"
)

# Values used as the leading component of each index file name
# ("<percent>_<name>_step-1.id").
percents=(
    0.1
    0.5
    0.9
)

# Switch to the MatchSum preprocess directory: get_candidate.py is invoked by
# bare name and every data path below is relative to this directory.
# The cd target is itself relative to the caller's current directory.
# Abort if the cd fails; otherwise the rm -rf below would delete ./temp in
# whatever directory the script was started from.
cd ../MatchSum/preprocess || {
    echo "error: cannot cd to ../MatchSum/preprocess" >&2
    exit 1
}

# Remove any ./temp directory left in the preprocess directory
# (presumably scratch output of an earlier get_candidate.py run -- confirm).
rm -rf ./temp

# Print the working directory so the run log shows where processing happens.
pwd

# For every (percent, group) pair, run get_candidate.py once per .jsonl file
# found in the group's matchsum directory.
#   data_path  : the matchsum .jsonl file itself
#   index_path : presumm/single_group/<group>/<percent>_<name>_step-1.id
#   write_path : matchsum/single_group/<group>/processed/<name>.jsonl
#
# NOTE(review): write_path does not include the percent, so each percent
# pass targets the same output file for a given input. Confirm whether the
# output name should carry the percent as well.
for i in "${percents[@]}"; do
    for g in "${groups[@]}"; do
        for f in ../../data/synthetic_data/matchsum/single_group/"${g}"/*.jsonl; do
            # When the glob matches nothing, bash leaves the literal pattern
            # in $f; skip it rather than passing a bogus path to Python.
            [[ -e "$f" ]] || continue

            filename=$(basename -- "$f")
            stem=${filename%.jsonl}
            index_path="../../data/synthetic_data/presumm/single_group/${g}/${i}_${stem}_step-1.id"
            write_path="../../data/synthetic_data/matchsum/single_group/${g}/processed/${filename}"

            # Log what is about to be processed.
            printf '%s\n' "$stem" "$f" "$index_path" "$write_path"

            python3 get_candidate.py \
                --tokenizer roberta \
                --data_path "$f" \
                --index_path "$index_path" \
                --write_path "$write_path" \
                --n_cpus 20
        done
    done
done

# Multigroup version (disabled): runs get_candidate.py once per .jsonl file in the multigroup directory.
# for x in ../../data/synthetic_data/matchsum/multigroup/*.jsonl; do
#     filename=$(basename $x)
#     base=${filename%.jsonl}
#     index_path="../../data/synthetic_data/matchsum/multigroup/${base}_step-1.id"
#     write_path="../../data/synthetic_data/matchsum/multigroup/${base}/processed.jsonl"


#     echo $base $write_path
#     python3 get_candidate.py --tokenizer roberta --data_path $x --index_path $index_path --write_path $write_path --n_cpus 20
# done