#! /bin/bash

# Launch the code-deduplication step (deduplicate.py) with a fixed configuration.
# All paths below are relative, so they are resolved against the current
# working directory of the caller.

# Fail fast: abort on command errors, unset variables, and failed pipeline stages.
set -euo pipefail

# Prepend the current directory to PYTHONPATH. The ${VAR:+...} form keeps this
# safe under `set -u` and avoids a dangling ':' when PYTHONPATH is unset/empty.
export PYTHONPATH="./${PYTHONPATH:+:${PYTHONPATH}}"

# deduplicate
# Alternative input kept for reference:
# INPUT_PATH="src/turtlegfx_datagen/codededup/data/postprocess_top100.json"
INPUT_PATH="src/turtlegfx_datagen/codededup/results/merged_test.json"
OUTPUT_PATH="src/turtlegfx_datagen/codededup/results/postprocess_top100_dedup.json"
EPS=0.2 # EPS=0.2 is the best (0.3 is too high)
MIN_SAMPLES=2
BATCH_SIZE=32

# Every expansion is quoted so a value containing spaces or glob characters is
# passed to the script as exactly one argument.
python src/turtlegfx_datagen/codededup/deduplicate.py \
    --input_path "$INPUT_PATH" \
    --output_path "$OUTPUT_PATH" \
    --eps "$EPS" \
    --min_samples "$MIN_SAMPLES" \
    --batch_size "$BATCH_SIZE" \
    --remove_duplicates \
    --save_dup_images
