

# Driver for the pancreatic low-risk fine-tuning dataset extraction.
#
# Launches extract_ft_dataset.py once per chunk as parallel background jobs
# (one log file per chunk), waits for all of them, and only then runs
# split_ft_dataset.py. If any chunk job exits non-zero the script reports the
# failed chunks on stderr and exits 1 WITHOUT running the split step, so the
# split never operates on a partially extracted dataset.
#
# Kept POSIX-sh compatible on purpose: the file has no shebang, so it may be
# run under either sh or bash.

# Number of chunks; one background extraction job is started per chunk.
num_chunks=40
# Directory that receives one log file per chunk.
log_dir="./extract_ft_dataset_log_cancer_pancreatic_low_risk"
# The trailing "*" stays inside quotes everywhere it is used, so the shell
# never expands it; the pattern is handed to the Python script verbatim
# (presumably expanded there -- confirm against extract_ft_dataset.py).
patient_file_path="/data/nhird_tasks_cancer_low_risk/cancer_screening_dataset_pancreatic_1.json/*"
ids_file_path="/data/cancer_pancreatic_low_risk/"

# Create the log directory if it doesn't exist. Without it every log
# redirection below would fail and no chunk would actually run, so stop early.
if ! mkdir -p "$log_dir"
then
    echo "Cannot create log directory: $log_dir" >&2
    exit 1
fi

# Space-separated "pid:chunk_idx" entries for every job that was launched.
jobs_started=""

# Loop from 0 to num_chunks-1 (a plain counter avoids depending on seq).
chunk_idx=0
while [ "$chunk_idx" -lt "$num_chunks" ]
do
    echo "Processing chunk $chunk_idx"
    # Define the log file for each chunk
    log_file="$log_dir/chunk_${chunk_idx}.log"
    # Run the Python script with the current chunk_idx in the background and
    # redirect both stdout and stderr to the chunk's log file.
    python extract_ft_dataset.py \
        --patient_file_path "$patient_file_path" \
        --ids_file_path "$ids_file_path" \
        --num_chunks "$num_chunks" \
        --chunk_idx "$chunk_idx" > "$log_file" 2>&1 &
    # Remember the PID so this job's exit status can be collected later.
    jobs_started="$jobs_started $!:$chunk_idx"
    chunk_idx=$((chunk_idx + 1))
done

# Wait for every background job individually: a bare `wait` would discard
# their exit statuses, hiding crashed chunks.
failed_chunks=""
for job in $jobs_started
do
    job_pid=${job%%:*}
    job_chunk=${job#*:}
    if ! wait "$job_pid"
    then
        echo "Chunk $job_chunk failed; see $log_dir/chunk_${job_chunk}.log" >&2
        failed_chunks="$failed_chunks $job_chunk"
    fi
done

# Do not split an incomplete extraction.
if [ -n "$failed_chunks" ]
then
    echo "Skipping split_ft_dataset.py; failed chunks:$failed_chunks" >&2
    exit 1
fi

python split_ft_dataset.py --ids_file_path "$ids_file_path"
