#!/usr/bin/env bash
# Reshard a filtered subset of the datacomp-small pool into a per-experiment
# directory. The (commented-out) training and evaluation commands further down
# consume the directories prepared here.
#
# Abort on the first failing command, on use of an unset variable, or on a
# failing pipeline stage, so that resharding never runs against a directory
# that could not be created.
set -euo pipefail

# Experiment identifier: names both the experiment directory and the UID file.
exp_name="toy_data505k"
# Values referenced only by the optional training commands below.
num_gpus="5"
scale="small"

# All paths derive from a single root so they cannot drift apart.
data_root="/mnt/task_runtime/datacomp-small-data"
exp_dir="${data_root}/experiments/${exp_name}"
uids_file="${data_root}/filtered_uids/${exp_name}.npy"

# Fail early, with a clear message, if the UID file handed to the resharder
# via -s is missing.
if [[ ! -f "${uids_file}" ]]; then
  printf 'error: UID file not found: %s\n' "${uids_file}" >&2
  exit 1
fi

# make folder path
# -p creates missing parents (including ${exp_dir} itself) and does not fail
# when the directories already exist, so the script can be re-run.
mkdir -p -- "${exp_dir}/shards"

# reshard
python resharder.py -i "${data_root}/shards" -o "${exp_dir}/shards" -s "${uids_file}"

# train based on resharded dataset (one gpu)
# python train.py --scale $scale --data_dir "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}/shards" --output_dir "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}" --exp_name "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}"

# train based on resharded dataset (multiple gpus)
# torchrun --nproc_per_node $num_gpus train.py --scale $scale --data_dir "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}/shards" --output_dir "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}" --exp_name "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}"

# evaluation (without submission)
# python evaluate.py --train_output_dir="/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}" --data_dir="/mnt/task_runtime/datacomp-eval-data"
# python aggregate_scores.py --input "/mnt/task_runtime/datacomp-small-data/experiments/${exp_name}/eval_results.jsonl"
