#!/bin/bash
#
# Slurm batch job that runs scripts/data/modify_rlds_dataset_asjson.py on a
# single dataset.
#
# The Python script path and the logs/ paths below are relative, so submit this
# file from the directory that contains both scripts/ and logs/.
# NOTE: logs/ must already exist at submission time; it cannot be created from
# inside this script because the scheduler opens the log files before any
# command here runs.

#SBATCH --job-name=rlds
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=50
#SBATCH --mem=200G
#SBATCH --time=23:59:59
#SBATCH --output=logs/rlds-%J.log
#SBATCH --error=logs/rlds-%J.err

set -euo pipefail

# Dataset to process: bridge_dataset, or fractal20220817_data
readonly dataset="fractal20220817_data"

# All paths hang off one data root so it only has to be changed in one place.
# NOTE(review): the metadata directory name is fractal-specific; presumably it
# has to change together with `dataset` -- confirm before switching datasets.
readonly data_dir="/mnt/zjk/jianke_z/open-x-embodiment"
readonly metadata_dir="${data_dir}/fractal_metadata"
readonly images_dir="${metadata_dir}/images"

# Raise the limit on simultaneously open files --> conversion opens up to 1k
# temporary files in /tmp to store the dataset during conversion.
readonly open_files_limit=200000
if ! ulimit -n "${open_files_limit}"; then
  # Best effort: the request fails when the hard limit is lower. Report the
  # limit that is actually in effect and carry on instead of aborting the job.
  printf 'warning: could not raise open-file limit to %s; continuing with %s\n' \
    "${open_files_limit}" "$(ulimit -n)" >&2
fi

# Arguments are kept in an array so that no line-continuation backslashes are
# needed and adding or removing an option cannot break the command.
script_args=(
  --dataset="${dataset}"
  --data_dir="${data_dir}"
  --metadata_dir="${metadata_dir}"
  --images_dir="${images_dir}"
)

python scripts/data/modify_rlds_dataset_asjson.py "${script_args[@]}"
