#!/usr/bin/env bash
# Launches the NanoLlama "inheritance" runs through torchrun: one run.py
# invocation per (seed, task) pair, executed one after another.
#
# -e: stop at the first failing run instead of continuing with the next one.
# -u: treat a reference to an unset variable as an error.
set -eu

export WANDB_PROJECT='inheritance_new'
export WANDB_MODE=disabled
# Comma-separated ids of the GPUs the workers are allowed to see.
export CUDA_VISIBLE_DEVICES=1
export TORCHINDUCTOR_FX_GRAPH_CACHE=1

# Start one torchrun worker per visible GPU. The worker count used to be
# hard-coded to 2 while only one device id is exported above, so the second
# worker had no GPU of its own. Counting the ids keeps both settings in sync.
# NOTE(review): the global batch is per-device batch x worker count; if two
# workers were intended, list two GPU ids in CUDA_VISIBLE_DEVICES instead.
nproc_per_node=1
remaining_ids=$CUDA_VISIBLE_DEVICES
while [ "${remaining_ids#*,}" != "$remaining_ids" ]; do
    # Drop the leading "<id>," and count the id that follows it.
    remaining_ids=${remaining_ids#*,}
    nproc_per_node=$((nproc_per_node + 1))
done

for seed in 42; do
    for task in \
        maze/maze_with_padding \
    ; do
        # The run group is set for this single invocation only (not exported).
        WANDB_RUN_GROUP="${task}-nano-llama" torchrun --nproc_per_node="$nproc_per_node" run.py \
            --args experiments/inheritance/common/data.yaml "experiments/inheritance/${task}.yaml" experiments/inheritance/common/NanoLlama.yaml \
            --train_args experiments/inheritance/common/train_args_base.yaml \
            --seed="$seed" \
            --per_device_train_batch_size=256 \
            --per_device_eval_batch_size=256 \
            --gradient_accumulation_steps=1 \
            --num_eval=512
    done
done