# Sweep: --model recur_net_1d on --problem prefix_sums, --weight_for_loss 0.0.
#
# Launches 20 train.py runs one after another. Every run receives exactly the
# same options; the only thing that changes is the two-digit suffix (01..20)
# at the end of the --train_log file name. All runs are passed the same
# --checkpoint and --output paths. No exit-status check is made between runs.
#
# The run IDs are spelled out (rather than generated with brace expansion or
# seq) so the zero padding is explicit and the loop works in any POSIX shell.
for run_id in \
  01 02 03 04 05 06 07 08 09 10 \
  11 12 13 14 15 16 17 18 19 20
do
  python train.py \
    --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 \
    --clip 1 \
    --epochs 150 \
    --lr 0.001 \
    --lr_decay step \
    --lr_factor 0.1 \
    --lr_schedule 60 100 \
    --max_iters 30 \
    --min_k 1 \
    --min_n 0 \
    --model recur_net_1d \
    --optimizer adam \
    --output results/prefix_sums/recallx_arch_loss_ablation_30 \
    --problem prefix_sums \
    --test_batch_size 500 \
    --test_data 64 \
    --test_iterations 15 50 \
    --test_mode max_conf \
    --train_batch_size 100 \
    --train_data 32 \
    --train_log "mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_${run_id}.txt" \
    --train_mode progressive \
    --val_period 150 \
    --warmup_period 10 \
    --weight_for_loss 0.0 \
    --width 400
done

# Sweep: --model recur_net_1d on --problem prefix_sums, --weight_for_loss 1.0.
#
# Launches 20 train.py runs one after another. Every run receives exactly the
# same options; the only thing that changes is the two-digit suffix (01..20)
# at the end of the --train_log file name (note the "wt1" tag in that name,
# matching the 1.0 loss weight). All runs are passed the same --checkpoint and
# --output paths. No exit-status check is made between runs.
#
# The run IDs are listed literally so the zero padding is explicit and the
# loop does not depend on bash-only brace expansion.
for log_suffix in \
  01 02 03 04 05 06 07 08 09 10 \
  11 12 13 14 15 16 17 18 19 20
do
  python train.py \
    --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 \
    --clip 1 \
    --epochs 150 \
    --lr 0.001 \
    --lr_decay step \
    --lr_factor 0.1 \
    --lr_schedule 60 100 \
    --max_iters 30 \
    --min_k 1 \
    --min_n 0 \
    --model recur_net_1d \
    --optimizer adam \
    --output results/prefix_sums/recallx_arch_loss_ablation_30 \
    --problem prefix_sums \
    --test_batch_size 500 \
    --test_data 64 \
    --test_iterations 15 50 \
    --test_mode max_conf \
    --train_batch_size 100 \
    --train_data 32 \
    --train_log "mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_${log_suffix}.txt" \
    --train_mode progressive \
    --val_period 150 \
    --warmup_period 10 \
    --weight_for_loss 1.0 \
    --width 400
done

# Sweep: --model recur_net_recallx_1d on --problem prefix_sums,
# --weight_for_loss 0.0.
#
# The commands below are identical except for the two-digit suffix at the end
# of the --train_log file name (01..17 in this part of the file). They are all
# passed the same --checkpoint and --output paths and are executed one after
# another with no exit-status check in between.
# NOTE(review): presumably each command is an independent repeat of the same
# configuration (train.py is not shown here) -- confirm before relying on it.
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_04.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_05.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_06.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_07.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_08.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_09.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_10.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_11.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_12.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_13.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_14.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_15.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_16.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_17.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_18.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_19.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400
python train.py --checkpoint checkpoints/prefix_sums/recallx_arch_loss_ablation_30 --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/recallx_arch_loss_ablation_30 --problem prefix_sums --test_batch_size 500 --test_data 64 --test_iterations 15 50 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_20.txt --train_mode progressive --val_period 150 --warmup_period 10 --weight_for_loss 0.0 --width 400




































