python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
# Tail of the --width 200 / --weight_for_loss 0.25 part of the prefix_sums sweep.
# The next two commands are repeats 02 and 03 of recur_net_recall_1d; they are
# identical except for the numeric suffix of the --train_log file name.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
# Repeats 01-03 of recur_net_recallx_1d with the same width and loss weight;
# again only the --train_log suffix changes between the three commands.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 10 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi10_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
# prefix_sums sweep, --weight_for_loss 0.25, widths 300 and 400.
# For each width, every model is trained three times (repeats 01-03); the
# repeats share all flags and differ only in the --train_log file name.
# Iteration order (width, then model, then repeat) matches the order in which
# the commands were previously listed one per line.
# NOTE(review): this form assumes the file is executed by a shell. If a job
# dispatcher reads it one command per line, keep the expanded list - confirm.
sweep_weight=0.25
for sweep_width in 300 400; do
  for sweep_model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
    for sweep_run in 01 02 03; do
      python train.py \
        --checkpoint checkpoints/prefix_sums/iclr_search \
        --clip 1 \
        --epochs 150 \
        --lr 0.001 \
        --lr_decay step \
        --lr_factor 0.1 \
        --lr_schedule 60 100 \
        --max_iters 10 \
        --min_k 1 \
        --min_n 0 \
        --model "${sweep_model}" \
        --optimizer adam \
        --output results/prefix_sums/iclr_search \
        --problem prefix_sums \
        --test_batch_size 500 \
        --test_data 128 \
        --test_iterations 20 30 40 50 60 70 80 90 100 \
        --test_mode max_conf \
        --train_batch_size 100 \
        --train_data 32 \
        --train_log "mi10_tmprogressive_wt${sweep_weight}_mk1_mn0_width${sweep_width}_${sweep_model}_${sweep_run}.txt" \
        --train_mode progressive \
        --val_period 50 \
        --warmup_period 10 \
        --weight_for_loss "${sweep_weight}" \
        --width "${sweep_width}"
    done
  done
done
# prefix_sums sweep, --weight_for_loss 0.5, widths 100 through 400.
# For each width, every model is trained three times (repeats 01-03); the
# repeats share all flags and differ only in the --train_log file name.
# Iteration order (width, then model, then repeat) matches the order in which
# the commands were previously listed one per line.
# NOTE(review): this form assumes the file is executed by a shell. If a job
# dispatcher reads it one command per line, keep the expanded list - confirm.
sweep_weight=0.5
for sweep_width in 100 200 300 400; do
  for sweep_model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
    for sweep_run in 01 02 03; do
      python train.py \
        --checkpoint checkpoints/prefix_sums/iclr_search \
        --clip 1 \
        --epochs 150 \
        --lr 0.001 \
        --lr_decay step \
        --lr_factor 0.1 \
        --lr_schedule 60 100 \
        --max_iters 10 \
        --min_k 1 \
        --min_n 0 \
        --model "${sweep_model}" \
        --optimizer adam \
        --output results/prefix_sums/iclr_search \
        --problem prefix_sums \
        --test_batch_size 500 \
        --test_data 128 \
        --test_iterations 20 30 40 50 60 70 80 90 100 \
        --test_mode max_conf \
        --train_batch_size 100 \
        --train_data 32 \
        --train_log "mi10_tmprogressive_wt${sweep_weight}_mk1_mn0_width${sweep_width}_${sweep_model}_${sweep_run}.txt" \
        --train_mode progressive \
        --val_period 50 \
        --warmup_period 10 \
        --weight_for_loss "${sweep_weight}" \
        --width "${sweep_width}"
    done
  done
done
# Sweep slice: weight_for_loss=0.75, width=100, across all three model variants.
# train.py is invoked three times per model; the invocations for one model
# differ only in the numeric suffix (01-03) of the --train_log file name.
# Runs are sequential: models in the listed order, suffixes 01..03 within each.
loss_weight=0.75
net_width=100
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 \
      --epochs 150 \
      --lr 0.001 \
      --lr_decay step \
      --lr_factor 0.1 \
      --lr_schedule 60 100 \
      --max_iters 10 \
      --min_k 1 \
      --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 \
      --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 \
      --train_data 32 \
      --train_log "mi10_tmprogressive_wt${loss_weight}_mk1_mn0_width${net_width}_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 \
      --warmup_period 10 \
      --weight_for_loss "${loss_weight}" \
      --width "${net_width}"
  done
done
# Sweep slice: weight_for_loss=0.75, width=200, across all three model variants.
# train.py is invoked three times per model; the invocations for one model
# differ only in the numeric suffix (01-03) of the --train_log file name.
# Runs are sequential: models in the listed order, suffixes 01..03 within each.
loss_weight=0.75
net_width=200
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 \
      --epochs 150 \
      --lr 0.001 \
      --lr_decay step \
      --lr_factor 0.1 \
      --lr_schedule 60 100 \
      --max_iters 10 \
      --min_k 1 \
      --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 \
      --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 \
      --train_data 32 \
      --train_log "mi10_tmprogressive_wt${loss_weight}_mk1_mn0_width${net_width}_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 \
      --warmup_period 10 \
      --weight_for_loss "${loss_weight}" \
      --width "${net_width}"
  done
done
# Sweep slice: weight_for_loss=0.75, width=300, across all three model variants.
# train.py is invoked three times per model; the invocations for one model
# differ only in the numeric suffix (01-03) of the --train_log file name.
# Runs are sequential: models in the listed order, suffixes 01..03 within each.
loss_weight=0.75
net_width=300
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 \
      --epochs 150 \
      --lr 0.001 \
      --lr_decay step \
      --lr_factor 0.1 \
      --lr_schedule 60 100 \
      --max_iters 10 \
      --min_k 1 \
      --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 \
      --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 \
      --train_data 32 \
      --train_log "mi10_tmprogressive_wt${loss_weight}_mk1_mn0_width${net_width}_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 \
      --warmup_period 10 \
      --weight_for_loss "${loss_weight}" \
      --width "${net_width}"
  done
done
# Sweep slice: weight_for_loss=0.75, width=400, across all three model variants.
# train.py is invoked three times per model; the invocations for one model
# differ only in the numeric suffix (01-03) of the --train_log file name.
# Runs are sequential: models in the listed order, suffixes 01..03 within each.
loss_weight=0.75
net_width=400
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 \
      --epochs 150 \
      --lr 0.001 \
      --lr_decay step \
      --lr_factor 0.1 \
      --lr_schedule 60 100 \
      --max_iters 10 \
      --min_k 1 \
      --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 \
      --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 \
      --train_data 32 \
      --train_log "mi10_tmprogressive_wt${loss_weight}_mk1_mn0_width${net_width}_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 \
      --warmup_period 10 \
      --weight_for_loss "${loss_weight}" \
      --width "${net_width}"
  done
done
# Sweep slice: weight_for_loss=1, width=100, across all three model variants.
# train.py is invoked three times per model; the invocations for one model
# differ only in the numeric suffix (01-03) of the --train_log file name.
# Runs are sequential: models in the listed order, suffixes 01..03 within each.
loss_weight=1
net_width=100
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 \
      --epochs 150 \
      --lr 0.001 \
      --lr_decay step \
      --lr_factor 0.1 \
      --lr_schedule 60 100 \
      --max_iters 10 \
      --min_k 1 \
      --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 \
      --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 \
      --train_data 32 \
      --train_log "mi10_tmprogressive_wt${loss_weight}_mk1_mn0_width${net_width}_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 \
      --warmup_period 10 \
      --weight_for_loss "${loss_weight}" \
      --width "${net_width}"
  done
done
# Sweep slice: weight_for_loss=1, width=200, across all three model variants.
# train.py is invoked three times per model; the invocations for one model
# differ only in the numeric suffix (01-03) of the --train_log file name.
# Runs are sequential: models in the listed order, suffixes 01..03 within each.
loss_weight=1
net_width=200
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 \
      --epochs 150 \
      --lr 0.001 \
      --lr_decay step \
      --lr_factor 0.1 \
      --lr_schedule 60 100 \
      --max_iters 10 \
      --min_k 1 \
      --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 \
      --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 \
      --train_data 32 \
      --train_log "mi10_tmprogressive_wt${loss_weight}_mk1_mn0_width${net_width}_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 \
      --warmup_period 10 \
      --weight_for_loss "${loss_weight}" \
      --width "${net_width}"
  done
done
# Sweep group: max_iters 10, weight_for_loss 1, widths 300 and 400.
# Loop nesting (width -> model -> run) reproduces the original launch order:
# for each width, each of the three models is run three times, and the
# repetitions differ only in the --train_log suffix (01-03).
# NOTE(review): this assumes the file is executed by a shell. If a job
# dispatcher reads it one command per line, keep the flat form -- confirm.
for width in 300 400; do
  for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
    for run in 01 02 03; do
      python train.py \
        --checkpoint checkpoints/prefix_sums/iclr_search \
        --clip 1 \
        --epochs 150 \
        --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
        --max_iters 10 --min_k 1 --min_n 0 \
        --model "${model}" \
        --optimizer adam \
        --output results/prefix_sums/iclr_search \
        --problem prefix_sums \
        --test_batch_size 500 --test_data 128 \
        --test_iterations 20 30 40 50 60 70 80 90 100 \
        --test_mode max_conf \
        --train_batch_size 100 --train_data 32 \
        --train_log "mi10_tmprogressive_wt1_mk1_mn0_width${width}_${model}_${run}.txt" \
        --train_mode progressive \
        --val_period 50 --warmup_period 10 \
        --weight_for_loss 1 --width "${width}"
    done
  done
done
# Sweep group: max_iters 15, weight_for_loss 0, widths 100 through 400.
# Loop nesting (width -> model -> run) reproduces the original launch order:
# for each width, each of the three models is run three times, and the
# repetitions differ only in the --train_log suffix (01-03).
# NOTE(review): this assumes the file is executed by a shell. If a job
# dispatcher reads it one command per line, keep the flat form -- confirm.
for width in 100 200 300 400; do
  for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
    for run in 01 02 03; do
      python train.py \
        --checkpoint checkpoints/prefix_sums/iclr_search \
        --clip 1 \
        --epochs 150 \
        --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
        --max_iters 15 --min_k 1 --min_n 0 \
        --model "${model}" \
        --optimizer adam \
        --output results/prefix_sums/iclr_search \
        --problem prefix_sums \
        --test_batch_size 500 --test_data 128 \
        --test_iterations 20 30 40 50 60 70 80 90 100 \
        --test_mode max_conf \
        --train_batch_size 100 --train_data 32 \
        --train_log "mi15_tmprogressive_wt0_mk1_mn0_width${width}_${model}_${run}.txt" \
        --train_mode progressive \
        --val_period 50 --warmup_period 10 \
        --weight_for_loss 0 --width "${width}"
    done
  done
done
# Sweep group: --max_iters 15, --weight_for_loss 0.25, --width 100.
# Three runs (train_log suffixes _01 to _03) for each of --model recur_net_1d,
# recur_net_recall_1d and recur_net_recallx_1d. Runs of the same model differ
# only in the --train_log file name; all runs share the same --checkpoint and
# --output directories.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
# Sweep group: --max_iters 15, --weight_for_loss 0.25, --width 200.
# Three runs (train_log suffixes _01 to _03) for each of --model recur_net_1d,
# recur_net_recall_1d and recur_net_recallx_1d. Runs of the same model differ
# only in the --train_log file name; all runs share the same --checkpoint and
# --output directories.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
# Sweep group: --max_iters 15, --weight_for_loss 0.25, --width 300.
# Three runs (train_log suffixes _01 to _03) for each of --model recur_net_1d,
# recur_net_recall_1d and recur_net_recallx_1d. Runs of the same model differ
# only in the --train_log file name; all runs share the same --checkpoint and
# --output directories.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
# Sweep group: --max_iters 15, --weight_for_loss 0.25, --width 400.
# Three runs (train_log suffixes _01 to _03) for each of --model recur_net_1d,
# recur_net_recall_1d and recur_net_recallx_1d. Runs of the same model differ
# only in the --train_log file name; all runs share the same --checkpoint and
# --output directories.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
# Sweep group: --max_iters 15, --weight_for_loss 0.5, --width 100.
# Three runs (train_log suffixes _01 to _03) for each of --model recur_net_1d,
# recur_net_recall_1d and recur_net_recallx_1d. Runs of the same model differ
# only in the --train_log file name; all runs share the same --checkpoint and
# --output directories.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
# Sweep group: --max_iters 15, --weight_for_loss 0.5, --width 200.
# The lines below cover --model recur_net_1d and recur_net_recall_1d
# (train_log suffixes _01 to _03 each), then recur_net_recallx_1d
# (suffixes _01 and _02). Runs of the same model differ only in the
# --train_log file name; all runs share the same --checkpoint and --output
# directories.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
# Sweep cell: max_iters=15, weight_for_loss=0.5, width=300.
# Runs train.py nine times in sequence: each model below, three repeats apiece.
# Repeats differ only in the numeric suffix of the --train_log file name.
# NOTE(review): assumes this file is executed as a script, not split
# line-by-line by a job dispatcher -- confirm before merging.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 15 --min_k 1 --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi15_tmprogressive_wt0.5_mk1_mn0_width300_${model}_${rep}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.5 --width 300
  done
done
# Sweep cell: max_iters=15, weight_for_loss=0.5, width=400.
# Runs train.py nine times in sequence: each model below, three repeats apiece.
# Repeats differ only in the numeric suffix of the --train_log file name.
# NOTE(review): assumes this file is executed as a script, not split
# line-by-line by a job dispatcher -- confirm before merging.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 15 --min_k 1 --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi15_tmprogressive_wt0.5_mk1_mn0_width400_${model}_${rep}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.5 --width 400
  done
done
# Sweep cell: max_iters=15, weight_for_loss=0.75, width=100.
# Runs train.py nine times in sequence: each model below, three repeats apiece.
# Repeats differ only in the numeric suffix of the --train_log file name.
# NOTE(review): assumes this file is executed as a script, not split
# line-by-line by a job dispatcher -- confirm before merging.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 15 --min_k 1 --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi15_tmprogressive_wt0.75_mk1_mn0_width100_${model}_${rep}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.75 --width 100
  done
done
# Sweep cell: max_iters=15, weight_for_loss=0.75, width=200.
# Runs train.py nine times in sequence: each model below, three repeats apiece.
# Repeats differ only in the numeric suffix of the --train_log file name.
# NOTE(review): assumes this file is executed as a script, not split
# line-by-line by a job dispatcher -- confirm before merging.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 15 --min_k 1 --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi15_tmprogressive_wt0.75_mk1_mn0_width200_${model}_${rep}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.75 --width 200
  done
done
# Sweep cell: max_iters=15, weight_for_loss=0.75, width=300.
# Runs train.py nine times in sequence: each model below, three repeats apiece.
# Repeats differ only in the numeric suffix of the --train_log file name.
# NOTE(review): assumes this file is executed as a script, not split
# line-by-line by a job dispatcher -- confirm before merging.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 15 --min_k 1 --min_n 0 \
      --model "${model}" \
      --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi15_tmprogressive_wt0.75_mk1_mn0_width300_${model}_${rep}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.75 --width 300
  done
done
# Sweep cell: --max_iters 15, --weight_for_loss 0.75, --width 400.
# Nine commands: --model recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d
# (in that order), three commands per model. Within a model the commands differ
# only in the _01/_02/_03 suffix of the --train_log file name; every other flag
# is identical across the cell.
# NOTE(review): no seed flag is passed, so the three repeats presumably rely on
# train.py picking its own seed - confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
# Sweep cell: --max_iters 15, --weight_for_loss 1, --width 100.
# Nine commands: --model recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d
# (in that order), three commands per model. Within a model the commands differ
# only in the _01/_02/_03 suffix of the --train_log file name; every other flag
# is identical across the cell.
# NOTE(review): no seed flag is passed, so the three repeats presumably rely on
# train.py picking its own seed - confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# Sweep cell: --max_iters 15, --weight_for_loss 1, --width 200.
# Nine commands: --model recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d
# (in that order), three commands per model. Within a model the commands differ
# only in the _01/_02/_03 suffix of the --train_log file name; every other flag
# is identical across the cell.
# NOTE(review): no seed flag is passed, so the three repeats presumably rely on
# train.py picking its own seed - confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# Sweep cell: --max_iters 15, --weight_for_loss 1, --width 300.
# Nine commands: --model recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d
# (in that order), three commands per model. Within a model the commands differ
# only in the _01/_02/_03 suffix of the --train_log file name; every other flag
# is identical across the cell.
# NOTE(review): no seed flag is passed, so the three repeats presumably rely on
# train.py picking its own seed - confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# Sweep cell: --max_iters 15, --weight_for_loss 1, --width 400.
# Nine commands: --model recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d
# (in that order), three commands per model. Within a model the commands differ
# only in the _01/_02/_03 suffix of the --train_log file name; every other flag
# is identical across the cell.
# NOTE(review): no seed flag is passed, so the three repeats presumably rely on
# train.py picking its own seed - confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 15 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi15_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
# Sweep cell: --max_iters 20, --weight_for_loss 0, --width 100.
# Nine commands: --model recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d
# (in that order), three commands per model. Within a model the commands differ
# only in the _01/_02/_03 suffix of the --train_log file name; every other flag
# is identical across the cell.
# NOTE(review): no seed flag is passed, so the three repeats presumably rely on
# train.py picking its own seed - confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
# prefix_sums, progressive training, max_iters=20, weight_for_loss=0, width=200.
# Launches each of the three models three times, model-major, i.e. in the same
# order as a flat one-command-per-line listing. Repeats of a model differ only
# in the numeric suffix of the --train_log file name.
for arch in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "$arch" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0_mk1_mn0_width200_${arch}_${rep}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0 --width 200
  done
done
# prefix_sums, progressive training, max_iters=20, weight_for_loss=0, width=300.
# Launches each of the three models three times, model-major, i.e. in the same
# order as a flat one-command-per-line listing. Repeats of a model differ only
# in the numeric suffix of the --train_log file name.
for arch in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "$arch" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0_mk1_mn0_width300_${arch}_${rep}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0 --width 300
  done
done
# prefix_sums, progressive training, max_iters=20, weight_for_loss=0, width=400.
# Launches each of the three models three times, model-major, i.e. in the same
# order as a flat one-command-per-line listing. Repeats of a model differ only
# in the numeric suffix of the --train_log file name.
for arch in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "$arch" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0_mk1_mn0_width400_${arch}_${rep}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0 --width 400
  done
done
# prefix_sums, progressive training, max_iters=20, weight_for_loss=0.25, width=100.
# Launches each of the three models three times, model-major, i.e. in the same
# order as a flat one-command-per-line listing. Repeats of a model differ only
# in the numeric suffix of the --train_log file name.
for arch in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "$arch" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.25_mk1_mn0_width100_${arch}_${rep}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.25 --width 100
  done
done
# prefix_sums, progressive training, max_iters=20, weight_for_loss=0.25, width=200.
# Launches each of the three models three times, model-major, i.e. in the same
# order as a flat one-command-per-line listing. Repeats of a model differ only
# in the numeric suffix of the --train_log file name.
for arch in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "$arch" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.25_mk1_mn0_width200_${arch}_${rep}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.25 --width 200
  done
done
# prefix_sums, progressive training, max_iters=20, weight_for_loss=0.25, width=300.
# Launches each of the three models three times, model-major, i.e. in the same
# order as a flat one-command-per-line listing. Repeats of a model differ only
# in the numeric suffix of the --train_log file name.
for arch in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for rep in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "$arch" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.25_mk1_mn0_width300_${arch}_${rep}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.25 --width 300
  done
done
# Sweep cell: max_iters=20, weight_for_loss=0.25, width=400.
# Launches train.py for each of the three models, three times per model;
# repetitions differ only in the numeric suffix of the --train_log name.
# NOTE(review): assumes this file is executed as a shell script; if a job
# launcher reads it one command per line, keep the flat form - confirm.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for run in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.25_mk1_mn0_width400_${model}_${run}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.25 --width 400
  done
done
# Sweep cell: max_iters=20, weight_for_loss=0.5, width=100.
# Launches train.py for each of the three models, three times per model;
# repetitions differ only in the numeric suffix of the --train_log name.
# NOTE(review): assumes this file is executed as a shell script; if a job
# launcher reads it one command per line, keep the flat form - confirm.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for run in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.5_mk1_mn0_width100_${model}_${run}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.5 --width 100
  done
done
# Sweep cell: max_iters=20, weight_for_loss=0.5, width=200.
# Launches train.py for each of the three models, three times per model;
# repetitions differ only in the numeric suffix of the --train_log name.
# NOTE(review): assumes this file is executed as a shell script; if a job
# launcher reads it one command per line, keep the flat form - confirm.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for run in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.5_mk1_mn0_width200_${model}_${run}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.5 --width 200
  done
done
# Sweep cell: max_iters=20, weight_for_loss=0.5, width=300.
# Launches train.py for each of the three models, three times per model;
# repetitions differ only in the numeric suffix of the --train_log name.
# NOTE(review): assumes this file is executed as a shell script; if a job
# launcher reads it one command per line, keep the flat form - confirm.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for run in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.5_mk1_mn0_width300_${model}_${run}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.5 --width 300
  done
done
# Sweep cell: max_iters=20, weight_for_loss=0.5, width=400.
# Launches train.py for each of the three models, three times per model;
# repetitions differ only in the numeric suffix of the --train_log name.
# NOTE(review): assumes this file is executed as a shell script; if a job
# launcher reads it one command per line, keep the flat form - confirm.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for run in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.5_mk1_mn0_width400_${model}_${run}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.5 --width 400
  done
done
# Sweep cell: max_iters=20, weight_for_loss=0.75, width=100.
# Launches train.py for each of the three models, three times per model;
# repetitions differ only in the numeric suffix of the --train_log name.
# NOTE(review): assumes this file is executed as a shell script; if a job
# launcher reads it one command per line, keep the flat form - confirm.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for run in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt0.75_mk1_mn0_width100_${model}_${run}.txt" \
      --train_mode progressive --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.75 --width 100
  done
done
# prefix_sums sweep group: --max_iters 20, --weight_for_loss 0.75, --width 200.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d, three runs each.
# Within a model the three commands differ only in the _01/_02/_03 suffix of --train_log
# (no seed flag is passed; presumably repeat trials -- confirm against train.py).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
# prefix_sums sweep group: --max_iters 20, --weight_for_loss 0.75, --width 300.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d, three runs each.
# Within a model the three commands differ only in the _01/_02/_03 suffix of --train_log
# (no seed flag is passed; presumably repeat trials -- confirm against train.py).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
# prefix_sums sweep group: --max_iters 20, --weight_for_loss 0.75, --width 400.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d, three runs each.
# Within a model the three commands differ only in the _01/_02/_03 suffix of --train_log
# (no seed flag is passed; presumably repeat trials -- confirm against train.py).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
# prefix_sums sweep group: --max_iters 20, --weight_for_loss 1, --width 100.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d, three runs each.
# Within a model the three commands differ only in the _01/_02/_03 suffix of --train_log
# (no seed flag is passed; presumably repeat trials -- confirm against train.py).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# prefix_sums sweep group: --max_iters 20, --weight_for_loss 1, --width 200.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d, three runs each.
# Within a model the three commands differ only in the _01/_02/_03 suffix of --train_log
# (no seed flag is passed; presumably repeat trials -- confirm against train.py).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# prefix_sums sweep group: --max_iters 20, --weight_for_loss 1, --width 300.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d, three runs each.
# Within a model the three commands differ only in the _01/_02/_03 suffix of --train_log
# (no seed flag is passed; presumably repeat trials -- confirm against train.py).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 20 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi20_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# prefix_sums sweep, --max_iters 20 / --weight_for_loss 1 / --width 400.
# Launches train.py three times (log suffixes 01-03) for each model, in the
# order recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d. The repeat
# index is only used to give every run its own --train_log file name.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 20 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi20_tmprogressive_wt1_mk1_mn0_width400_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 1 --width 400
  done
done
# prefix_sums sweep, --max_iters 25 / --weight_for_loss 0.
# Iterates width (100, 200, 300, 400) outermost, then model (recur_net_1d,
# recur_net_recall_1d, recur_net_recallx_1d), then three repeats (01-03),
# which reproduces the 36 runs in their original launch order. The repeat
# index is only used to give every run its own --train_log file name.
for width in 100 200 300 400; do
  for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
    for trial in 01 02 03; do
      python train.py \
        --checkpoint checkpoints/prefix_sums/iclr_search \
        --clip 1 --epochs 150 \
        --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
        --max_iters 25 --min_k 1 --min_n 0 \
        --model "${model}" --optimizer adam \
        --output results/prefix_sums/iclr_search \
        --problem prefix_sums \
        --test_batch_size 500 --test_data 128 \
        --test_iterations 20 30 40 50 60 70 80 90 100 \
        --test_mode max_conf \
        --train_batch_size 100 --train_data 32 \
        --train_log "mi25_tmprogressive_wt0_mk1_mn0_width${width}_${model}_${trial}.txt" \
        --train_mode progressive \
        --val_period 50 --warmup_period 10 \
        --weight_for_loss 0 --width "${width}"
    done
  done
done
# prefix_sums sweep, --max_iters 25 / --weight_for_loss 0.25 / --width 100.
# Launches train.py three times (log suffixes 01-03) for each model, in the
# order recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d. The repeat
# index is only used to give every run its own --train_log file name.
for model in recur_net_1d recur_net_recall_1d recur_net_recallx_1d; do
  for trial in 01 02 03; do
    python train.py \
      --checkpoint checkpoints/prefix_sums/iclr_search \
      --clip 1 --epochs 150 \
      --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 \
      --max_iters 25 --min_k 1 --min_n 0 \
      --model "${model}" --optimizer adam \
      --output results/prefix_sums/iclr_search \
      --problem prefix_sums \
      --test_batch_size 500 --test_data 128 \
      --test_iterations 20 30 40 50 60 70 80 90 100 \
      --test_mode max_conf \
      --train_batch_size 100 --train_data 32 \
      --train_log "mi25_tmprogressive_wt0.25_mk1_mn0_width100_${model}_${trial}.txt" \
      --train_mode progressive \
      --val_period 50 --warmup_period 10 \
      --weight_for_loss 0.25 --width 100
  done
done
# Sweep slice: --max_iters 25, --weight_for_loss 0.25, --width 200.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three commands per model. The three commands for a model differ only in the
# _01/_02/_03 suffix of the --train_log file name; every other flag is the same
# on all nine lines except --model (and the model name inside --train_log).
# NOTE(review): no seed flag is passed, so the repeats are presumably
# independent trials relying on train.py's own randomness -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
# Sweep slice: --max_iters 25, --weight_for_loss 0.25, --width 300.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three commands per model. The three commands for a model differ only in the
# _01/_02/_03 suffix of the --train_log file name; every other flag is the same
# on all nine lines except --model (and the model name inside --train_log).
# NOTE(review): no seed flag is passed, so the repeats are presumably
# independent trials relying on train.py's own randomness -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
# Sweep slice: --max_iters 25, --weight_for_loss 0.25, --width 400.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three commands per model. The three commands for a model differ only in the
# _01/_02/_03 suffix of the --train_log file name; every other flag is the same
# on all nine lines except --model (and the model name inside --train_log).
# NOTE(review): no seed flag is passed, so the repeats are presumably
# independent trials relying on train.py's own randomness -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
# Sweep slice: --max_iters 25, --weight_for_loss 0.5, --width 100.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three commands per model. The three commands for a model differ only in the
# _01/_02/_03 suffix of the --train_log file name; every other flag is the same
# on all nine lines except --model (and the model name inside --train_log).
# NOTE(review): no seed flag is passed, so the repeats are presumably
# independent trials relying on train.py's own randomness -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
# Sweep slice: --max_iters 25, --weight_for_loss 0.5, --width 200.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three commands per model. The three commands for a model differ only in the
# _01/_02/_03 suffix of the --train_log file name; every other flag is the same
# on all nine lines except --model (and the model name inside --train_log).
# NOTE(review): no seed flag is passed, so the repeats are presumably
# independent trials relying on train.py's own randomness -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
# Sweep slice: --max_iters 25, --weight_for_loss 0.5, --width 300.
# Nine commands: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three commands per model. The three commands for a model differ only in the
# _01/_02/_03 suffix of the --train_log file name; every other flag is the same
# on all nine lines except --model (and the model name inside --train_log).
# NOTE(review): no seed flag is passed, so the repeats are presumably
# independent trials relying on train.py's own randomness -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# Sweep group: --max_iters 25, --weight_for_loss 1, --width 100.
# The commands below differ only in --model and in the two-digit suffix of the
# --train_log filename; every other flag is the same on each line.
# NOTE(review): this group continues from the line above (presumably the
# recur_net_1d run with log suffix 01) -- confirm.
# NOTE(review): the 01/02/03 log suffix presumably marks repeated trials of the
# same configuration -- confirm how train.py distinguishes them.
# --model recur_net_1d, log suffixes 02-03
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# --model recur_net_recall_1d, log suffixes 01-03
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# --model recur_net_recallx_1d, log suffixes 01-03
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# Sweep group: --max_iters 25, --weight_for_loss 1, --width 200.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), each
# launched three times; the three launches of a model differ only in the
# 01/02/03 suffix of the --train_log filename.
# NOTE(review): the suffix presumably marks repeated trials -- confirm.
# --model recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# --model recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# --model recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# Sweep group: --max_iters 25, --weight_for_loss 1, --width 300.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), each
# launched three times; the three launches of a model differ only in the
# 01/02/03 suffix of the --train_log filename.
# NOTE(review): the suffix presumably marks repeated trials -- confirm.
# --model recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# --model recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# --model recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# Sweep group: --max_iters 25, --weight_for_loss 1, --width 400.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), each
# launched three times; the three launches of a model differ only in the
# 01/02/03 suffix of the --train_log filename.
# NOTE(review): the suffix presumably marks repeated trials -- confirm.
# --model recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
# --model recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
# --model recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 25 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi25_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
# Sweep group: --max_iters 30, --weight_for_loss 0, --width 100.
# Relative to the preceding --max_iters 25 commands, --max_iters rises to 30 and
# --weight_for_loss returns to 0 (log prefix mi30_..._wt0).
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), each
# launched three times; the three launches of a model differ only in the
# 01/02/03 suffix of the --train_log filename.
# NOTE(review): the suffix presumably marks repeated trials -- confirm.
# --model recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
# --model recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
# --model recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 100 
# Sweep group: --max_iters 30, --weight_for_loss 0, --width 200.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), each
# launched three times; the three launches of a model differ only in the
# 01/02/03 suffix of the --train_log filename.
# NOTE(review): the suffix presumably marks repeated trials -- confirm.
# --model recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
# --model recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
# --model recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 200 
# Sweep group: --max_iters 30, --weight_for_loss 0, --width 300.
# The commands below differ only in --model and in the two-digit suffix of the
# --train_log filename; every other flag is the same on each line.
# NOTE(review): the 01/02/03 log suffix presumably marks repeated trials of the
# same configuration -- confirm how train.py distinguishes them.
# --model recur_net_1d, log suffixes 01-03
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
# --model recur_net_recall_1d, log suffixes 01-03
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
# --model recur_net_recallx_1d, log suffixes 01-02
# NOTE(review): the suffix-03 run of this model presumably follows on the next
# line -- confirm.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 300 
# prefix_sums sweep group: --max_iters 30, --weight_for_loss 0, --width 400.
# The commands in this group differ only in --model (recur_net_1d,
# recur_net_recall_1d, recur_net_recallx_1d) and in --train_log, whose name
# embeds those settings plus a two-digit run index (_01.._03 per model).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0 --width 400 
# prefix_sums sweep group: --max_iters 30, --weight_for_loss 0.25, --width 100.
# The commands in this group differ only in --model (recur_net_1d,
# recur_net_recall_1d, recur_net_recallx_1d) and in --train_log, whose name
# embeds those settings plus a two-digit run index (_01.._03 per model).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 100 
# prefix_sums sweep group: --max_iters 30, --weight_for_loss 0.25, --width 200.
# The commands in this group differ only in --model (recur_net_1d,
# recur_net_recall_1d, recur_net_recallx_1d) and in --train_log, whose name
# embeds those settings plus a two-digit run index (_01.._03 per model).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 200 
# prefix_sums sweep group: --max_iters 30, --weight_for_loss 0.25, --width 300.
# The commands in this group differ only in --model (recur_net_1d,
# recur_net_recall_1d, recur_net_recallx_1d) and in --train_log, whose name
# embeds those settings plus a two-digit run index (_01.._03 per model).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 300 
# prefix_sums sweep group: --max_iters 30, --weight_for_loss 0.25, --width 400.
# The commands in this group differ only in --model (recur_net_1d,
# recur_net_recall_1d, recur_net_recallx_1d) and in --train_log, whose name
# embeds those settings plus a two-digit run index (_01.._03 per model).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.25_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.25 --width 400 
# Sweep group: prefix_sums, --max_iters 30, --weight_for_loss 0.5, --width 100.
# Models in order: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three invocations each. Within a model the commands differ only in the
# _01/_02/_03 index at the end of --train_log; no seed flag is passed, so these
# are presumably repeated trials (confirm how train.py seeds itself).
# The --train_log name repeats the flag values: mi=--max_iters,
# tm=--train_mode, wt=--weight_for_loss, mk=--min_k, mn=--min_n, width=--width.
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 100 
# Sweep group: prefix_sums, --max_iters 30, --weight_for_loss 0.5, --width 200.
# Models in order: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three invocations each. Within a model the commands differ only in the
# _01/_02/_03 index at the end of --train_log; no seed flag is passed, so these
# are presumably repeated trials (confirm how train.py seeds itself).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 200 
# Sweep group: prefix_sums, --max_iters 30, --weight_for_loss 0.5, --width 300.
# Models in order: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three invocations each. Within a model the commands differ only in the
# _01/_02/_03 index at the end of --train_log; no seed flag is passed, so these
# are presumably repeated trials (confirm how train.py seeds itself).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 300 
# Sweep group: prefix_sums, --max_iters 30, --weight_for_loss 0.5, --width 400.
# Models in order: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three invocations each. Within a model the commands differ only in the
# _01/_02/_03 index at the end of --train_log; no seed flag is passed, so these
# are presumably repeated trials (confirm how train.py seeds itself).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.5_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.5 --width 400 
# Sweep group: prefix_sums, --max_iters 30, --weight_for_loss 0.75, --width 100.
# Models in order: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three invocations each. Within a model the commands differ only in the
# _01/_02/_03 index at the end of --train_log; no seed flag is passed, so these
# are presumably repeated trials (confirm how train.py seeds itself).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 100 
# Sweep group: prefix_sums, --max_iters 30, --weight_for_loss 0.75, --width 200.
# Models in order: recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d,
# three invocations each. Within a model the commands differ only in the
# _01/_02/_03 index at the end of --train_log; no seed flag is passed, so these
# are presumably repeated trials (confirm how train.py seeds itself).
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 200 
# Sweep group: max_iters 30, weight_for_loss 0.75, width 300.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), three commands each;
# the commands for one model differ only in the _01/_02/_03 suffix of --train_log.
# NOTE(review): presumably independent repeat runs of one configuration - confirm how train.py seeds them.
# Every command uses the same --checkpoint and --output directories.
# -- model: recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
# -- model: recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
# -- model: recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 300 
# Sweep group: max_iters 30, weight_for_loss 0.75, width 400.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), three commands each;
# the commands for one model differ only in the _01/_02/_03 suffix of --train_log.
# NOTE(review): presumably independent repeat runs of one configuration - confirm how train.py seeds them.
# Every command uses the same --checkpoint and --output directories.
# -- model: recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
# -- model: recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
# -- model: recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt0.75_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 0.75 --width 400 
# Sweep group: max_iters 30, weight_for_loss 1, width 100.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), three commands each;
# the commands for one model differ only in the _01/_02/_03 suffix of --train_log.
# NOTE(review): presumably independent repeat runs of one configuration - confirm how train.py seeds them.
# Every command uses the same --checkpoint and --output directories.
# -- model: recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# -- model: recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# -- model: recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width100_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 100 
# Sweep group: max_iters 30, weight_for_loss 1, width 200.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), three commands each;
# the commands for one model differ only in the _01/_02/_03 suffix of --train_log.
# NOTE(review): presumably independent repeat runs of one configuration - confirm how train.py seeds them.
# Every command uses the same --checkpoint and --output directories.
# -- model: recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# -- model: recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# -- model: recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width200_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 200 
# Sweep group: max_iters 30, weight_for_loss 1, width 300.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), three commands each;
# the commands for one model differ only in the _01/_02/_03 suffix of --train_log.
# NOTE(review): presumably independent repeat runs of one configuration - confirm how train.py seeds them.
# Every command uses the same --checkpoint and --output directories.
# -- model: recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# -- model: recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# -- model: recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width300_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 300 
# Sweep group: max_iters 30, weight_for_loss 1, width 400.
# Three models (recur_net_1d, recur_net_recall_1d, recur_net_recallx_1d), three commands each;
# the commands for one model differ only in the _01/_02/_03 suffix of --train_log.
# NOTE(review): presumably independent repeat runs of one configuration - confirm how train.py seeds them.
# Every command uses the same --checkpoint and --output directories.
# -- model: recur_net_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
# -- model: recur_net_recall_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recall_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_recall_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
# -- model: recur_net_recallx_1d
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_01.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_02.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
python train.py --checkpoint checkpoints/prefix_sums/iclr_search --clip 1 --epochs 150 --lr 0.001 --lr_decay step --lr_factor 0.1 --lr_schedule 60 100 --max_iters 30 --min_k 1 --min_n 0 --model recur_net_recallx_1d --optimizer adam --output results/prefix_sums/iclr_search --problem prefix_sums --test_batch_size 500 --test_data 128 --test_iterations 20 30 40 50 60 70 80 90 100 --test_mode max_conf --train_batch_size 100 --train_data 32 --train_log mi30_tmprogressive_wt1_mk1_mn0_width400_recur_net_recallx_1d_03.txt --train_mode progressive --val_period 50 --warmup_period 10 --weight_for_loss 1 --width 400 
