#!/usr/bin/env bash
# Three-stage pipeline for the darkroom_heldout_lookahead_pred_reward env:
#   1. collect data   (collect_data_lookahead.py)
#   2. train          (train_new_wt.py)
#   3. evaluate       (eval_new.py)
#
# Abort at the first failing command so a later stage never runs after an
# earlier stage has failed, and treat unset variables as errors.
set -eu

# Settings passed to more than one stage. Each is defined once so that the
# commands below always receive identical values.
gpu_id=0
env_name=darkroom_heldout_lookahead_pred_reward
num_envs=100000
horizon=50
dim=4
lr=0.001
num_layers=4
num_heads=4
seed=1

# Train-only settings.
num_epochs=1000
batch_size=1024

# Epoch passed to the evaluation script; choose an appropriate epoch.
eval_epoch=200

# Collect data
CUDA_VISIBLE_DEVICES="$gpu_id" python3 collect_data_lookahead.py \
    --env "$env_name" \
    --envs "$num_envs" \
    --H "$horizon" \
    --dim "$dim"

# Train
CUDA_VISIBLE_DEVICES="$gpu_id" python3 train_new_wt.py \
    --env "$env_name" \
    --envs "$num_envs" \
    --H "$horizon" \
    --dim "$dim" \
    --lr "$lr" \
    --layer "$num_layers" \
    --head "$num_heads" \
    --num_epochs "$num_epochs" \
    --batch_size "$batch_size" \
    --shuffle \
    --seed "$seed"

# Evaluate
# NOTE(review): the model/data flags are repeated here with the training
# values; presumably eval_new.py needs them to match the training run -- confirm.
CUDA_VISIBLE_DEVICES="$gpu_id" python3 eval_new.py \
    --env "$env_name" \
    --envs "$num_envs" \
    --H "$horizon" \
    --dim "$dim" \
    --lr "$lr" \
    --layer "$num_layers" \
    --head "$num_heads" \
    --shuffle \
    --epoch "$eval_epoch" \
    --seed "$seed"
