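################ Data is collected using TS; the optimal
################ action is the next action (lookahead).
################ Each section below runs collect -> train -> evaluate
################ for one model size (--layer/--head = 2, 4, 6, 8, 10, 12).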

# Configuration: --layer 2 / --head 2.
# Runs three stages in order: collect data using TS, train, evaluate.
# The stages are chained with `&&` so that a failing stage stops this
# configuration instead of letting the later stages run after a failure
# (train/eval presumably consume the previous stage's output -- confirm).
{
    # Collect data using TS
    CUDA_VISIBLE_DEVICES=0 python3 collect_data_lookahead.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --var 0.3 \
        --cov 0.0 \
        --lin_d 40 \
        --envs_eval 200 \
        --data_type TS_pred_reward \
        --pred_reward_type linear &&

    # Train
    CUDA_VISIBLE_DEVICES=0 python3 train_new_wt.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 2 \
        --head 2 \
        --num_epochs 1000 \
        --batch_size 1024 \
        --seed 1 \
        --pred_reward_type linear &&

    # Evaluate; choose an appropriate --epoch (350 is used here)
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 2 \
        --head 2 \
        --epoch 350 \
        --n_eval 200 \
        --seed 1 \
        --pred_reward_type linear
} || {
    # Report on stderr and leave a non-zero status behind for the caller.
    printf '%s\n' 'layer=2 head=2: a pipeline stage failed; later stages were skipped' >&2
    false
}



#######################################
#######################################
#######################################



# Configuration: --layer 4 / --head 4.
# Runs three stages in order: collect data using TS, train, evaluate.
# The stages are chained with `&&` so that a failing stage stops this
# configuration instead of letting the later stages run after a failure
# (train/eval presumably consume the previous stage's output -- confirm).
{
    # Collect data using TS
    # NOTE(review): this command has the same arguments as the collection
    # step of the --layer 2 / --head 2 configuration earlier in this file;
    # confirm whether re-collecting is required or the data can be reused.
    CUDA_VISIBLE_DEVICES=0 python3 collect_data_lookahead.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --var 0.3 \
        --cov 0.0 \
        --lin_d 40 \
        --envs_eval 200 \
        --data_type TS_pred_reward \
        --pred_reward_type linear &&

    # Train
    CUDA_VISIBLE_DEVICES=0 python3 train_new_wt.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 4 \
        --head 4 \
        --num_epochs 1000 \
        --batch_size 1024 \
        --seed 1 \
        --pred_reward_type linear &&

    # Evaluate; choose an appropriate --epoch (350 is used here)
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 4 \
        --head 4 \
        --epoch 350 \
        --n_eval 200 \
        --seed 1 \
        --pred_reward_type linear
} || {
    # Report on stderr and leave a non-zero status behind for the caller.
    printf '%s\n' 'layer=4 head=4: a pipeline stage failed; later stages were skipped' >&2
    false
}



#######################################
#######################################
#######################################


# Configuration: --layer 6 / --head 6.
# Runs three stages in order: collect data using TS, train, evaluate.
# The stages are chained with `&&` so that a failing stage stops this
# configuration instead of letting the later stages run after a failure
# (train/eval presumably consume the previous stage's output -- confirm).
{
    # Collect data using TS
    # NOTE(review): this command has the same arguments as the collection
    # step of the --layer 2 / --head 2 configuration earlier in this file;
    # confirm whether re-collecting is required or the data can be reused.
    CUDA_VISIBLE_DEVICES=0 python3 collect_data_lookahead.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --var 0.3 \
        --cov 0.0 \
        --lin_d 40 \
        --envs_eval 200 \
        --data_type TS_pred_reward \
        --pred_reward_type linear &&

    # Train
    CUDA_VISIBLE_DEVICES=0 python3 train_new_wt.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 6 \
        --head 6 \
        --num_epochs 1000 \
        --batch_size 1024 \
        --seed 1 \
        --pred_reward_type linear &&

    # Evaluate; choose an appropriate --epoch (350 is used here)
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 6 \
        --head 6 \
        --epoch 350 \
        --n_eval 200 \
        --seed 1 \
        --pred_reward_type linear
} || {
    # Report on stderr and leave a non-zero status behind for the caller.
    printf '%s\n' 'layer=6 head=6: a pipeline stage failed; later stages were skipped' >&2
    false
}


#######################################
#######################################
#######################################


# Configuration: --layer 8 / --head 8.
# Runs three stages in order: collect data using TS, train, evaluate.
# The stages are chained with `&&` so that a failing stage stops this
# configuration instead of letting the later stages run after a failure
# (train/eval presumably consume the previous stage's output -- confirm).
{
    # Collect data using TS
    # NOTE(review): this command has the same arguments as the collection
    # step of the --layer 2 / --head 2 configuration earlier in this file;
    # confirm whether re-collecting is required or the data can be reused.
    CUDA_VISIBLE_DEVICES=0 python3 collect_data_lookahead.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --var 0.3 \
        --cov 0.0 \
        --lin_d 40 \
        --envs_eval 200 \
        --data_type TS_pred_reward \
        --pred_reward_type linear &&

    # Train
    CUDA_VISIBLE_DEVICES=0 python3 train_new_wt.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 8 \
        --head 8 \
        --num_epochs 1000 \
        --batch_size 1024 \
        --seed 1 \
        --pred_reward_type linear &&

    # Evaluate; choose an appropriate --epoch (350 is used here)
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 8 \
        --head 8 \
        --epoch 350 \
        --n_eval 200 \
        --seed 1 \
        --pred_reward_type linear
} || {
    # Report on stderr and leave a non-zero status behind for the caller.
    printf '%s\n' 'layer=8 head=8: a pipeline stage failed; later stages were skipped' >&2
    false
}


#######################################
#######################################
#######################################


# Configuration: --layer 10 / --head 10.
# Runs three stages in order: collect data using TS, train, evaluate.
# The stages are chained with `&&` so that a failing stage stops this
# configuration instead of letting the later stages run after a failure
# (train/eval presumably consume the previous stage's output -- confirm).
#
# NOTE(review): all three stages here pass --envs 160004, while the other
# configurations in this file pass --envs 160000. The value is kept as-is
# because it is consistent across collect/train/eval; confirm whether it
# is intentional (e.g. to keep this run's data separate) or a typo.
{
    # Collect data using TS
    CUDA_VISIBLE_DEVICES=0 python3 collect_data_lookahead.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160004 \
        --H 20 \
        --dim 20 \
        --var 0.3 \
        --cov 0.0 \
        --lin_d 40 \
        --envs_eval 200 \
        --data_type TS_pred_reward \
        --pred_reward_type linear &&

    # Train
    CUDA_VISIBLE_DEVICES=0 python3 train_new_wt.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160004 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 10 \
        --head 10 \
        --num_epochs 1000 \
        --batch_size 1024 \
        --seed 1 \
        --pred_reward_type linear &&

    # Evaluate; choose an appropriate --epoch (350 is used here)
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160004 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 10 \
        --head 10 \
        --epoch 350 \
        --n_eval 200 \
        --seed 1 \
        --pred_reward_type linear
} || {
    # Report on stderr and leave a non-zero status behind for the caller.
    printf '%s\n' 'layer=10 head=10: a pipeline stage failed; later stages were skipped' >&2
    false
}



#######################################
#######################################
#######################################



# Configuration: --layer 12 / --head 12.
# Runs three stages in order: collect data using TS, train, evaluate.
# The stages are chained with `&&` so that a failing stage stops this
# configuration instead of letting the later stages run after a failure
# (train/eval presumably consume the previous stage's output -- confirm).
{
    # Collect data using TS
    # NOTE(review): this command has the same arguments as the collection
    # step of the --layer 2 / --head 2 configuration earlier in this file;
    # confirm whether re-collecting is required or the data can be reused.
    CUDA_VISIBLE_DEVICES=0 python3 collect_data_lookahead.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --var 0.3 \
        --cov 0.0 \
        --lin_d 40 \
        --envs_eval 200 \
        --data_type TS_pred_reward \
        --pred_reward_type linear &&

    # Train
    CUDA_VISIBLE_DEVICES=0 python3 train_new_wt.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 12 \
        --head 12 \
        --num_epochs 1000 \
        --batch_size 1024 \
        --seed 1 \
        --pred_reward_type linear &&

    # Evaluate; choose an appropriate --epoch (350 is used here)
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_lookahead_pred_reward \
        --envs 160000 \
        --H 20 \
        --dim 20 \
        --lin_d 40 \
        --var 0.3 \
        --cov 0.0 \
        --lr 0.00015 \
        --layer 12 \
        --head 12 \
        --epoch 350 \
        --n_eval 200 \
        --seed 1 \
        --pred_reward_type linear
} || {
    # Report on stderr and leave a non-zero status behind for the caller.
    printf '%s\n' 'layer=12 head=12: a pipeline stage failed; later stages were skipped' >&2
    false
}
