# ################ We collect data using TS, and the optimal
# ################ action is the next action (lookahead).

# # # # Evaluate with --H 20; choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 150080 \
#     --H 20 \
#     --dim 20 \
#     --lin_d 5 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 450 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type linear \
#     --eval_type Test_more_models \
#     --eval_new_env linear_bandit_train_lookahead_pred_reward \
#     --eval_new_env1 linear_bandit_train_AD



# # # # Evaluate with --H 40; choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 150080 \
#     --H 40 \
#     --dim 20 \
#     --lin_d 5 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 450 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type linear \
#     --eval_type Test_more_models \
#     --eval_new_env linear_bandit_train_lookahead_pred_reward \
#     --eval_new_env1 linear_bandit_train_AD




# # # # Evaluate with --H 60; choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 150080 \
#     --H 60 \
#     --dim 20 \
#     --lin_d 5 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 450 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type linear \
#     --eval_type Test_more_models \
#     --eval_new_env linear_bandit_train_lookahead_pred_reward \
#     --eval_new_env1 linear_bandit_train_AD




# # # # # Evaluate with --H 80; choose an appropriate epoch
# # CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
# #     --envs 150080 \
# #     --H 80 \
# #     --dim 20 \
# #     --lin_d 5 \
# #     --var 0.3 \
# #     --cov 0.0 \
# #     --lr 0.00015 \
# #     --layer 4 \
# #     --head 4 \
# #     --epoch 450 \
# #     --n_eval 200 \
# #     --seed 1 \
# #     --pred_reward_type linear \
# #     --eval_type Test_more_models \
# #     --eval_new_env linear_bandit_train_lookahead_pred_reward \
# #     --eval_new_env1 linear_bandit_train_AD



# # # # # Evaluate with --H 100; choose an appropriate epoch
# # CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
# #     --envs 150080 \
# #     --H 100 \
# #     --dim 20 \
# #     --lin_d 5 \
# #     --var 0.3 \
# #     --cov 0.0 \
# #     --lr 0.00015 \
# #     --layer 4 \
# #     --head 4 \
# #     --epoch 450 \
# #     --n_eval 200 \
# #     --seed 1 \
# #     --pred_reward_type linear \
# #     --eval_type Test_more_models \
# #     --eval_new_env linear_bandit_train_lookahead_pred_reward \
# #     --eval_new_env1 linear_bandit_train_AD




# # # # Evaluate with --H 120; choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 150080 \
#     --H 120 \
#     --dim 20 \
#     --lin_d 5 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 450 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type linear \
#     --eval_type Test_more_models \
#     --eval_new_env linear_bandit_train_lookahead_pred_reward \
#     --eval_new_env1 linear_bandit_train_AD



# # # # Evaluate with --H 140; choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 150080 \
#     --H 140 \
#     --dim 20 \
#     --lin_d 5 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 450 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type linear \
#     --eval_type Test_more_models \
#     --eval_new_env linear_bandit_train_lookahead_pred_reward \
#     --eval_new_env1 linear_bandit_train_AD



# # # # Evaluate with --H 200; choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=0 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 150080 \
#     --H 200 \
#     --dim 20 \
#     --lin_d 5 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 450 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type linear \
#     --eval_type Test_more_models \
#     --eval_new_env linear_bandit_train_lookahead_pred_reward \
#     --eval_new_env1 linear_bandit_train_AD



