#!/usr/bin/env bash
################ Data is collected using TS, and the optimal
################ action is the next action (lookahead).
#
# Runs three stages in order, all on the same GPU and with the same
# environment settings:
#   1. collect_data_lookahead.py  - collect data
#   2. train_new_wt.py            - train
#   3. eval_new.py                - evaluate a chosen epoch
#
# Usage:
#   bash <this script>            # uses GPU 1
#   GPU_ID=0 bash <this script>   # uses GPU 0

# Stop at the first failing stage (-e) and on any unset variable (-u), so
# that training/evaluation are never launched after an earlier stage failed.
# (No pipelines are used here, so `pipefail` is not needed; leaving it out
# keeps the script runnable under a plain POSIX sh as well.)
set -eu

# GPU index made visible to every stage. Defaults to 1; override via GPU_ID.
GPU_ID="${GPU_ID:-1}"

# Settings passed with identical values to all three stages. They are defined
# once here so the stages cannot drift apart when a value is edited.
env_name=linear_bandit_train_original_emp_opt
n_envs=300001
horizon=20
dim=30
lin_d=15
var=0.3
cov=0.0
pred_reward_type=non_linear

# Settings passed with identical values to both training and evaluation.
lr=0.00015
n_layer=4
n_head=4
seed=1

# Stage-specific settings.
envs_eval=200              # collection only (--envs_eval)
data_type=TS_pred_reward   # collection only (--data_type)
num_epochs=1000            # training only (--num_epochs)
batch_size=256             # training only (--batch_size)
eval_epoch=1000            # evaluation only (--epoch); choose an appropriate epoch
n_eval=200                 # evaluation only (--n_eval)

# # # Collect data using TS
CUDA_VISIBLE_DEVICES="$GPU_ID" python3 collect_data_lookahead.py --env "$env_name" \
    --envs "$n_envs" \
    --H "$horizon" \
    --dim "$dim" \
    --var "$var" \
    --cov "$cov" \
    --lin_d "$lin_d" \
    --envs_eval "$envs_eval" \
    --data_type "$data_type" \
    --pred_reward_type "$pred_reward_type"

# # Train
CUDA_VISIBLE_DEVICES="$GPU_ID" python3 train_new_wt.py --env "$env_name" \
    --envs "$n_envs" \
    --H "$horizon" \
    --dim "$dim" \
    --lin_d "$lin_d" \
    --var "$var" \
    --cov "$cov" \
    --lr "$lr" \
    --layer "$n_layer" \
    --head "$n_head" \
    --num_epochs "$num_epochs" \
    --batch_size "$batch_size" \
    --seed "$seed" \
    --pred_reward_type "$pred_reward_type"

# # Evaluate, choose an appropriate epoch
CUDA_VISIBLE_DEVICES="$GPU_ID" python3 eval_new.py --env "$env_name" \
    --envs "$n_envs" \
    --H "$horizon" \
    --dim "$dim" \
    --lin_d "$lin_d" \
    --var "$var" \
    --cov "$cov" \
    --lr "$lr" \
    --layer "$n_layer" \
    --head "$n_head" \
    --epoch "$eval_epoch" \
    --n_eval "$n_eval" \
    --seed "$seed" \
    --pred_reward_type "$pred_reward_type"




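# ################ Alternative configuration (currently disabled): data is
# ################ collected using TS, and the optimal action is the next
# ################ action (lookahead). It differs from the active run above
# ################ only in --dim (10) and --lin_d (25).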


# # # # Collect data using TS
# CUDA_VISIBLE_DEVICES=1 python3 collect_data_lookahead.py --env linear_bandit_train_original_emp_opt \
#     --envs 300001 \
#     --H 20 \
#     --dim 10 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lin_d 25 \
#     --envs_eval 200 \
#     --data_type TS_pred_reward \
#     --pred_reward_type non_linear

# # # Train
# CUDA_VISIBLE_DEVICES=1 python3 train_new_wt.py --env linear_bandit_train_original_emp_opt \
#     --envs 300001 \
#     --H 20 \
#     --dim 10 \
#     --lin_d 25 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --num_epochs 1000 \
#     --batch_size 256 \
#     --seed 1 \
#     --pred_reward_type non_linear

# # # Evaluate, choose an appropriate epoch
# CUDA_VISIBLE_DEVICES=1 python3 eval_new.py --env linear_bandit_train_original_emp_opt \
#     --envs 300001 \
#     --H 20 \
#     --dim 10 \
#     --lin_d 25 \
#     --var 0.3 \
#     --cov 0.0 \
#     --lr 0.00015 \
#     --layer 4 \
#     --head 4 \
#     --epoch 1000 \
#     --n_eval 200 \
#     --seed 1 \
#     --pred_reward_type non_linear