################ Data is collected using TS, and the optimal
################ action is the next action (lookahead).


# Evaluation run. Adjust the --epoch value below to choose an appropriate epoch.
run_eval() {
    # The argument list is assembled in this function's own positional
    # parameters so the flags can be grouped and annotated; the script's
    # own "$@" is not affected. Flag meanings are defined by eval_new.py.
    set --

    # NOTE(review): presumably environment / data settings -- confirm in eval_new.py.
    set -- "$@" --env linear_bandit_train_original_emp_opt --envs 200195
    set -- "$@" --H 40 --dim 100 --lin_d 5 --var 0.3 --cov 0.0

    # NOTE(review): presumably optimizer / model hyperparameters -- confirm.
    set -- "$@" --lr 0.00015 --layer 4 --head 4

    # Epoch to evaluate (the value to tune, per the note above).
    set -- "$@" --epoch 950

    # Evaluation count and seed.
    set -- "$@" --n_eval 200 --seed 1

    # Evaluation mode and the two additional environments handed to the script.
    set -- "$@" --pred_reward_type non_linear --eval_type Test_more_models
    set -- "$@" --eval_new_env linear_bandit_train_lookahead_pred_reward
    set -- "$@" --eval_new_env1 linear_bandit_train_AD

    # CUDA_VISIBLE_DEVICES=0 is set for this one process only.
    CUDA_VISIBLE_DEVICES=0 python3 eval_new.py "$@"
}

run_eval


