#!/bin/bash
# Script to reproduce results: launches otr.train_offline once per task/seed/prefix combination.
#
# Foldername="0723_offline_meta_rl_qwer"
# mkdir out_logs/${Foldername} &> /dev/null
# maze2d-large-dense-v1
# antmaze-large-diverse-v0
# /data3/zj/optimal_transport_reward/reward_learning/rewards/ensemble_antmaze-large-diverse-v0_initial_pairs_5_num_queries_5_num_iter_20_retrain_num_iter_20_voi_myucb_seed_15_round_num_5.npy
# /data3/zj/optimal_transport_reward/reward_learning/rewards/ensemble_antmaze-large-diverse-v0_initial_pairs_5_num_queries_5_num_iter_20_retrain_num_iter_20_voi_dis_seed_5_round_num_9.npy
# /data3/zj/optimal_transport_reward/reward_learning/rewards/ensemble_antmaze-large-diverse-v0_initial_pairs_5_num_queries_5_num_iter_20_retrain_num_iter_20_voi_contrastive_seed_25_round_num_9.npy

 # Experiment grid. The launch loops later in this script start one training
 # job for every combination of the values below.
 declare -a use_dataset_rewards=("1")   # forwarded as --config.use_dataset_reward
 declare -a prefixes=("ori")            # leading part of each run / log file name
 declare -a seeds=("25" "26" "27")      # forwarded as --config.seed
 declare -a task_names=(
   "antmaze-medium-play-v2"
   "antmaze-medium-diverse-v2"
   "antmaze-large-play-v2"
   "antmaze-large-diverse-v2"
 )
 # Forwarded verbatim as --reward_file.
 reward_file="None"

 # Create one log directory per task.
 # -p creates the missing parent "out_logs" and is a no-op when the directory
 # already exists, so the output no longer has to be discarded to hide
 # "File exists" noise; genuine failures (e.g. permissions) stay visible.
 for task_name in "${task_names[@]}"; do
   mkdir -p -- "out_logs/${task_name}"
 done
 # Print the GPU index that follows index $1 when cycling through $2 devices
 # (wraps back to 0 after $2 - 1).
 next_gpu_index() {
   printf '%s\n' "$(( ($1 + 1) % $2 ))"
 }

 n=6        # GPU index assigned to the first job
 gpunum=7   # jobs cycle over GPU indices 0 .. gpunum-1

 # Launch one background training job per (use_dataset_reward, seed, prefix,
 # task) combination. Each job is pinned to a single GPU through
 # CUDA_VISIBLE_DEVICES and writes stdout+stderr to its own log file under
 # out_logs/<task>/.
 for use_dataset_reward in "${use_dataset_rewards[@]}"; do
   for seed in "${seeds[@]}"; do
     for prefix in "${prefixes[@]}"; do
       for task_name in "${task_names[@]}"; do
         # Shared by the result directory and the log file so the two never drift.
         run_name="${prefix}_use_dataset_rewards_${use_dataset_reward}_seed_${seed}"

         # nohup + '&': the job is detached and keeps running after this
         # script (and the launching terminal) exit.
         CUDA_VISIBLE_DEVICES="${n}" XLA_PYTHON_CLIENT_PREALLOCATE=false \
           nohup python -m otr.train_offline \
           --workdir "/data3/zj/optimal_transport_reward/results/${task_name}/${run_name}" \
           --reward_file "${reward_file}" \
           --config /data3/zj/optimal_transport_reward/otr/configs/otr_iql_antmaze.py \
           --config.expert_dataset_name="${task_name}" \
           --config.k=10 \
           --config.offline_dataset_name="${task_name}" \
           --config.use_dataset_reward="${use_dataset_reward}" \
           --config.seed="${seed}" \
           > "out_logs/${task_name}/${run_name}.txt" 2>&1 &

         # Move on to the next GPU for the next job.
         n=$(next_gpu_index "${n}" "${gpunum}")
         # Short pause between launches (presumably to stagger job start-up).
         sleep 1
       done
     done
   done
 done


# Data collection: python data_collection_ml1.py ./configs/ml1.json
# To change to other environments, modify "env_name" in ./configs/ml1.json.

# Training: bash run_ml1.sh.
# To change to other environments, modify "datadirs" in line 10, as well as "env_name" in ./configs/cpeal-ml1.json.

# Plot training curves: See plot_new_ml1.py.