#!/bin/bash
#
# EUBRL LazyChain stochastic benchmark sweep.
#
# For every combination of Dirichlet parameter, beta and chain size listed
# below, invokes `python main.py` once per seed (seeds 0..19) with the EUBRL
# agent on the LazyChain environment.
#
# Usage: run from the directory that contains main.py; takes no arguments.
#
# Exit status:
#   0  every run exited successfully
#   1  the conda environment could not be activated, main.py was not found,
#      or at least one run exited non-zero

# Without the environment every run below would use whatever `python` happens
# to be on PATH, so stop right away instead of producing 100 bogus runs.
if ! source ~/miniconda3/bin/activate bayesrl; then
    echo "error: could not activate conda environment 'bayesrl'" >&2
    exit 1
fi

# main.py is addressed relative to the current directory.
if [[ ! -f main.py ]]; then
    echo "error: main.py not found in ${PWD}" >&2
    exit 1
fi

# Sweep grid.
chain_sizes=(10 20 50 100 200)
dirichlet_params=(1e-4)
betas=(1e-1)

# Seeds 0 .. num_seeds-1 are run for every grid point.
readonly num_seeds=20

# Number of runs that exited non-zero; decides the final exit status.
failed_runs=0

echo " --- EUBRL (w. Mutual Information) LazyChain Stochastic Benchmark --- "

for ds in "${dirichlet_params[@]}"; do
    for bt in "${betas[@]}"; do
        echo " ... Running Dir(ɑ) ${ds} | ϐ ${bt} ... "

        for cs in "${chain_sizes[@]}"; do
            echo " ... Chain Size ${cs} ..."

            for ((seed = 0; seed < num_seeds; seed++)); do
                echo " ... Running seed ${seed} ..."

                # Step budget, policy-update interval and EU scale all grow
                # linearly with the chain size.
                run_args=(
                    --agent-name="EUBRL"
                    --env-name="LazyChain"
                    --use-jax
                    --use-normal-gamma-prior
                    --policy-update-interval="${cs}"
                    --use-eubrl-reward
                    --seed="${seed}"
                    --num-environment-steps="$((1000 * cs))"
                    --eu-scale="$((3 * (2 * cs)))"
                    --dirichlet-param="${ds}"
                    --discount-factor=0.999
                    --eu-type="Information Gain"
                    --chain-size="${cs}"
                    --p-error=0.2
                    --instant-reward
                    --beta="${bt}"
                )

                # Best effort: one failed run must not cancel the rest of the
                # sweep, but it is reported and remembered.
                if ! python main.py "${run_args[@]}"; then
                    echo "warning: run failed (dirichlet=${ds}, beta=${bt}, chain size=${cs}, seed=${seed})" >&2
                    failed_runs=$((failed_runs + 1))
                fi
            done
        done
    done
done

if ((failed_runs > 0)); then
    echo "error: ${failed_runs} run(s) failed" >&2
    exit 1
fi
