#!/bin/bash

# Top-K MoE PPO with SPHERE (MetaWorld MT10) using gradient-norm scaling.
# Usage:
#   bash scripts/moe/metaworld/run_moe_ppo_topk_sphere_gradscale.sh [additional Hydra params]
#
# Notes:
# - This script sets agent.sphere_scale_mode=grad (gradient-norm based scaling).
# - You can still override any Hydra field via the trailing "$@".

# Abort on the first failing command, on any unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

# Shared environment setup. The path is relative, so the script has to be
# launched from the directory that contains scripts/ (see the usage line).
source scripts/common/setup_env.sh

# One timestamp is captured up front and handed to the launcher as a single
# date_time override for the whole invocation.
DATE_TIME=$(date +"%Y-%m-%d_%H-%M-%S")
echo "Using unified timestamp: $DATE_TIME"

# Hydra overrides, one array element per argument.
# - The tasks list is single-quoted: an unquoted [...] is a shell glob bracket
#   expression and could be rewritten (or rejected under failglob/nullglob)
#   before python ever sees it.
# - run_name_prefix is single-quoted so every ${...} reaches the launcher
#   literally, to be resolved by the config system rather than by the shell.
# shellcheck disable=SC2016  # the ${...} below are intentionally not expanded
hydra_overrides=(
  seed=0,1,2,3,4
  'tasks=[hammer,push-wall,faucet-close,push-back,stick-pull,handle-press-side,push,shelf-place,window-close,peg-unplug-side]'
  total_timesteps=1000000
  "date_time=${DATE_TIME}"
  agent.sphere_target_ratio=0.1
  agent.sphere_scale_mode=grad
  'run_name_prefix=moe_ppo_topk_sphere_gradscale_crl${use_crl}_N${agent.policy_kwargs.n_experts}_k${agent.policy_kwargs.top_k}_str${agent.sphere_target_ratio}_ssm${agent.sphere_scale_mode}_scg${agent.sphere_use_pcgrad}_pm${param_mult}_ln${agent.policy_kwargs.use_layer_norm}_l2${agent.policy_kwargs.use_l2_norm}_amo${agent.policy_kwargs.apply_to.actor}_cmo${agent.policy_kwargs.apply_to.critic}'
)

# Caller-supplied arguments ("$@") come last on the command line, after the
# defaults above.
env HYDRA_FULL_ERROR=1 \
  python src/training/train.py \
  --config-name=algo/moe/moe_ppo_topk_sphere \
  --multirun \
  "${hydra_overrides[@]}" \
  "$@"

# Only reached when the python command exits with status 0 (set -e).
echo "MetaWorld MoE-PPO SPHERE (gradscale) runs completed"
