#!/bin/bash

# Top-K MoE PPO (MetaWorld MT10), no SPHERE regularization
# Usage: bash scripts/moe/metaworld/run_moe_ppo_topk.sh [additional Hydra params]

# Strict mode: abort on any failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Load the shared environment setup into this shell. The path is relative,
# so the script has to be started from the directory that contains scripts/.
. scripts/common/setup_env.sh

# Capture the timestamp once so that the same value is handed to the
# training command below via the date_time override.
DATE_TIME="$(date '+%Y-%m-%d_%H-%M-%S')"
printf 'Using unified timestamp: %s\n' "$DATE_TIME"

# Arguments for the training entry point, kept in an array so that every
# override reaches Python as exactly one word, in the order listed here.
train_args=(
  --config-name=algo/moe/moe_ppo_topk
  --multirun
  # Comma-separated values under --multirun: one value per seed.
  seed=0,1,2,3,4
  # Must be quoted: an unquoted [...] is a glob bracket expression to Bash.
  # It could be replaced by a matching file name in the current directory,
  # or be dropped / abort the command if the sourced environment setup
  # enables nullglob / failglob.
  "tasks=[hammer,push-wall,faucet-close,push-back,stick-pull,handle-press-side,push,shelf-place,window-close,peg-unplug-side]"
  total_timesteps=1000000
  "date_time=${DATE_TIME}"
  # Single-quoted on purpose: the ${...} placeholders are passed through
  # literally so the config system, not the shell, interpolates them.
  'run_name_prefix=moe_ppo_topk_crl${use_crl}_N${agent.policy_kwargs.n_experts}_k${agent.policy_kwargs.top_k}_pm${param_mult}_ln${agent.policy_kwargs.use_layer_norm}_l2${agent.policy_kwargs.use_l2_norm}_amo${agent.policy_kwargs.apply_to.actor}_cmo${agent.policy_kwargs.apply_to.critic}'
)

# HYDRA_FULL_ERROR=1 is set only for this command. Any extra command-line
# arguments given to this script are appended after the built-in overrides.
env HYDRA_FULL_ERROR=1 \
  python src/training/train.py "${train_args[@]}" "$@"

# Under errexit this line is reached only when the training command succeeds.
echo "MetaWorld MoE-PPO TopK runs completed"
