#!/usr/bin/env bash

# NOTE: This script is meant to be duplicated/edited to run many sweeps.

set -euo pipefail

as_bool() {
	# Interpret a textual boolean.
	#
	# Usage:   as_bool "$VALUE"
	# Returns: 0 when VALUE is one of 1, true, yes, y, on
	#          1 when VALUE is one of 0, false, no, n, off, or empty/unset
	#          2 for anything else (a diagnostic is written to stderr)
	# Matching ignores letter case.
	local raw="${1:-}"
	local lowered="${raw,,}"
	local candidate

	for candidate in 1 true yes y on; do
		if [[ "${lowered}" == "${candidate}" ]]; then
			return 0
		fi
	done

	for candidate in 0 false no n off ""; do
		if [[ "${lowered}" == "${candidate}" ]]; then
			return 1
		fi
	done

	# Only reachable with a non-empty argument, so $1 is always set here.
	echo "Invalid boolean value: '$1' (use true/false or 1/0)" >&2
	return 2
}

# ----------------------------------------------------------------------
# Run identity and basic experiment settings
# ----------------------------------------------------------------------
ENV_ID=jumper
SEED=3
GPU_ID=3
DISTRIBUTION_MODE=hard        # distribution_mode
TOTAL_TIMESTEPS="50000000"

# Run label: appears in the tensorboard run dir and in wandb (if enabled).
# NOTE(review): the label encodes "lambdaactor0" / "lambdacritic0" while
# LAMBDA_REG_ACTOR / LAMBDA_REG_CRITIC below are 0.001 - confirm that the
# label still describes this run before launching.
EXP_NAME='hard_50M_lambdaactor0_up0.3333_lambdacritic0_up0.3333_seed3_pairs2048_resolution4_degree3_pca3_random_nol2'

# ----------------------------------------------------------------------
# WD / polynomial regularization hyperparameters
# ----------------------------------------------------------------------

# -- Chebyshev / polynomial fit --
RESOLUTION=4                  # points along the interpolation path (>=3)
MAX_DEGREE=3                  # maximum Chebyshev degree
DEGREE_MODE=index             # "index" or another mode understood by polynomial_regularization

# -- Pair sampling --
NUMS_PAIRS=2048               # pairs sampled per minibatch for the regularizer
RANDOM_ALPHA=true             # random cosine alpha per pair (recommended)

# -- Optional output transforms --
SMOOTH=false                  # smooth the sequence along the resolution axis
PCA_REG=3                     # if >0, apply PCA (first component) to actor sequence

# -- Regularization details --
MIU='0.0'                     # kept only for compatibility with the polynomial_regularization signature
USE_NORM=false                # use the norm in the wd computation
REMOVE_CONST=false            # drop the 0-order term
SQUARE=false                  # use square instead of abs in wd

# -- Coefficients and their warmup schedules --
LAMBDA_REG_ACTOR='0.001'
LAMBDA_REG_ACTOR_WARMUP='0.3333'    # warmup fraction of total iterations
LAMBDA_REG_ACTOR_WARM_TYPE=up       # warmup type: up / down
LAMBDA_REG_CRITIC='0.001'
LAMBDA_REG_CRITIC_WARMUP='0.3333'   # warmup fraction of total iterations
LAMBDA_REG_CRITIC_WARM_TYPE=up      # warmup type (presumably up / down like the actor setting - confirm)

# ----------------------------------------------------------------------
# Logging: mirror stdout and stderr into a per-run log file
# ----------------------------------------------------------------------
# The log path is derived from ENV_ID and EXP_NAME and is relative to the
# current working directory.
printf -v LOG_FILE './raw_new_procgen_%s_%s.log' "${ENV_ID}" "${EXP_NAME}"

# From this point on, both stdout and stderr of the script are piped through
# `tee -a`, which appends to LOG_FILE and still echoes to the original stdout.
exec &> >(tee -a "${LOG_FILE}")

# Command-line arguments for ppo_procgen.py. Value-carrying options are listed
# here; the on/off switches are appended below by _add_bool_flag.
ARGS=(
	--env-id "${ENV_ID}"
	--seed "${SEED}"
	--gpu-id "${GPU_ID}"
	--exp-name "${EXP_NAME}"
	--distribution-mode "${DISTRIBUTION_MODE}"
	--total-timesteps "${TOTAL_TIMESTEPS}"

	--resolution "${RESOLUTION}"
	--max-degree "${MAX_DEGREE}"
	--degree-mode "${DEGREE_MODE}"
	--nums-pairs "${NUMS_PAIRS}"
	--pca-reg "${PCA_REG}"
	--miu "${MIU}"

	--lambda-reg-actor "${LAMBDA_REG_ACTOR}"
	--lambda-reg-critic "${LAMBDA_REG_CRITIC}"
	--lambda-reg-actor-warmup "${LAMBDA_REG_ACTOR_WARMUP}"
	--lambda-reg-actor-warm-type "${LAMBDA_REG_ACTOR_WARM_TYPE}"
	--lambda-reg-critic-warmup "${LAMBDA_REG_CRITIC_WARMUP}"
	--lambda-reg-critic-warm-type "${LAMBDA_REG_CRITIC_WARM_TYPE}"
)

# _add_bool_flag NAME VALUE
#
# Appends "--NAME" to ARGS when VALUE is a true boolean and "--no-NAME" when it
# is a false one, as decided by as_bool. as_bool reports an unrecognised value
# with exit status 2; a plain `if as_bool ...; else ...` would treat that the
# same as "false" and silently launch the run with the wrong switch, so any
# status other than 0 or 1 aborts the script here instead.
_add_bool_flag() {
	local flag_name="$1"
	local flag_value="$2"
	local status=0

	# `|| status=$?` captures the exit code without tripping `set -e`.
	as_bool "${flag_value}" || status=$?

	case "${status}" in
		0) ARGS+=("--${flag_name}") ;;
		1) ARGS+=("--no-${flag_name}") ;;
		*)
			echo "Aborting: cannot decide between --${flag_name} and --no-${flag_name}" >&2
			exit 2
			;;
	esac
}

_add_bool_flag random-alpha "${RANDOM_ALPHA}"
_add_bool_flag smooth "${SMOOTH}"
_add_bool_flag use-norm "${USE_NORM}"
_add_bool_flag remove-const "${REMOVE_CONST}"
_add_bool_flag square "${SQUARE}"

# ppo_procgen.py is resolved relative to the current working directory.
python ppo_procgen.py "${ARGS[@]}"
