# Copyright (c) 2024, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.

# Sweep header (layout matches a Weights & Biases sweep definition):
# which project the runs are filed under, which script is launched,
# how the parameter space is searched, and the sweep's display name.
project: FM-IRL
program: fmirl/main.py
method: grid
name: hopper-gail-1traj

# Launch line for each run. The ${...} entries are placeholders that the
# sweep runner substitutes; they are quoted so YAML keeps them as literal
# strings.
command:
  - '${env}'
  - python3
  - '${program}'
  - '${args}'

# Arguments handed to the program for every run of the sweep.
# `value` pins a single setting; `values` lists the settings to enumerate.
# Only `seed` lists more than one value, so the grid expands to four runs.
# NOTE(review): the meaning of each flag is defined by fmirl/main.py, which
# is not visible here; the notes below only restate what this file shows.
parameters:
  seed:
    values:
      - 1
      - 2
      - 3
      - 4
    # Single-seed alternative (disabled):
    # value: 1

  # Run labelling and algorithm selection.
  prefix:
    value: 'gail'
  alg:
    value: 'gail'

  # Expert data and environment.
  traj-load-path:
    value: './expert_datasets/ppo_hopper_1.pt'
  env-name:
    value: 'hopper-medium-v0'
  normalize-env:
    value: true
  cuda:
    value: true

  # Evaluation, rendering, logging and checkpoint cadence.
  eval-num-processes:
    value: 1
  num-render:
    value: 1000
  num-eval:
    value: 2
  vid-fps:
    value: 100
  log-interval:
    value: 1
  save-interval:
    value: 100000
  eval-interval:
    value: 20000

  # General training settings.
  clip-actions:
    value: true
  traj-batch-size:
    value: 128
  max-grad-norm:
    value: 0.5
  num-env-steps:
    value: 25000000
  traj-frac:
    value: 1
  action-input:
    value: false
  entropy-coef:
    value: 0.001

  # Policy-update settings.
  # Length of the storage used for the policy update.
  num-steps:
    value: 128
  # Number of epochs per policy update.
  num-epochs:
    value: 10
  # Number of mini-batches used for the policy update.
  num-mini-batch:
    value: 32

  gail-state-norm:
    value: true
  gail-reward-norm:
    value: true

  # The "noise 2.00" annotations below are carried over from the original
  # file; what they refer to is not evident from this config alone.
  lr:
    # Alternative grid (disabled):
    # values: [0.001, 0.0001, 0.00001]
    value: 0.0001  # noise 2.00
  disc-lr:
    # Alternative grid (disabled), noise 2.00:
    # values: [0.000002, 0.0000005]
    value: 0.0000005  # noise 2.00

  reward-type:
    value: 'airl-positive'
  ppo-hidden-dim:
    value: 256

