# Name of the environment this configuration targets.
# NOTE(review): presumably resolved through an environment registry defined
# elsewhere in the project -- confirm against the code that reads `env`.
env: "gymma"

# Environment-specific settings.
# NOTE(review): presumably forwarded to the environment's constructor --
# confirm against the code that reads `env_args`.
env_args:
  # No default is set here (null).
  # NOTE(review): presumably the identifier of the concrete task to load,
  # expected to be supplied by an override -- confirm.
  key: null
  # NOTE(review): presumably the maximum number of steps per episode -- confirm.
  time_limit: 100
  # No wrapper is configured by default (null).
  # NOTE(review): presumably names an optional wrapper applied to the
  # environment -- confirm.
  pretrained_wrapper: null
  # Run the experiment with the common-reward setup.
  common_reward: True
  # How to aggregate rewards into a single common reward (only used if
  # common_reward is True).
  reward_scalarisation: "sum"

# Evaluation, logging cadence and run length.
# NOTE(review): units are not stated in this file; the interval and t_max
# values are presumably counted in environment timesteps -- confirm against
# the runner that consumes them.

# NOTE(review): presumably makes evaluation use greedy action selection --
# confirm.
test_greedy: True
# NOTE(review): presumably the number of episodes per evaluation round --
# confirm.
test_nepisode: 100
# Evaluation and general logging share the same interval (50000).
test_interval: 50000
log_interval: 50000
# Runner and learner logging share a shorter interval (10000).
runner_log_interval: 10000
learner_log_interval: 10000
# Equals 41 * test_interval.
# NOTE(review): presumably the total training budget -- confirm.
t_max: 2050000
