# Experiment configuration that selects the `nstep_matrix` environment.
# NOTE(review): the meanings noted below are inferred from key names only;
# confirm each against the code that loads this file.
env: nstep_matrix

# Settings grouped under `env_args`; presumably forwarded to the environment
# when it is constructed -- TODO confirm.
env_args:
  # presumably the random seed for the environment -- TODO confirm
  seed: 8888
  # presumably the number of steps per episode -- TODO confirm
  steps: 10
  # NOTE(review): meaning is not visible from this file; check the env code
  good_branches: 2
  # Quoted so it is always read as a string; presumably selects how the
  # state is encoded -- TODO confirm
  state_type: 'idx'

# Evaluation and logging cadence. The four `*_interval` keys below all share
# the same value (500); units are presumably environment timesteps -- TODO
# confirm.
# presumably the number of episodes run per evaluation -- TODO confirm
test_nepisode: 32
test_interval: 500
log_interval: 500
runner_log_interval: 500
learner_log_interval: 500
# presumably the total timestep budget for the run -- TODO confirm
t_max: 105000
