# Reference scores, one entry per task name.
# NOTE(review): `fwd_fmas`, `rev_fmas` and `mM_fmas` are presumably
# fused-multiply-add counts for a forward-mode, a reverse-mode and a third
# ("mM") baseline -- confirm against the code that reads this file.
scores:
  HumanHeartDipole:
    fwd_fmas: 240
    rev_fmas: 172
    mM_fmas: 194
  PropaneCombustion:
    fwd_fmas: 151
    rev_fmas: 90
    mM_fmas: 111
  RoeFlux_1d:
    fwd_fmas: 620
    rev_fmas: 364
    mM_fmas: 407
  RoeFlux_3d:
    fwd_fmas: 1556
    rev_fmas: 979
    mM_fmas: 938
  RobotArm_6DOF:
    fwd_fmas: 397
    rev_fmas: 301
    mM_fmas: 288
  BlackScholes_Jacobian:
    fwd_fmas: 545
    rev_fmas: 572
    mM_fmas: 350
  g:
    fwd_fmas: 632
    rev_fmas: 566
    mM_fmas: 451
  f:
    fwd_fmas: 17728
    rev_fmas: 8859
    mM_fmas: 12083
  Encoder:
    fwd_fmas: 135010
    rev_fmas: 4688
    mM_fmas: 51869
  Perceptron:
    fwd_fmas: 10930
    rev_fmas: 392
    mM_fmas: 4796


# Training hyperparameters.
# NOTE(review): the meaning of each key is inferred from its name only --
# confirm against the code that loads this file.
hyperparameters:
  num_envs: 32  # TODO: this probably needs to be much larger
  batchsize: 4096
  episodes: 5000
  value_weight: 10.0
  l2_weight: 0.0001
  lr: 0.001
  discount: 1.0

  # Search settings.
  # NOTE(review): the key names resemble the arguments of a Gumbel-based
  # tree-search policy (e.g. mctx) -- verify how the consumer forwards them.
  A0:
    gumbel_scale: 1.0
    num_considered_actions: 5
    num_simulations: 50
    replay_buffer_size: 50000  # too large a replay buffer is not good
    lookback: 1

    # Calibration of the qtransform is important for learning.
    qtransform:
      value_scale: 0.01
      maxvisit_init: 25
      rescale_values: true

