############# model parameters #############
# Single top-level mapping with the experiment configuration: the environment
# name and log/model paths first, then one sub-section per component
# (generator, planner, discover).
# NOTE(review): `Because` is an unusual name for a config root; presumably the
# consumer looks this key up by name -- confirm before renaming it.
Because:
    env_name: 'unlock'
    log_dir: './log/test'
    model_path: './model'
    model_id: 1
    generator:
        none: true
    planner:
        epsilon: 0.4
        max_buffer_size: 10000
        pretrain_buffer_size: 50
        validation_flag: true
        validation_freq: 1      # the frequency of validation
        validation_ratio: 0.3   # ratio of validation set
        lr: 0.001
        weight_decay: 0.001
        n_epochs: 5
        # NOTE(review): the comment on hidden_dim says "number" while the key
        # name suggests a width, and both keys below are 128 -- confirm which
        # one the model reads as layer count and which as layer size.
        hidden_dim: 128         # hidden layer number
        hidden_size: 128        # hidden layer size
        batch_size: 256
        scale: 1                # normalizing scale for NN input and output, determined by envs
        mpc:
            type: 'Random'
            # NOTE(review): the "tower" wording below may be left over from a
            # different environment (env_name here is 'unlock') -- confirm.
            horizon: 10         # should be consistent with the max height of tower
            popsize: 100        # how many random samples for mpc
            gamma: 1            # reward discount coefficient
            # NOTE(review): whether max_iters / num_elites are read when
            # type is 'Random' is not visible from this file -- confirm.
            max_iters: 5
            num_elites: 10
        num_envs: 20
    discover:
        discovery_interval: 10
