# Settings for the reward model, exposed as one plain dict with a nested
# 'reward' sub-dict.
# NOTE(review): the grouping comments below are inferred from key names only;
# no consumer of this dict is visible here -- confirm against the code that
# reads it.
reward_model_config = dict(
    # Nested sub-config; presumably the network architecture -- TODO confirm.
    reward=dict(
        n_layer=2,
        n_head=4,
        n_embd=512,
        dropout=0,
        bias=True,
        # NOTE(review): key is spelled 'lamda_gp' (sic). Kept verbatim,
        # because renaming a key would change every lookup of it.
        lamda_gp=0,
    ),

    # Presumably input / sequence geometry -- TODO confirm.
    img_size=64,
    patch_size=8,
    max_ep_len=100,
    seq_len=8,

    # Presumably optimizer settings -- TODO confirm.
    weight_decay=1e-4,
    warmup_steps=100,
    lr=3e-4,
    # NOTE(review): both entries are 0.9; confirm the second value is
    # intentional.
    betas=(0.9, 0.9),

    # Presumably batching settings -- TODO confirm.
    batch_size=32,
    gradient_accumulate_every=1,
)