# Configuration entry identified by the name 'q_learning_m'.
# NOTE(review): the code that loads this file is not visible from here; the
# key meanings below are inferred from the key names and the "q_learning"
# prefix only — confirm against the consumer before relying on them.

# String identifier for this configuration.
name: 'q_learning_m'
# Presumably the Q-learning step size (learning rate) — TODO confirm.
# If so, a value of 1.0 would make each update fully replace the prior estimate.
alpha: 1.0
# Presumably the discount factor applied to future rewards — TODO confirm.
gamma: 0.97