; Hyperparameters for the training run.
; NOTE(review): the _D / _A suffixes appear to denote two separate agents;
; their exact meaning is not defined in this file -- confirm against the
; code that reads it.
[HP]
; Exploration rate (epsilon), one value per agent.
; NOTE(review): presumably the probability of choosing a random action
; (epsilon-greedy) -- confirm against the consuming code.
EPS_D = 0.1
EPS_A = 0.1

; Discount factor.
; A single value is shared by both agents: GAMMA = GAMMA_D = GAMMA_A.
GAMMA = 0.8

; Learning rate (step size of each update).
ALPHA = 0.05

; Training length: number of episodes, and number of steps in each episode.
EPISODES = 150
STEPS_PER_EPISODE = 100

; Number of trials used to plot rewards with variance.
; NOTE(review): presumably each trial is an independent repetition of the
; full run of EPISODES episodes -- confirm.
NUM_TRIALS = 10