(MDP)
	gridworld_h-6_w-6
(Agents)
	Q-learning,0
	RLang-Q-learning,1
(Params)
	instances : 5
	episodes : 100
	steps : 200
	track_disc_reward : False
