# Size of the model described by this file.
# state_num matches the state labels s1..s5 used as keys in the mappings below;
# action_num matches the action labels a1..a5 and the length of every list in
# initial_theta_dict.
state_num: 5
action_num: 5
# Initial state distribution: one weight per state label (s1..s5).
# The five values sum to 1.0; keep that invariant when editing.
# NOTE(review): presumably the probability of starting an episode in each
# state - confirm against the code that loads this file.
initial_state_distribution_dict:
  s1: 0.3
  s2: 0.2
  s3: 0.1
  s4: 0.15
  s5: 0.25
# Initial theta values: for every state label (s1..s5) a list of five floats.
# Each list here is a permutation of 1.0..5.0.
# NOTE(review): presumably list position k holds the parameter for action
# a(k+1) in that state (e.g. policy logits/preferences) - the ordering and
# meaning are not visible in this file; confirm against the consumer.
initial_theta_dict:
  s1:
  - 1.0
  - 2.0
  - 4.0
  - 3.0
  - 5.0
  s2:
  - 3.0
  - 4.0
  - 5.0
  - 1.0
  - 2.0
  s3:
  - 5.0
  - 2.0
  - 3.0
  - 4.0
  - 1.0
  s4:
  - 5.0
  - 4.0
  - 2.0
  - 1.0
  - 3.0
  s5:
  - 2.0
  - 4.0
  - 3.0
  - 5.0
  - 1.0
# Reward table keyed by "s<i>_a<j>" (state label, underscore, action label).
# All 25 state/action combinations are listed; values lie in [0.1, 1.0].
# Note the key format differs from transition_prob_dict, which writes the
# state/action pair without an underscore ("s<i>a<j>_s<k>").
# NOTE(review): presumably the immediate reward for taking action a<j> in
# state s<i> - confirm against the code that loads this file.
reward_dict:
  s1_a1: 1.0
  s1_a2: 0.8
  s1_a3: 0.6
  s1_a4: 0.7
  s1_a5: 0.4
  s2_a1: 0.5
  s2_a2: 0.3
  s2_a3: 0.1
  s2_a4: 1.0
  s2_a5: 0.6
  s3_a1: 0.6
  s3_a2: 0.9
  s3_a3: 0.8
  s3_a4: 0.7
  s3_a5: 1.0
  s4_a1: 0.1
  s4_a2: 0.2
  s4_a3: 0.6
  s4_a4: 0.7
  s4_a5: 0.4
  s5_a1: 0.8
  s5_a2: 0.4
  s5_a3: 0.6
  s5_a4: 0.2
  s5_a5: 0.9
# Transition table keyed by "s<i>a<j>_s<k>": the part before the underscore is
# a state/action pair (written without a separator, unlike reward_dict's
# "s<i>_a<j>"), the part after it is a state label.
# All 125 combinations are listed, and for every "s<i>a<j>" prefix the five
# values sum to 1.0 - keep each group normalised when editing.
# NOTE(review): presumably the value is P(next state = s<k> | state = s<i>,
# action = a<j>) - confirm against the code that loads this file.
transition_prob_dict:
  # Entries with source state s1
  s1a1_s1: 0.1
  s1a1_s2: 0.5
  s1a1_s3: 0.1
  s1a1_s4: 0.2
  s1a1_s5: 0.1
  s1a2_s1: 0.6
  s1a2_s2: 0.1
  s1a2_s3: 0.1
  s1a2_s4: 0.1
  s1a2_s5: 0.1
  s1a3_s1: 0.5
  s1a3_s2: 0.1
  s1a3_s3: 0.1
  s1a3_s4: 0.2
  s1a3_s5: 0.1
  s1a4_s1: 0.4
  s1a4_s2: 0.3
  s1a4_s3: 0.1
  s1a4_s4: 0.1
  s1a4_s5: 0.1
  s1a5_s1: 0.2
  s1a5_s2: 0.1
  s1a5_s3: 0.1
  s1a5_s4: 0.1
  s1a5_s5: 0.5
  # Entries with source state s2
  s2a1_s1: 0.1
  s2a1_s2: 0.5
  s2a1_s3: 0.2
  s2a1_s4: 0.1
  s2a1_s5: 0.1
  s2a2_s1: 0.4
  s2a2_s2: 0.1
  s2a2_s3: 0.2
  s2a2_s4: 0.2
  s2a2_s5: 0.1
  s2a3_s1: 0.1
  s2a3_s2: 0.4
  s2a3_s3: 0.3
  s2a3_s4: 0.1
  s2a3_s5: 0.1
  s2a4_s1: 0.4
  s2a4_s2: 0.1
  s2a4_s3: 0.1
  s2a4_s4: 0.1
  s2a4_s5: 0.3
  s2a5_s1: 0.2
  s2a5_s2: 0.2
  s2a5_s3: 0.2
  s2a5_s4: 0.2
  s2a5_s5: 0.2
  # Entries with source state s3
  s3a1_s1: 0.6
  s3a1_s2: 0.1
  s3a1_s3: 0.1
  s3a1_s4: 0.1
  s3a1_s5: 0.1
  s3a2_s1: 0.2
  s3a2_s2: 0.4
  s3a2_s3: 0.1
  s3a2_s4: 0.2
  s3a2_s5: 0.1
  s3a3_s1: 0.3
  s3a3_s2: 0.3
  s3a3_s3: 0.2
  s3a3_s4: 0.1
  s3a3_s5: 0.1
  s3a4_s1: 0.1
  s3a4_s2: 0.4
  s3a4_s3: 0.3
  s3a4_s4: 0.1
  s3a4_s5: 0.1
  s3a5_s1: 0.2
  s3a5_s2: 0.1
  s3a5_s3: 0.1
  s3a5_s4: 0.1
  s3a5_s5: 0.5
  # Entries with source state s4
  s4a1_s1: 0.6
  s4a1_s2: 0.1
  s4a1_s3: 0.1
  s4a1_s4: 0.1
  s4a1_s5: 0.1
  s4a2_s1: 0.1
  s4a2_s2: 0.5
  s4a2_s3: 0.1
  s4a2_s4: 0.2
  s4a2_s5: 0.1
  s4a3_s1: 0.2
  s4a3_s2: 0.1
  s4a3_s3: 0.1
  s4a3_s4: 0.1
  s4a3_s5: 0.5
  s4a4_s1: 0.4
  s4a4_s2: 0.3
  s4a4_s3: 0.1
  s4a4_s4: 0.1
  s4a4_s5: 0.1
  s4a5_s1: 0.5
  s4a5_s2: 0.1
  s4a5_s3: 0.1
  s4a5_s4: 0.2
  s4a5_s5: 0.1
  # Entries with source state s5
  s5a1_s1: 0.2
  s5a1_s2: 0.2
  s5a1_s3: 0.2
  s5a1_s4: 0.2
  s5a1_s5: 0.2
  s5a2_s1: 0.4
  s5a2_s2: 0.1
  s5a2_s3: 0.2
  s5a2_s4: 0.2
  s5a2_s5: 0.1
  s5a3_s1: 0.4
  s5a3_s2: 0.1
  s5a3_s3: 0.1
  s5a3_s4: 0.3
  s5a3_s5: 0.1
  s5a4_s1: 0.1
  s5a4_s2: 0.4
  s5a4_s3: 0.2
  s5a4_s4: 0.1
  s5a4_s5: 0.2
  s5a5_s1: 0.2
  s5a5_s2: 0.5
  s5a5_s3: 0.1
  s5a5_s4: 0.1
  s5a5_s5: 0.1