import os

import numpy as np
import matplotlib.pyplot as plt

from Environments.AdaptChain import AdaptChain
from Agents.Increinforce import Increinforce
from Agents.Reinforce2 import Reinforce
from Agents.IPGOmega import IPGOmega

# Directory that result arrays are written into.
results_dir = 'Results'
# Prefix identifying this experiment in the saved file names.
tag = 'IPGO_FwdChain'


def fname(tag, α_π, ω, semigrad):
  """Build the path of the .npy file for one hyper-parameter setting.

  The path has the form
  ``<results_dir>/<tag>_<α_π>_<ω>_<semigrad>.npy`` where ``α_π`` and ``ω``
  are rendered in fixed-point notation with six decimals and ``tag`` /
  ``semigrad`` use their default string formatting.

  Args:
    tag: Prefix for the file name.
    α_π: Step size, formatted with ``.6f``.
    ω: Omega value, formatted with ``.6f``.
    semigrad: Flag included verbatim (e.g. ``True``) in the file name.

  Returns:
    The path as a string, using ``/`` as the separator.
  """
  pieces = (
    format(tag),
    format(α_π, ".6f"),
    format(ω, ".6f"),
    format(semigrad),
  )
  stem = "_".join(pieces)
  return format(results_dir) + "/" + stem + ".npy"

# --- Sweep configuration ----------------------------------------------------
semigrads = [True]
α_πs = 2.0 ** -np.arange(6, 14)  # step sizes 2^-6, 2^-7, ..., 2^-13
ωs = [1.0, 0.5, 0.25, 0.03, 0.01, 0.001, 0.0001, 0.0]
# Only a two-element slice of the ω grid is swept per invocation; switch the
# active line to pick another slice (presumably so the full grid can be split
# across separate runs of this script -- confirm with the author).
#ωs = ωs[0:2]
#ωs = ωs[2:4]
#ωs = ωs[4:6]
ωs = ωs[6:8]


n_runs = 1000    # independent seeds per hyper-parameter setting
n_steps = 50000  # steps passed to agent.run_steps for each run
γ = 0.98

env = AdaptChain(50)

# Make sure the output directory exists *before* starting: np.save is only
# reached after all runs of a setting have finished, so a missing directory
# would otherwise throw away the whole computation at the very end.
os.makedirs(results_dir, exist_ok=True)

# --- Sweep ------------------------------------------------------------------
for semigrad in semigrads:
  for ω in ωs:
    for α_π in α_πs:
      # One row per run; each row holds the 'n_eps' log returned by the agent.
      n_eps = np.zeros((n_runs, n_steps))
      # NOTE(review): the agent (and env) are built once and reused for every
      # run of this setting. This assumes run_steps() re-initialises whatever
      # state the agent learns -- confirm, otherwise runs are not independent.
      agent = IPGOmega(env, γ=γ, α_π=α_π, ω=ω, semigrad=semigrad)
      for run in range(n_runs):
        # Seed NumPy's global RNG with the run index so run k is reproducible.
        np.random.seed(run)
        results = agent.run_steps(n_steps, log_n_eps=True)
        # Assumes results['n_eps'] has n_steps entries -- TODO confirm.
        n_eps[run] = results['n_eps']
        print(f"ω: {ω:.6f}, α_π: {α_π:.6f}, semigrad: {semigrad}, run: {run}, n_eps: {n_eps[run][-1]}")
      np.save(fname(tag, α_π, ω, semigrad), n_eps)