import numpy as np

class AdamOptimizer:
    """Adam optimizer working on dictionaries of parameters and gradients.

    One first-moment and one second-moment estimate is kept per parameter
    key, together with a single step counter shared by all keys.

    Public attributes: ``learning_rate``, ``beta1``, ``beta2``, ``epsilon``,
    ``m_t`` (first moments by key), ``v_t`` (second moments by key) and
    ``t`` (number of ``update`` calls since the last reset).
    """

    def __init__(self, learning_rate: float = 0.001, beta1: float = 0.9,
                 beta2: float = 0.999, epsilon: float = 1e-8):
        """Store the hyperparameters and start from an empty state.

        Args:
            learning_rate: Step size applied to every parameter update.
            beta1: Decay rate of the first-moment (mean) estimate.
            beta2: Decay rate of the second-moment (squared gradient) estimate.
            epsilon: Constant added to the denominator of the update to
                avoid division by zero.
        """
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        # Single source of truth for the initial state (m_t, v_t, t).
        self.reinitialize()

    def reinitialize(self):
        """Discard all moment estimates and reset the step counter to 0.

        Hyperparameters are left untouched.
        """
        self.m_t = {}  # Dictionary for first moment
        self.v_t = {}  # Dictionary for second moment
        self.t = 0  # Time step

    def check(self):
        """Print the current moment estimates and the learning rate."""
        print('m_t', self.m_t, 'v_t ', self.v_t, 'lr ', self.learning_rate)

    def update(self, params: dict, grads: dict) -> dict:
        """Apply one Adam step and return the updated parameters.

        The inputs are not modified; a new dictionary is returned.

        Args:
            params: Mapping from parameter key to its current value.
            grads: Mapping from parameter key to the gradient of that
                parameter. Must contain every key of ``params``. Plain
                lists/tuples are converted to NumPy arrays; any other
                value is used as-is.

        Returns:
            A dictionary with the same keys as ``params`` holding the
            updated values.

        Raises:
            KeyError: If a key of ``params`` is missing from ``grads``. The
                step counter, and the moments of keys processed earlier in
                the same call, have already been advanced at that point.

        Note:
            The step counter is shared by all keys, so a key that first
            appears on a later call is bias-corrected with the global step
            rather than with its own update count.
        """
        self.t += 1  # Increment time step

        # The bias-correction denominators depend only on the step counter,
        # so compute them once instead of once per parameter.
        bias_correction1 = 1 - self.beta1 ** self.t
        bias_correction2 = 1 - self.beta2 ** self.t

        updated_params = {}
        for key in params:
            grad = grads[key]
            # Parameters given as plain sequences already work because NumPy
            # coerces them below; gradients need the same treatment, since
            # ``float * list`` raises TypeError.
            if isinstance(grad, (list, tuple)):
                grad = np.asarray(grad)

            # Initialize moment estimates for each parameter if not already done
            if key not in self.m_t:
                self.m_t[key] = np.zeros_like(params[key])
            if key not in self.v_t:
                self.v_t[key] = np.zeros_like(params[key])

            # Update biased first moment estimate
            self.m_t[key] = self.beta1 * self.m_t[key] + (1 - self.beta1) * grad
            # Update biased second raw moment estimate
            self.v_t[key] = self.beta2 * self.v_t[key] + (1 - self.beta2) * (grad ** 2)

            # Correct bias in first and second moment estimates
            m_hat = self.m_t[key] / bias_correction1
            v_hat = self.v_t[key] / bias_correction2

            # Update parameters
            param_update = self.learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon)
            updated_params[key] = params[key] - param_update

        return updated_params