# rl_framework/utils.py
import random
from collections import defaultdict
from typing import Tuple

import numpy as np
import torch


def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch random number generators.

    The same ``seed`` is passed, in order, to ``random.seed``,
    ``np.random.seed``, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``.

    Args:
        seed: Integer seed handed to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)


class ReplayBuffer:
    """Fixed-capacity circular store of transitions, sampled uniformly.

    Each transition is ``(obs, action, reward, next_obs, done)``. Data lives
    in preallocated NumPy arrays; once ``capacity`` transitions have been
    added, each new one overwrites the oldest slot. ``sample`` returns
    torch tensors placed on ``device``.

    Storage dtypes: observations and rewards are ``float32``, actions are
    ``int64`` and done flags are ``float32`` (``0.0`` / ``1.0``).
    """

    def __init__(self, obs_shape: Tuple[int, ...], capacity: int, device: torch.device):
        """Allocate the storage arrays.

        Args:
            obs_shape: Shape of a single observation.
            capacity: Maximum number of transitions kept; must be >= 1.
            device: Device on which sampled tensors are created.

        Raises:
            ValueError: If ``capacity`` is smaller than 1.
        """
        # A zero-capacity buffer could never accept a transition (``add``
        # would index an empty array), so reject it up front.
        if capacity < 1:
            raise ValueError(f"capacity must be a positive integer, got {capacity}")

        self.capacity = capacity
        self.device = device

        self.obs_buf = np.zeros((capacity, *obs_shape), dtype=np.float32)
        self.next_obs_buf = np.zeros((capacity, *obs_shape), dtype=np.float32)
        self.actions_buf = np.zeros((capacity,), dtype=np.int64)
        self.rewards_buf = np.zeros((capacity,), dtype=np.float32)
        self.dones_buf = np.zeros((capacity,), dtype=np.float32)

        self.ptr = 0   # index of the slot the next ``add`` writes to
        self.size = 0  # number of valid transitions currently stored

    def __len__(self) -> int:
        """Return the number of transitions currently stored."""
        return self.size

    def add(self, obs, action, reward, next_obs, done) -> None:
        """Store one transition, overwriting the oldest one when full.

        Args:
            obs: Observation, assignable into a row of shape ``obs_shape``.
            action: Action, stored as ``int64``.
            reward: Reward, stored as ``float32``.
            next_obs: Next observation, same layout as ``obs``.
            done: Done flag; stored as ``float(done)``.
        """
        slot = self.ptr
        self.obs_buf[slot] = obs
        self.next_obs_buf[slot] = next_obs
        self.actions_buf[slot] = action
        self.rewards_buf[slot] = reward
        self.dones_buf[slot] = float(done)

        # Advance the write cursor, wrapping to slot 0 at the end.
        self.ptr = (slot + 1) % self.capacity
        self.size = min(self.size + 1, self.capacity)

    def sample(
        self, batch_size: int
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
        """Draw ``batch_size`` stored transitions uniformly with replacement.

        Indices come from NumPy's global RNG (``np.random.randint``), so
        results are reproducible after ``np.random.seed``.

        Args:
            batch_size: Number of transitions to draw.

        Returns:
            ``(obs, actions, rewards, next_obs, dones)`` as tensors on
            ``self.device``, each with leading dimension ``batch_size``.

        Raises:
            ValueError: If the buffer holds no transitions yet.
        """
        if self.size == 0:
            raise ValueError("cannot sample from an empty ReplayBuffer")

        idx = np.random.randint(0, self.size, size=batch_size)

        # Fancy indexing copies the selected rows, so the returned tensors
        # never alias the underlying storage arrays.
        obs = torch.as_tensor(self.obs_buf[idx], device=self.device)
        next_obs = torch.as_tensor(self.next_obs_buf[idx], device=self.device)
        actions = torch.as_tensor(self.actions_buf[idx], device=self.device)
        rewards = torch.as_tensor(self.rewards_buf[idx], device=self.device)
        dones = torch.as_tensor(self.dones_buf[idx], device=self.device)

        return obs, actions, rewards, next_obs, dones
