import numpy as np

class Policy:
    """Base bandit policy holding per-arm statistics for one player.

    Attributes:
        T: Horizon, as passed to the constructor.
        t: Current round; starts at 0.
        K: Number of arms.
        int_rank: Internal rank of this player, stored exactly as given.
        num_of_players: Total number of players.
        means: Empirical mean of each arm, initialised to zeros.
        B: Confidence bound of each arm, initialised to +infinity.
        npulls: Number of pulls of each arm, initialised to zeros.
    """

    def __init__(self, narms, T, internal_rank, NUMOFPLAYERS):
        """Store the problem parameters and reset all per-arm statistics.

        Args:
            narms: Number of arms; also the length of every per-arm array.
            T: Horizon.
            internal_rank: Internal rank assigned to this player.
            NUMOFPLAYERS: Total number of players.
        """
        # Scalar settings and counters.
        self.T = T
        self.t = 0
        self.K = narms
        self.int_rank = internal_rank
        self.num_of_players = NUMOFPLAYERS

        # Per-arm statistics, one float entry per arm.
        self.means = np.zeros(narms)
        # Every bound starts at +inf.
        self.B = np.full(narms, np.inf)
        self.npulls = np.zeros(narms)

    def play(self):
        """Choose the arm to pull this round.

        Subclasses are expected to override this method; the fallback
        implementation here simply draws an arm index at random.

        Returns:
            A random integer arm index in ``[0, K)``.
        """
        chosen_arm = np.random.randint(self.K)
        return chosen_arm
