import numpy as np
# from pdb import set_trace

def _softmax_and_logsumexp(logits):
    """Return ``(softmax(logits), log(sum(exp(logits))))`` computed stably.

    The maximum logit is subtracted before exponentiating so that large
    ``reward / tau`` values do not overflow to ``inf`` / ``nan``.
    """
    logits = np.asarray(logits, dtype=float)
    if logits.size == 0:
        # No outcomes: empty distribution and log(0) = -inf.
        return logits.copy(), -np.inf
    shift = np.max(logits)
    weights = np.exp(logits - shift)
    total = np.sum(weights)
    return weights / total, shift + np.log(total)


def _sentences_mentioning(probability_dicts, agent_index):
    """Return the indices of the sentences flagged as mentioning ``agent_index``.

    Returns an empty list when ``probability_dicts`` is ``None`` or when the
    agent is mentioned in every sentence (they cannot all be discarded, so
    all of them are kept).
    """
    if probability_dicts is None:
        return []
    key = f'advertiser {agent_index} mentioned'
    mentioned = [j for j, sample_dict in enumerate(probability_dicts) if sample_dict[key]]
    if len(mentioned) == len(probability_dicts):
        return []
    return mentioned


def _drop_and_renormalize(distribution, dropped_indices):
    """Return a copy of ``distribution`` with ``dropped_indices`` zeroed, renormalized to sum to 1."""
    masked = distribution.copy()
    for j in dropped_indices:
        masked[j] = 0
    masked /= np.sum(masked)
    return masked


def calculate_quantities_for_agent(all_rewards, agent_index, probability_rescales, probability_dicts, tau):
    r"""
    Calculates the probabilities vector (p), utility (u), expected values and related offsets for a specific agent,
    considering the rewards of all agents.

    :param all_rewards: A 2D numpy array of rewards, where each row is an agent's reward vector.
    :param agent_index: The index of the current agent (0-based).
    :param probability_rescales: A 1D numpy array of rescales for the probabilities, where each element is
        log(p_ref) - log(p_gen) for each outcome. ``None`` means no rescaling (all zeros).
    :param probability_dicts: A list with one dictionary per outcome (sentence). Each dictionary is read through the
        key ``'advertiser {k} mentioned'`` for every agent index k. ``None`` is treated as "no agent is mentioned in
        any sentence".
    :param tau: The temperature parameter (float).

    :return: A 10-tuple, in this order:
        - probabilities: A 1D numpy array of probabilities for each outcome.
        - utility_i_non_offset: The utility of the current agent for bidding truthfully, without any constant shift, according to the paper.
        - expected_value_i: The expected value of the current agent for the outcome under truthful bidding.
        - utility_i_zero_bids: The utility of the current agent for bidding zero, without any constant shift, according to the paper.
        - expected_externality_i: The expected externality of the current agent, but holding the set of sentences fixed.
        - expected_value_i_without_i_same_sentences: The expected value of the current agent for the probability of the outcome under not participating, holding the set of sentences fixed (i.e., there are sentences that mention her).
        - expected_value_improvement_i_same_sentences: The expected value improvement of the current agent for participating, holding the set of sentences fixed.
        - estimated_value_i_not_participating: An estimate (using the reports of the other agents) of the value of the agent for the outcome under not participating, over only the sentences where the agent is not mentioned. NaN when there are no other agents.
        - estimated_value_i_not_participating_approximate: An approximate estimate (using the reports of the other agents + the agent's reports for probabilities) of the same quantity. NaN when there are no other agents.
        - true_expected_value_i_without_i_participating: The expected value of the current agent for the outcome under not participating, over only the sentences where the current agent is not mentioned.

    Important values to log based on the above metrics:
    - agent's expected value for participating: expected_value_i
    - agent's externality: expected_externality_i
    - agent's value improvement for participating: expected_value_i - estimated_value_i_not_participating
    - agent's non-offset utility: utility_i_non_offset
    - agent's offset utility, with the offset set to the utility of bidding zero: utility_i_non_offset - utility_i_zero_bids
    - agent's offset utility, with the offset set to the expected value of not participating: utility_i_non_offset - estimated_value_i_not_participating
    - agent's offset utility, with both offsets: utility_i_non_offset - utility_i_zero_bids - estimated_value_i_not_participating
    - payments in all of the cases:
        a) t(r) = x(r) \cdot r - U(r)  (no offset)
        b) t(r) = x(r) \cdot r - x(r_{-i}) \cdot r_{-i} - U(r)   (offset equal to the expected value of not participating)
        c) t(r) = x(r) \cdot r - U(r) + U(0)                     (offset equal to the utility of bidding zero)
        d) t(r) = x(r) \cdot r - x(r_{-i}) \cdot r_{-i} - U(r) + U(0) (both offsets)

    """
    # np.array copies by default, so the caller's matrix is never mutated.
    all_rewards = np.array(all_rewards)

    # Sum the rewards of all agents except the current one (basis of beta_minus_i).
    other_agents_rewards = np.sum(np.delete(all_rewards, agent_index, axis=0), axis=0)

    # beta_minus_i for each outcome (does not include probability rescaling).
    beta_minus_i = other_agents_rewards / tau
    print(f"beta_minus_i: {beta_minus_i}")

    # Current agent's rewards
    r_i = all_rewards[agent_index]

    if probability_rescales is None:
        # Note: to align with the theoretical calculation, probability_rescales should be log p_ref - log p_gen.
        probability_rescales = np.zeros_like(beta_minus_i)

    # Logits with agent i bidding truthfully, and with agent i absent / bidding zero.
    logits_with_i = r_i / tau + beta_minus_i + probability_rescales
    logits_without_i = beta_minus_i + probability_rescales

    # x(r) and log-partition for truthful bidding; x(r_{-i}) and log-partition for zero bids.
    probabilities, log_partition_with_i = _softmax_and_logsumexp(logits_with_i)
    probabilities_without_i, log_partition_without_i = _softmax_and_logsumexp(logits_without_i)

    # Agent i's expected externality on the other agents.
    expected_externality_i = np.sum(probabilities_without_i * other_agents_rewards) - np.sum(probabilities * other_agents_rewards)

    # Agent i's utility for bidding truthfully / for bidding zero, according to the paper.
    utility_i_non_offset = tau * log_partition_with_i
    utility_i_zero_bids = tau * log_partition_without_i

    # Agent i's expected value for participating.
    expected_value_i = np.sum(probabilities * r_i)

    # Expected value for not participating, holding the generated sentences fixed.
    # This is not truthful, as it depends on agent i's reports (r_i).
    expected_value_i_without_i_same_sentences = np.sum(probabilities_without_i * r_i)

    # VALUE improvement for participating (holding the set of sentences fixed).
    expected_value_improvement_i_same_sentences = expected_value_i - expected_value_i_without_i_same_sentences

    # --- Expected value for *not* participating, restricted to sentences that do not mention agent i ---
    probabilities_without_i_participating = _drop_and_renormalize(
        probabilities_without_i, _sentences_mentioning(probability_dicts, agent_index)
    )
    true_expected_value_i_without_i_participating = np.sum(probabilities_without_i_participating * r_i)

    # -- Estimate the same quantity using all other agents' reports --
    other_agents_expected_value_not_participating = []
    other_agents_expected_value_not_participating_approximate = []
    for other_agent_index in range(all_rewards.shape[0]):
        if other_agent_index == agent_index:
            continue

        sentences_with_other_agent = _sentences_mentioning(probability_dicts, other_agent_index)

        # Exact variant: agent i is absent too, since the constant C cannot depend on her reports.
        probabilities_without_other_agent = _drop_and_renormalize(
            probabilities_without_i, sentences_with_other_agent
        )
        # Approximate variant: agent i is present in these probabilities, so slightly inaccurate.
        probabilities_without_other_agent_approximate = _drop_and_renormalize(
            probabilities, sentences_with_other_agent
        )

        r_other_agent = all_rewards[other_agent_index]
        other_agents_expected_value_not_participating.append(
            np.sum(probabilities_without_other_agent * r_other_agent)
        )
        other_agents_expected_value_not_participating_approximate.append(
            np.sum(probabilities_without_other_agent_approximate * r_other_agent)
        )

    if other_agents_expected_value_not_participating:
        estimated_value_i_not_participating = np.mean(other_agents_expected_value_not_participating)
        estimated_value_i_not_participating_approximate = np.mean(other_agents_expected_value_not_participating_approximate)
    else:
        # No other agents to average over: same NaN result as np.mean([]), without the RuntimeWarning.
        estimated_value_i_not_participating = np.nan
        estimated_value_i_not_participating_approximate = np.nan

    return (probabilities, utility_i_non_offset, expected_value_i, utility_i_zero_bids, expected_externality_i,
            expected_value_i_without_i_same_sentences, expected_value_improvement_i_same_sentences,
            estimated_value_i_not_participating, estimated_value_i_not_participating_approximate,
            true_expected_value_i_without_i_participating)

# Example usage
# Assuming a matrix of rewards for all agents and a specific agent index
if __name__ == "__main__":
    # Demo: three agents, three outcomes (sentences).
    all_rewards = np.array([
        [0.0, 0.0, 0.0],  # Agent 0's rewards
        [2.0, 1.5, 2.5],  # Agent 1's rewards
        [3.0, 2.5, 1.3],  # Agent 2's rewards
    ])

    tau = 1

    num_agents, num_outcomes = all_rewards.shape

    # One dictionary per outcome; in this demo no advertiser is mentioned in any sentence,
    # so no outcome is discarded in the "not participating" estimates.
    probability_dicts = [
        {f'advertiser {k} mentioned': False for k in range(num_agents)}
        for _ in range(num_outcomes)
    ]

    for agent_index in range(num_agents):
        # The function returns a 10-tuple; unpack every element in the order it is returned.
        (
            probabilities,
            utility_i_non_offset,
            expected_value_i,
            utility_i_zero_bids,
            expected_externality_i,
            expected_value_i_without_i_same_sentences,
            expected_value_improvement_i,
            estimated_value_i_not_participating,
            estimated_value_i_not_participating_approximate,
            true_expected_value_i_without_i,
        ) = calculate_quantities_for_agent(all_rewards, agent_index, None, probability_dicts, tau)
        print(f"Probabilities vector (x) for agent {agent_index}: {probabilities}")
        print(f"Utility (u) for agent {agent_index}: {utility_i_non_offset}")
        print(f"Utility (u) for agent {agent_index} bidding zero: {utility_i_zero_bids}")
        print(f"Expected value (v) for agent {agent_index}: {expected_value_i}")
        print(f"Expected externality (to other agents) for agent {agent_index}: {expected_externality_i}")
        print(f"Expected value improvement for agent {agent_index}: {expected_value_improvement_i}")
        print(f"True expected value {agent_index} without {agent_index}: {true_expected_value_i_without_i}")

    # set_trace()