import numpy as np

def get_reward(feature_matrix, theta, n_states, state_idx):
    """Return the reward entry for ``state_idx`` under weights ``theta``.

    The product ``feature_matrix.dot(theta)`` is reshaped to a flat vector
    of ``n_states`` entries, which is then indexed with ``state_idx``.

    Args:
        feature_matrix: Object exposing ``.dot``; multiplied by ``theta``.
        theta: Weights passed to ``feature_matrix.dot``.
        n_states: Length the product is reshaped to; the reshape raises if
            the product does not contain exactly this many elements.
        state_idx: Index into the flattened reward vector.

    Returns:
        The selected element(s) of the flattened reward vector.
    """
    weighted_features = feature_matrix.dot(theta)
    rewards_by_state = weighted_features.reshape(n_states)
    return rewards_by_state[state_idx]

def expert_feature_expectations(feature_matrix, demonstrations, num_demo):
    """Average the feature rows of the states visited in the demonstrations.

    Args:
        feature_matrix: Array indexed by state on its first axis; entry
            ``feature_matrix[i]`` is the feature row added for state ``i``.
        demonstrations: Iterable of 2-item entries whose first item is a
            state index (converted with ``int``); the second item is ignored.
        num_demo: Divisor applied to the accumulated feature sum.

    Returns:
        Float array with the shape of a single feature row, holding the
        accumulated features divided by ``num_demo``.
    """
    # Size the accumulator like one feature row rather than like the number
    # of states, so non-square feature matrices accumulate correctly.
    feature_expectations = np.zeros(feature_matrix.shape[1:])

    for state_idx, _ in demonstrations:
        feature_expectations += feature_matrix[int(state_idx)]

    feature_expectations /= num_demo
    return feature_expectations

def maxent_irl(expert, learner, theta, learning_rate):
    """Apply one gradient step to ``theta`` in place, then clip it to <= 0.

    The gradient is ``expert - learner``. ``theta`` is incremented by
    ``learning_rate * gradient`` and every strictly positive entry is then
    reset to zero.

    Args:
        expert: Expert-side term of the gradient.
        learner: Learner-side term subtracted from ``expert``.
        theta: Weights updated in place (assumed to be a NumPy float array
            -- confirm against the caller).
        learning_rate: Step size multiplying the gradient.

    Returns:
        None; the result is written into ``theta``.
    """
    gradient = expert - learner
    theta += learning_rate * gradient

    # Clip theta: zero every strictly positive entry with one masked
    # assignment. NaN entries compare False and are left untouched, exactly
    # as a per-element ``> 0`` check would leave them.
    theta[theta > 0] = 0