# coding:utf8
import tensorflow.compat.v1 as tf
import numpy as np

class Pi_banditNet(object):
  """Linear softmax policy over ``A_count`` items trained on logged feedback.

  The whole graph is built in ``__init__`` using TF1 placeholders. Logits are
  ``X @ item_emb_w^T + item_b``. The loss is the mean over the batch of

      importance_weight * (label - lambdaDiff) * cross_entropy(item, logits)

  where ``importance_weight`` is ``pi(item | X) / (beta(item | X) + epsilon)``
  with gradients blocked (see ``getIPS``).
  """

  def __init__(self, X_count, A_count, lambdaDiff, hidden_dim=64, epsilon=1e-9):
    """Build placeholders, variables, the loss and the Adam training op.

    Args:
      X_count: stored on the instance; not used while building the graph.
      A_count: number of items, i.e. the width of the softmax.
      lambdaDiff: value subtracted from ``label`` inside the loss; it is cast
        to float32.
      hidden_dim: width of the ``X`` features and of every item embedding.
      epsilon: constant added to the logging probability before dividing.
    """
    tf.set_random_seed(1234)
    self.X_count = X_count  # user count
    self.A_count = A_count  # item count (original author's note: 30938)
    self.hidden_dim = hidden_dim
    self.lambdaDiff = tf.cast(lambdaDiff, tf.float32)
    self.epsilon = epsilon

    ############# Inputs ############
    # NOTE(review): the statements up to the variable definitions are kept in
    # their original order; with a graph-level seed, reordering op creation
    # may change the default-initialised embedding values -- confirm before
    # moving anything.
    self.X = tf.placeholder(tf.float32, [None, self.hidden_dim])  # [B, hidden_dim]
    self.item = tf.placeholder(tf.int32, [None,])  # [B] logged item index
    self.label = tf.placeholder(tf.float32, [None,])  # [B] reward
    self.display = tf.placeholder(tf.float32, [None,])  # [B], fed in train() but unused by the graph
    self.lr = tf.placeholder(tf.float64, [])  # scalar learning rate
    self.beta_prob = tf.placeholder(tf.float32, [None, self.A_count])  # [B, A_count]
    self.ori_beta_uncertainty = tf.placeholder(tf.float32, [None,])  # [B], fed in train() but unused by the graph

    ############# Network ############
    self.item_emb_w = tf.get_variable("item_emb_w", [A_count, self.hidden_dim])
    self.item_b = tf.get_variable(
        "item_b", [1, A_count], initializer=tf.constant_initializer(0.0))

    # Step counters (not trainable).
    self.global_step = tf.Variable(0, trainable=False, name='global_step')
    self.global_epoch_step = tf.Variable(0, trainable=False, name='global_epoch_step')
    self.global_epoch_step_op = tf.assign(
        self.global_epoch_step, self.global_epoch_step + 1)

    ################### LOSS ###########################
    self.logits = tf.linalg.matmul(self.X, tf.transpose(self.item_emb_w)) + self.item_b
    self.softmax_prob = tf.keras.layers.Softmax(axis=-1)(self.logits)
    onehot_labels = tf.one_hot(self.item, self.A_count)
    loss_per_example = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=onehot_labels, logits=self.logits)
    # Reward shifted by lambdaDiff, applied to the per-example cross-entropy.
    shifted_loss = tf.multiply(self.label - self.lambdaDiff, loss_per_example)
    self.importance_weight = self.getIPS()
    self.loss = tf.reduce_mean(tf.multiply(self.importance_weight, shifted_loss))

    ############# UPDATE GRADIENT ############
    trainable_params = tf.trainable_variables()
    self.opt = tf.train.AdamOptimizer(learning_rate=self.lr)
    gradients = tf.gradients(self.loss, trainable_params)
    # Clip by global norm (threshold 5) before applying the update.
    clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)
    self.train_op = self.opt.apply_gradients(
        zip(clip_gradients, trainable_params), global_step=self.global_step)

  def getIPS(self):
    """Return ``pi(item | X) / (beta(item | X) + epsilon)`` with no gradient.

    The probabilities of the logged item are picked out of ``softmax_prob``
    and ``beta_prob`` with a one-hot mask over the item axis.

    Returns:
      A 1-D tensor holding one ratio per selected (row, item) entry; gradients
      do not flow through it.
    """
    # tf.boolean_mask is documented to take a boolean mask, so convert the
    # float one-hot encoding explicitly instead of passing floats through.
    chosen = tf.cast(tf.one_hot(self.item, self.A_count), tf.bool)
    pi_chosen = tf.boolean_mask(self.softmax_prob, chosen)
    beta_chosen = tf.boolean_mask(self.beta_prob, chosen) + self.epsilon
    # A single stop_gradient on the ratio blocks every path back to the
    # policy, so no separate stop_gradient on the numerator is needed.
    return tf.stop_gradient(tf.math.divide(pi_chosen, beta_chosen))

  def train(self, sess, uij, l, beta_prob, beta_uncertainty, summary_writer):
    """Run one optimisation step and return the batch loss.

    Args:
      sess: session used to run the graph.
      uij: indexable batch; positions 0..3 feed ``X``, ``item``, ``label`` and
        ``display`` respectively.
      l: learning rate fed to ``lr``.
      beta_prob: logging-policy probabilities fed to ``beta_prob``.
      beta_uncertainty: fed to ``ori_beta_uncertainty``.
      summary_writer: accepted for interface compatibility; not used here.

    Returns:
      The value of ``self.loss`` for this batch.
    """
    feed = {
        self.X: uij[0],
        self.item: uij[1],
        self.label: uij[2],
        self.display: uij[3],
        self.lr: l,
        self.beta_prob: beta_prob,
        self.ori_beta_uncertainty: beta_uncertainty,
    }
    loss, _ = sess.run([self.loss, self.train_op], feed_dict=feed)
    return loss

  def run_evaluate_user(self, sess, x):
    """Evaluate the raw logits for features ``x``.

    Returns:
      The result of running ``[self.logits]``; because the fetch is passed as
      a one-element list, the result is a one-element list as well.
    """
    eval_score = sess.run([self.logits], feed_dict={self.X: x})
    return eval_score

  def run_eval(self, sess, x):
    """Evaluate the softmax probabilities for features ``x``.

    Returns:
      The result of running ``[self.softmax_prob]``; because the fetch is
      passed as a one-element list, the result is a one-element list as well.
    """
    pi_prob = sess.run([self.softmax_prob], feed_dict={self.X: x})
    return pi_prob

  def save(self, sess, path):
    """Write a checkpoint to ``path`` using a freshly built ``Saver``."""
    saver = tf.train.Saver()
    saver.save(sess, save_path=path)

  def restore(self, sess, path):
    """Load variables from the checkpoint at ``path`` using a freshly built ``Saver``."""
    saver = tf.train.Saver()
    saver.restore(sess, save_path=path)
