# coding:utf8
import tensorflow.compat.v1 as tf
import numpy as np

class Pi_adaptive(object):
    """Linear softmax policy over items, trained with an importance-weighted loss.

    Every item is scored from a pre-computed context vector:
    ``logits = X . item_emb_w^T + item_b``. The training loss is a per-item
    sigmoid cross-entropy against the one-hot logged item (entries of the
    other items are weighted by ``mu``), summed over items and scaled per
    example by ``pi(a|x) / (beta(a|x) + epsilon) + alpha``. The ratio is
    wrapped in ``stop_gradient`` so it acts as a constant weight.

    The graph is built with the TF1 API (placeholders, sessions) in the
    current default graph. Variables are created with fixed names
    (``item_emb_w``, ``item_b``), so a second instance in the same variable
    scope needs variable reuse to be configured by the caller.
    """

    def __init__(self, X_count, A_count, mu, alpha, hidden_dim=64, epsilon=1e-9):
        """Build placeholders, variables, loss and the training op.

        Args:
            X_count: Stored on the instance only; the graph does not read it
                (the original comment calls it the user count).
            A_count: Number of items, i.e. the width of the logits.
            mu: Weight applied to the loss terms of items other than the
                logged one.
            alpha: Constant added to each example's importance weight.
            hidden_dim: Size of the context vectors fed through ``self.X``.
            epsilon: Added to the logged-policy probability before dividing.
        """
        # NOTE: the statements up to the creation of `item_emb_w` are kept in
        # their original order. With a graph-level seed, the default
        # initializer's op seed depends on how many ops precede it.
        tf.set_random_seed(1234)
        self.X_count = X_count
        self.A_count = A_count
        self.hidden_dim = hidden_dim
        self.mu = tf.cast(mu, tf.float32)
        self.alpha = tf.cast(alpha, tf.float32)

        self.epsilon = epsilon

        ############# Inputs ############
        self.X = tf.placeholder(tf.float32, [None, self.hidden_dim])  # [B, hidden_dim]
        self.item = tf.placeholder(tf.int32, [None,])  # [B] logged item ids
        # `label`, `display` and `ori_beta_uncertainty` are fed by `train`
        # but no op built in this class reads them.
        self.label = tf.placeholder(tf.float32, [None,])  # [B] reward
        self.display = tf.placeholder(tf.float32, [None,])  # [B]
        self.lr = tf.placeholder(tf.float64, [])
        self.beta_prob = tf.placeholder(tf.float32, [None, self.A_count])  # [B, A]
        self.ori_beta_uncertainty = tf.placeholder(tf.float32, [None,])  # [B]

        ############# Network ############
        self.item_emb_w = tf.get_variable("item_emb_w", [A_count, self.hidden_dim])
        self.item_b = tf.get_variable(
            "item_b", [1, A_count], initializer=tf.constant_initializer(0.0))
        # Step counters
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.global_epoch_step = tf.Variable(0, trainable=False, name='global_epoch_step')
        self.global_epoch_step_op = tf.assign(
            self.global_epoch_step, self.global_epoch_step + 1)

        ############# Loss ############
        self.logits = tf.linalg.matmul(
            self.X, self.item_emb_w, transpose_b=True) + self.item_b  # [B, A]
        self.softmax_prob = tf.nn.softmax(self.logits, axis=-1)  # [B, A]
        self.importance_weight = self.getIPS()  # one value per example
        example_weight = self.importance_weight + self.alpha

        chosen = tf.one_hot(self.item, self.A_count)  # [B, A]
        # Weight 1 for the logged item, `mu` for every other item.
        item_weight = self.mu * (1.0 - chosen) + chosen
        # Element-wise op, so it is applied to the [B, A] tensors directly.
        per_item_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=self.logits,
            labels=chosen)
        pi_CE = tf.reduce_sum(tf.multiply(item_weight, per_item_loss), axis=-1)  # [B]
        self.loss = tf.reduce_mean(tf.multiply(example_weight, pi_CE))

        ############# Update gradient ############
        # Collects every trainable variable in the default graph; variables
        # the loss does not depend on get a None gradient.
        trainable_params = tf.trainable_variables()
        self.opt = tf.train.AdamOptimizer(learning_rate=self.lr)
        gradients = tf.gradients(self.loss, trainable_params)
        clip_gradients, _ = tf.clip_by_global_norm(gradients, 5)
        self.train_op = self.opt.apply_gradients(
            zip(clip_gradients, trainable_params), global_step=self.global_step)

    def getIPS(self):
        """Return the importance weight of the logged item for each example.

        Picks the probability of ``self.item`` from both the current policy
        (``self.softmax_prob``) and the logged policy (``self.beta_prob``)
        and returns ``pi / (beta + epsilon)`` as a 1-D tensor. No gradient
        flows through the result.
        """
        # tf.boolean_mask is documented to take a boolean mask, so the float
        # one-hot is cast explicitly.
        chosen = tf.cast(tf.one_hot(self.item, self.A_count), tf.bool)  # [B, A]
        pi_chosen = tf.boolean_mask(self.softmax_prob, chosen)
        beta_chosen = tf.boolean_mask(self.beta_prob, chosen) + self.epsilon
        # A single stop_gradient on the ratio also blocks the path through
        # the softmax.
        return tf.stop_gradient(tf.math.divide(pi_chosen, beta_chosen))

    def train(self, sess, uij, l, beta_prob, beta_uncertainty, summary_writer):
        """Run one optimisation step and return the batch loss.

        Args:
            sess: Session used to run the graph.
            uij: Indexable batch; elements 0..3 are fed to ``X``, ``item``,
                ``label`` and ``display`` respectively.
            l: Learning rate for this step.
            beta_prob: Logged-policy probabilities fed to ``self.beta_prob``.
            beta_uncertainty: Fed to ``self.ori_beta_uncertainty``.
            summary_writer: Unused; kept so existing callers keep working.

        Returns:
            The value of ``self.loss`` on the batch.
        """
        feed = {
            self.X: uij[0],
            self.item: uij[1],
            self.label: uij[2],
            self.display: uij[3],
            self.lr: l,
            self.beta_prob: beta_prob,
            self.ori_beta_uncertainty: beta_uncertainty,
        }
        loss, _ = sess.run([self.loss, self.train_op], feed_dict=feed)
        return loss

    def run_evaluate_user(self, sess, x):
        """Return ``[logits]`` (a one-element list) for context vectors ``x``."""
        # The fetch is passed as a list, so the result is a list as well.
        return sess.run([self.logits], feed_dict={self.X: x})

    def run_eval(self, sess, x):
        """Return ``[softmax_prob]`` (a one-element list) for context vectors ``x``."""
        return sess.run([self.softmax_prob], feed_dict={self.X: x})

    def _get_saver(self):
        """Return a Saver for the default graph, rebuilding it only when needed.

        Constructing a ``tf.train.Saver`` adds save/restore ops to the graph,
        so one instance is reused across calls. It is rebuilt when the
        default graph or its set of global variables changes, which keeps the
        set of variables it covers the same as a freshly built Saver's.
        """
        signature = (
            tf.get_default_graph(),
            tuple(v.name for v in tf.global_variables()),
        )
        if getattr(self, "_saver_signature", None) != signature:
            self._saver = tf.train.Saver()
            self._saver_signature = signature
        return self._saver

    def save(self, sess, path):
        """Write the session's variables to the checkpoint at ``path``."""
        self._get_saver().save(sess, save_path=path)

    def restore(self, sess, path):
        """Load variables from the checkpoint at ``path`` into ``sess``."""
        self._get_saver().restore(sess, save_path=path)
