# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""



Functions to create bandit problems from datasets.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import pandas as pd
import tensorflow as tf
if tf.__version__.startswith('1'):
    from tensorflow.contrib import learn
else:
    # Warning: Currently switching to TF2 prevents loading some of the non-UCI datasets
    import tensorflow.compat.v1 as tf
import bandits.data.data_helpers as data_helpers
from sklearn import preprocessing
def one_hot(df, cols):
  """Returns one-hot encoding of DataFrame df including columns in cols."""
  for col in cols:
    dummies = pd.get_dummies(df[col], prefix=col, drop_first=False)
    df = pd.concat([df, dummies], axis=1)
    df = df.drop(col, axis=1)
  return df


def sample_mushroom_data(file_name,
                         num_contexts,
                         r_noeat=0,
                         r_eat_safe=5,
                         r_eat_poison_bad=-35,
                         r_eat_poison_good=5,
                         prob_poison_bad=0.5):
  """Samples bandit game from Mushroom UCI Dataset.

  Args:
    file_name: Route of file containing the original Mushroom UCI dataset.
    num_contexts: Number of points to sample, i.e. (context, action rewards).
    r_noeat: Reward for not eating a mushroom.
    r_eat_safe: Reward for eating a non-poisonous mushroom.
    r_eat_poison_bad: Reward for eating a poisonous mushroom if harmed.
    r_eat_poison_good: Reward for eating a poisonous mushroom if not harmed.
    prob_poison_bad: Probability of being harmed by eating a poisonous mushroom.

  Returns:
    dataset: Sampled matrix with n rows: (context, eat_reward, no_eat_reward).
    opt_vals: Vector of expected optimal (reward, action) for each context.

  We assume r_eat_safe > r_noeat, and r_eat_poison_good > r_eat_poison_bad.
  """

  # first two cols of df encode whether mushroom is edible or poisonous
  df = pd.read_csv(file_name, header=None)
  df = one_hot(df, df.columns)
  ind = np.random.choice(range(df.shape[0]), num_contexts, replace=True)

  contexts = df.iloc[ind, 2:]
  no_eat_reward = r_noeat * np.ones((num_contexts, 1))
  random_poison = np.random.choice(
      [r_eat_poison_bad, r_eat_poison_good],
      p=[prob_poison_bad, 1 - prob_poison_bad],
      size=num_contexts)
  eat_reward = r_eat_safe * df.iloc[ind, 0]
  eat_reward += np.multiply(random_poison, df.iloc[ind, 1])
  eat_reward = eat_reward.values.reshape((num_contexts, 1))

  # compute optimal expected reward and optimal actions
  exp_eat_poison_reward = r_eat_poison_bad * prob_poison_bad
  exp_eat_poison_reward += r_eat_poison_good * (1 - prob_poison_bad)
  opt_exp_reward = r_eat_safe * df.iloc[ind, 0] + max(
      r_noeat, exp_eat_poison_reward) * df.iloc[ind, 1]

  if r_noeat > exp_eat_poison_reward:
    # actions: no eat = 0 ; eat = 1
    opt_actions = df.iloc[ind, 0]  # indicator of edible
  else:
    # should always eat (higher expected reward)
    opt_actions = np.ones((num_contexts, 1))

  opt_vals = (opt_exp_reward.values, opt_actions.values)
  return np.hstack((contexts, no_eat_reward, eat_reward)), opt_vals


def sample_stock_data(file_name, context_dim, num_actions, num_contexts,
                      sigma, shuffle_rows=True):
  """Samples linear bandit game from stock prices dataset.

  Args:
    file_name: Route of file containing the stock prices dataset.
    context_dim: Context dimension (i.e. vector with the price of each stock).
    num_actions: Number of actions (different linear portfolio strategies).
    num_contexts: Number of contexts to sample.
    sigma: Vector with additive noise levels for each action.
    shuffle_rows: If True, rows from original dataset are shuffled.

  Returns:
    dataset: Sampled matrix with rows: (context, reward_1, ..., reward_k).
    opt_vals: Vector of expected optimal (reward, action) for each context.
  """

  with tf.gfile.Open(file_name, 'r') as f:
    contexts = np.loadtxt(f, skiprows=1)

  if shuffle_rows:
    np.random.shuffle(contexts)
  contexts = contexts[:num_contexts, :]

  betas = np.random.uniform(-1, 1, (context_dim, num_actions))
  betas /= np.linalg.norm(betas, axis=0)

  mean_rewards = np.dot(contexts, betas)
  noise = np.random.normal(scale=sigma, size=mean_rewards.shape)
  rewards = mean_rewards + noise

  opt_actions = np.argmax(mean_rewards, axis=1)
  opt_rewards = [mean_rewards[i, a] for i, a in enumerate(opt_actions)]
  return np.hstack((contexts, rewards)), (np.array(opt_rewards), opt_actions)


def sample_jester_data(file_name, context_dim, num_actions, num_contexts,
                       shuffle_rows=True, shuffle_cols=False):
  """Samples bandit game from (user, joke) dense subset of Jester dataset.

  Args:
    file_name: Route of file containing the modified Jester dataset.
    context_dim: Context dimension (i.e. vector with some ratings from a user).
    num_actions: Number of actions (number of joke ratings to predict).
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    shuffle_cols: Whether or not context/action jokes are randomly shuffled.

  Returns:
    dataset: Sampled matrix with rows: (context, rating_1, ..., rating_k).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.
  """

  with tf.gfile.Open(file_name, 'rb') as f:
    dataset = np.load(f)

  if shuffle_cols:
    dataset = dataset[:, np.random.permutation(dataset.shape[1])]
  if shuffle_rows:
    np.random.shuffle(dataset)
  dataset = dataset[:num_contexts, :]

  assert context_dim + num_actions == dataset.shape[1], 'Wrong data dimensions.'

  opt_actions = np.argmax(dataset[:, context_dim:], axis=1)
  opt_rewards = np.array([dataset[i, context_dim + a]
                          for i, a in enumerate(opt_actions)])

  return dataset, (opt_rewards, opt_actions)


def sample_statlog_data(file_name, num_contexts, shuffle_rows=True,
                        remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI statlog data.

  Args:
    file_name: Route of file containing the Statlog dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  https://archive.ics.uci.edu/ml/datasets/Statlog+(Shuttle)
  """

  with tf.gfile.Open(file_name, 'r') as f:
    data = np.loadtxt(f)

  num_actions = 7  # some of the actions are very rarely optimal.

  # Shuffle data
  if shuffle_rows:
    np.random.shuffle(data)
  data = data[:num_contexts, :]

  # Last column is label, rest are features
  contexts = data[:, :-1]
  labels = data[:, -1].astype(int) - 1  # convert to 0 based index

  if remove_underrepresented:
    contexts, labels = remove_underrepresented_classes(contexts, labels)

  return classification_to_bandit_problem(contexts, labels, num_actions)


def sample_adult_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI adult data.

  Args:
    file_name: Route of file containing the Adult dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  Preprocessing:
    * drop rows with missing values
    * convert categorical variables to 1 hot encoding

  https://archive.ics.uci.edu/ml/datasets/census+income
  """
  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=None,
                     na_values=[' ?']).dropna()

  num_actions = 2

  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  labels = df[14].astype('str')
  df = df.drop([14], axis=1)
  df = df.drop([6], axis=1)

  labels=labels.str.replace('.','')
  labels = labels.astype('category').cat.codes.to_numpy()

  # Convert categorical variables to 1 hot encoding
  cols_to_transform = [1, 3, 5,7, 8, 9, 13]
  df = pd.get_dummies(df, columns=cols_to_transform)
  #df = preprocessing.StandardScaler().fit_transform(df)
  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  contexts = df.to_numpy()
  print(contexts.shape)
  return classification_to_bandit_problem(contexts, labels, num_actions)

  #labels = df[14].astype('str')
  #df = df.drop([14], axis=1)

#  labels=labels.str.replace('.','')
 # labels = labels.astype('category').cat.codes.to_numpy()
def sample_census_data(file_name, num_contexts, shuffle_rows=True,
                       remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI census data.

  Args:
    file_name: Route of file containing the Census dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  Preprocessing:
    * drop rows with missing labels
    * convert categorical variables to 1 hot encoding

  Note: this is the processed (not the 'raw') dataset. It contains a subset
  of the raw features and they've all been discretized.

  https://archive.ics.uci.edu/ml/datasets/US+Census+Data+%281990%29
  """
  # Note: this dataset is quite large. It will be slow to load and preprocess.
  with tf.gfile.Open(file_name, 'r') as f:
    df = (pd.read_csv(f, header=0, na_values=['?'])
          .dropna())

  num_actions = 9

  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  # Assuming what the paper calls response variable is the label?
  labels = df['dOccup'].astype('category').cat.codes.to_numpy()
  # In addition to label, also drop the (unique?) key.
  df = df.drop(['dOccup', 'caseid'], axis=1)

  # All columns are categorical. Convert to 1 hot encoding.
  df = pd.get_dummies(df, columns=df.columns)

  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  contexts = df.to_numpy()
  print(contexts.shape)
  return classification_to_bandit_problem(contexts, labels, num_actions)

def sample_eeg_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):

  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=None,na_values=[' ?']).dropna()

  num_actions = 5
  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]
  labels = df[179].astype(int) - 1  # convert to 0 based index
  df = df.drop([179], axis=1)
  df = df.drop([0], axis=1)

  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  #df = preprocessing.StandardScaler().fit_transform(df)
  contexts = df.to_numpy().astype(np.float)

  return classification_to_bandit_problem(contexts, labels, num_actions)





def sample_phone_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):

  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=None,na_values=[' ?']).dropna()

  num_actions = 6
  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]
  labels = df[df.columns[-1]].astype('category').cat.codes.to_numpy()
  df = df.drop([df.columns[-1]], axis=1)
  df = df.drop([df.columns[-1]], axis=1)
  df = df.drop([0], axis=1)

  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  #df = preprocessing.StandardScaler().fit_transform(df)
  contexts = df.to_numpy().astype(np.float)
  #print(df[0:3])
  return classification_to_bandit_problem(contexts, labels, num_actions)


def sample_aps_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):

  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=21,na_values=['na']).fillna(0)
  df = df.iloc[:num_contexts, :]

  labels = df[df.columns[0]].astype('category').cat.codes.to_numpy()
  df = df.drop([df.columns[0]], axis=1)

  num_actions = 2

  if shuffle_rows:
    df = df.sample(frac=1)
  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  contexts = df.to_numpy().astype(np.float)
  contexts, rewards, (opt_rewards, opt_actions) = classification_to_bandit_problem(contexts, labels, num_actions)
  if 1:
    for r in rewards:
      if r[0]==1:r[0]=10
      if r[1]==1:r[1]=500

  return contexts, rewards, (opt_rewards, opt_actions)

def sample_diabetic_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=True):

  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=None,na_values=[' ?']).dropna()

  num_actions = 3
  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  #print(df.shape)
  labels = df[df.columns[-1]].astype('category').cat.codes.to_numpy()
  df = df.drop([df.columns[-1]], axis=1)
  df = df.drop([df.columns[20]], axis=1)
  df = df.drop([df.columns[19]], axis=1)
  df = df.drop([df.columns[18]], axis=1)
  df = df.drop([df.columns[10]], axis=1)

  df = df.drop([df.columns[5]], axis=1)
  df = df.drop([df.columns[1]], axis=1)
  df = df.drop([df.columns[0]], axis=1)

  #print(df[0:3])

  cols_to_transform = [2, 3, 4,11,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48]
  df = pd.get_dummies(df, columns=cols_to_transform)
  #print(df.shape)

  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  #df = preprocessing.StandardScaler().fit_transform(df)
  contexts = df.to_numpy().astype(np.float)
  #print(df[0:3])

  return classification_to_bandit_problem(contexts, labels, num_actions)

def sample_txt_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):
  x_text, yy = data_helpers.load_data_and_labels(file_name[0],file_name[1])

  y=[]
  for i_y in yy:
    if i_y[0]==1:
      y.append(0)
    else:
      y.append(1)
  y  = np.array(y)
  num_actions=2
  # Build vocabulary
  max_document_length = max([len(x.split(" ")) for x in x_text])
  vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
  x = np.array(list(vocab_processor.fit_transform(x_text)))
  # Randomly shuffle data
  shuffle_indices = np.random.permutation(np.arange(len(y)))
  x_shuffled = x[shuffle_indices]
  y_shuffled = y[shuffle_indices]
  x_shuffled = x_shuffled[:num_contexts]
  y_shuffled = y_shuffled[:num_contexts]
  _, rewards, (opt_rewards, opt_actions) = classification_to_bandit_problem(x_shuffled, y_shuffled, num_actions)

  return x_shuffled, rewards, (opt_rewards, opt_actions), vocab_processor

def sample_amazon_data(file_name, num_contexts, shuffle_rows=True,
                      remove_underrepresented=False):
  with tf.gfile.Open(file_name, 'r') as f:
    df = pd.read_csv(f, header=21, na_values=['na']).fillna(0)
  df = df.iloc[:num_contexts, :]
  labels = df[df.columns[3]].astype('category').cat.codes.to_numpy()

  x_text = df[df.columns[4]].to_numpy().astype(str)
  x_text = [s.strip() for s in x_text]
  x_text = [data_helpers.clean_str(sent) for sent in x_text]
  num_actions = 5

    # Build vocabulary
  #max_document_length = max([len(x.split(" ")) for x in x_text])
  vocab_processor = learn.preprocessing.VocabularyProcessor(60)
  x = np.array(list(vocab_processor.fit_transform(x_text)))
  # Randomly shuffle data
  shuffle_indices = np.random.permutation(np.arange(len(labels)))
  x_shuffled = x[shuffle_indices]
  y_shuffled = labels[shuffle_indices]
  x_shuffled = x_shuffled[:num_contexts]
  y_shuffled = y_shuffled[:num_contexts]
  _, rewards, (opt_rewards, opt_actions) = classification_to_bandit_problem(x_shuffled, y_shuffled, num_actions)
  print(rewards)
  return x_shuffled, rewards, (opt_rewards, opt_actions), vocab_processor



def sample_covertype_data(file_name, num_contexts, shuffle_rows=True,
                          remove_underrepresented=False):
  """Returns bandit problem dataset based on the UCI Cover_Type data.

  Args:
    file_name: Route of file containing the Covertype dataset.
    num_contexts: Number of contexts to sample.
    shuffle_rows: If True, rows from original dataset are shuffled.
    remove_underrepresented: If True, removes arms with very few rewards.

  Returns:
    dataset: Sampled matrix with rows: (context, action rewards).
    opt_vals: Vector of deterministic optimal (reward, action) for each context.

  Preprocessing:
    * drop rows with missing labels
    * convert categorical variables to 1 hot encoding

  https://archive.ics.uci.edu/ml/datasets/Covertype
  """
  with tf.gfile.Open(file_name, 'r') as f:
    df = (pd.read_csv(f, header=0, na_values=['?'])
          .dropna())

  num_actions = 7

  if shuffle_rows:
    df = df.sample(frac=1)
  df = df.iloc[:num_contexts, :]

  # Assuming what the paper calls response variable is the label?
  # Last column is label.
  labels = df[df.columns[-1]].astype('category').cat.codes.to_numpy()
  df = df.drop([df.columns[-1]], axis=1)

  # All columns are either quantitative or already converted to 1 hot.
  if remove_underrepresented:
    df, labels = remove_underrepresented_classes(df, labels)
  #df = preprocessing.StandardScaler().fit_transform(df)
  contexts = df.to_numpy()

  return classification_to_bandit_problem(contexts, labels, num_actions)


def classification_to_bandit_problem(contexts, labels, num_actions=None):
  """Normalize contexts and encode deterministic rewards."""

  if num_actions is None:
    num_actions = np.max(labels) + 1
  num_contexts = contexts.shape[0]
  # Due to random subsampling in small problems, some features may be constant
  sstd = safe_std(np.std(contexts, axis=0, keepdims=True)[0, :])
  # Normalize features
  contexts = ((contexts - np.mean(contexts, axis=0, keepdims=True)) / sstd)

  # One hot encode labels as rewards
  rewards = np.zeros((num_contexts, num_actions))
  rewards[np.arange(num_contexts), labels] = 1.0

  return contexts, rewards, (np.ones(num_contexts), labels)


def safe_std(values):
  """Remove zero std values for ones."""
  return np.array([val if val != 0.0 else 1.0 for val in values])


def remove_underrepresented_classes(features, labels, thresh=0.0005):
  """Removes classes when number of datapoints fraction is below a threshold."""

  # Threshold doesn't seem to agree with https://arxiv.org/pdf/1706.04687.pdf
  # Example: for Covertype, they report 4 classes after filtering, we get 7?
  total_count = labels.shape[0]
  unique, counts = np.unique(labels, return_counts=True)
  ratios = counts.astype('float') / total_count
  vals_and_ratios = dict(zip(unique, ratios))
  print('Unique classes and their ratio of total: %s' % vals_and_ratios)
  keep = [vals_and_ratios[v] >= thresh for v in labels]
  return features[keep], labels[np.array(keep)]


