import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# import sys
# sys.path.append("mypath")

import numpy as np
# import tensorflow as tf
import pandas as pd

# import tensorflow_datasets as tfds
# Directory that holds the raw `uci.txt` and receives the generated `uci.csv`.
data_dir = '/home/Datasets/uci'


def read_data_graph(path=None):
    """Load the raw, space-separated interaction log into a DataFrame.

    Args:
        path: File to read. When omitted, ``uci.txt`` inside ``data_dir``
            is used, which is what the original zero-argument call did.

    Returns:
        A ``pandas.DataFrame`` with the columns ``timestamp``, ``user_id``,
        ``item_id`` and ``feat``; the names are supplied here because the
        file is read without a header row.
    """
    if path is None:
        path = os.path.join(data_dir, 'uci.txt')
    names = ['timestamp', 'user_id', 'item_id', 'feat']
    # `sep` has to be a keyword argument: since pandas 2.0 every read_csv
    # parameter after the path is keyword-only, so passing ' ' positionally
    # raises TypeError on current pandas.
    return pd.read_csv(path, sep=' ', names=names, engine='python')

# ---- Stage 1: normalise the raw log and write an intermediate uci.csv ----
data = read_data_graph()

### count ui valid numbers
# Number of distinct values in each id column, plus the first raw timestamp
# as a quick sanity check of the parsed input.
N_U = data.user_id.unique().shape[0]
N_I = data.item_id.unique().shape[0]
print(N_U,N_I,data.timestamp[0])

### timestamp to a valid range
# Timestamp.value is nanoseconds since the Unix epoch; dividing by 1e9 and
# truncating with astype(int) yields whole seconds.
data['timestamp'] = (
    pd.to_datetime(data['timestamp']).apply(lambda x: x.value) / 1000000000
).astype(int)
# Subtract a fixed offset of 1080000000 s (= 2004-03-23 00:00:00 UTC) so the
# values become small; presumably chosen to precede the first event - confirm.
data['timestamp'] -= 1080000000

### map features
# The column is called 'feat' (see `names` in read_data_graph). The previous
# `data.feature = 0` only set a Python attribute on the DataFrame object
# (pandas warns about this) and left the column untouched, so assign through
# item access to actually zero the column.
data['feat'] = 0
print(data)

# Intermediate file: no header, no index, columns in the order
# timestamp, user_id, item_id, feat.
data.to_csv(os.path.join(data_dir, 'uci.csv'),
            header=False,
            index=False)


from csv import reader
import pickle
# ---- Stage 2: compact the node ids and write the final edge list ----
# Re-reads the intermediate uci.csv, skips rows whose two endpoints are equal,
# maps raw node ids to consecutive integers and overwrites uci.csv with the
# columns (u, i, ts, label, feat), again without header or index.
clean = []
usernames = {}   # raw node id -> compact id, numbered by first appearance
usercount = 0    # next free compact id, i.e. number of distinct nodes so far
u_list, i_list, ts_list, label_list = [], [], [], []
feat_l = []
idx_list = []
with open(os.path.join(data_dir, 'uci.csv'), 'r') as csv_file:
    row_count = 0  # edges kept so far; also used as the edge index
    for e in reader(csv_file):
        # Progress trace. row_count only advances for kept rows, so the same
        # value can be reported again after a skipped row.
        if row_count % 10000 == 0:
            print(row_count)
            print(e)

        u = int(e[1])
        i = int(e[2])
        if u == i:
            # Self-loop: ignore the row entirely.
            continue

        ts = float(e[0])         # parsed, but not what is stored in ts_list
        label = float(1.0)       # every kept edge gets the constant label 1.0
        feat = np.array([0.0])   # constant placeholder feature

        # Source and destination are looked up in the same dictionary, so
        # both endpoints live in one shared id space.
        endpoint_ids = []
        for raw_id in (u, i):
            if raw_id not in usernames:
                usernames[raw_id] = usercount
                usercount += 1
            endpoint_ids.append(usernames[raw_id])
        uid, iid = endpoint_ids

        u_list.append(uid)
        i_list.append(iid)
        # The running edge index is stored as the timestamp instead of `ts`.
        # The original note recorded 96.40 for this variant versus 93.51
        # (ts/1000) and 59.54 (ts/100000) - presumably a downstream score.
        ts_list.append(row_count)
        label_list.append(label)
        idx_list.append(row_count)
        feat_l.append(feat)
        row_count += 1

print("WARNING! SAVING FEATURE IS NOT COMPLETED YET. TBI.")
# Each entry of feat_l is a length-1 array; keep only its single component.
feat_column = np.array(feat_l)[:, 0]
data_final = pd.DataFrame({
    'u': u_list,
    'i': i_list,
    'ts': ts_list,
    'label': label_list,
    'feat': feat_column,
})

# Largest edge index and number of distinct nodes.
print(row_count-1,usercount)
print(data_final)
data_final.to_csv(
    os.path.join(data_dir, 'uci.csv'),
    header=False,
    index=False,
)