import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# import sys
# sys.path.append("mypath")

import numpy as np
import tensorflow as tf
import pandas as pd

# import tensorflow_datasets as tfds
data_dir = '/home/Project_MetaSale/Dataset'


def read_data_graph(time: str) -> pd.DataFrame:
    """Load one raw interaction log into a DataFrame.

    Args:
        time: Name of the log file, joined onto ``data_dir``
            (e.g. ``'2022-12-14'``).

    Returns:
        A DataFrame with the columns ``user_id``, ``item_id``,
        ``timestamp``, ``query_id`` and ``label``. The file is read as
        headerless text with a single space between fields.
    """
    names = ['user_id', 'item_id', 'timestamp', 'query_id', 'label']
    # The separator has to be passed by keyword: since pandas 2.0 every
    # read_csv argument after the path is keyword-only, so a positional ' '
    # raises TypeError.
    return pd.read_csv(os.path.join(data_dir, time), sep=' ', names=names,
                       engine='python')

# Stage 1: load every daily log, shift its timestamps, keep the positive-label
# rows, and write the concatenation to metasales.csv.
time_list = ['2022-12-14', '2022-12-15', '2022-12-16']

# Constant subtracted from every raw timestamp to bring it into a valid range.
# NOTE(review): how this value was derived is not recorded here; confirm it
# against the raw data before adding more days.
TIMESTAMP_OFFSET = 844852

daily_frames = []
for day in time_list:
    frame = read_data_graph(day).sort_values(by="timestamp")

    ### count ui valid numbers
    N_U = frame.user_id.unique().shape[0]
    N_I = frame.item_id.unique().shape[0]
    print(N_U, N_I)  # day 1: 25588 87348, day 2: 24623 83942

    ### timestamp to a valid range
    frame["timestamp"] -= TIMESTAMP_OFFSET

    ### map features
    # No feature column is created at this stage. Assigning `frame.feature = 0`
    # would only set a Python attribute (pandas does not create columns through
    # attribute assignment), and that attribute is lost by the filter below.
    # TODO: map event types to features (e.g. {"atk": 1.0, "clk": 0.0}) once
    # the raw data carries a feature column.

    # Keep only the rows with a positive label.
    frame = frame[frame["label"] > 0]
    print(frame)
    # [0~86399] 103893 rows || [86399~172799] 97310 rows || 172799~259199 90051 rows
    daily_frames.append(frame)

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the frames in
# the same order and keeps their original indices.
data = pd.concat(daily_frames)

### count ui valid numbers
N_U = data.user_id.unique().shape[0]
N_I = data.item_id.unique().shape[0]
print(N_U, N_I)  # 40878 87953, 201203 rows
print(data)

data.to_csv(os.path.join(data_dir, 'metasales.csv'),
            header=False,
            index=False)


from csv import reader
import pickle
# open file in read mode
# Stage 2: re-read metasales.csv, replace raw user/item ids with contiguous
# 0-based indices (assigned in order of first appearance), and overwrite the
# file with the columns u, i, ts, label, feat.
clean = []
usernames = {}  # raw user id -> contiguous user index
itemnames = {}  # raw item id -> contiguous item index
usercount = 0
itemcount = 0
u_list, i_list, ts_list, label_list = [], [], [], []
feat_l = []
idx_list = []
row_count = 0  # number of rows kept so far; also the index of the next kept row

# newline='' is what the csv module documentation recommends for files handed
# to csv.reader.
with open(os.path.join(data_dir, 'metasales.csv'), 'r', newline='') as read_obj:
    csv_reader = reader(read_obj)
    for e in csv_reader:
        # Progress trace every 10000 kept rows.
        if row_count % 10000 == 0:
            print(row_count)
            print(e)
        u = int(e[0])
        i = int(e[1])
        ts = float(e[2])
        label = float(e[4])  # e[3] (query_id) is not carried over
        if label <= 0:
            continue
        # Placeholder feature: a single constant 0.0 per interaction.
        feat = np.array([0.0])

        # setdefault returns the existing index, or stores and returns the
        # next free one when the id is seen for the first time.
        uid = usernames.setdefault(u, usercount)
        usercount = len(usernames)
        iid = itemnames.setdefault(i, itemcount)
        itemcount = len(itemnames)

        u_list.append(uid)
        i_list.append(iid)
        ts_list.append(int(ts))  # /100 ap 8947
        label_list.append(label)
        idx_list.append(row_count)
        feat_l.append(feat)
        row_count += 1

print("WARNING! SAVING FEATURE IS NOT COMPLETED YET. TBI.")
# Take the first (only) component of each feature vector. Unlike
# np.array(feat_l)[:, 0], this also works when no row was kept, where the
# stacked array would be 1-D and the column index would raise IndexError.
data_final = pd.DataFrame({'u': u_list,
                           'i': i_list,
                           'ts': ts_list,
                           'label': label_list,
                           'feat': [f[0] for f in feat_l]})


# The first value printed is the index of the last kept row (row count - 1).
print(row_count-1,usercount,itemcount) # 103892 25588 59122
print(data_final)
data_final.to_csv(os.path.join(data_dir, 'metasales.csv'),
                  header=False,
                  index=False)