import pandas as pd
import numpy as np

INPUT_PATH = "../ml-20m/ratings.csv"
OUTPUT_PATH = "./ratings_processed.csv"


def relabel_ratings(ratings):
    """Return a copy of *ratings* with dense zero-based ids and unit ratings.

    Each distinct ``userId`` (and, independently, each distinct ``movieId``)
    is replaced by its rank among the distinct values of that column, so the
    smallest id becomes 0, the next smallest 1, and so on with no gaps.
    Every rating in the result is ``1.0``; any rating values in the input
    are ignored.

    Args:
        ratings: DataFrame with at least the columns ``userId`` and
            ``movieId``. It is not modified.

    Returns:
        A new DataFrame with columns ``userId`` (int), ``movieId`` (int) and
        ``rating`` (float), one row per input row, in the input's row order,
        with a fresh default index.
    """
    # np.unique sorts the distinct ids; the inverse index is therefore each
    # row's rank among them, which is exactly the ascending relabeling. This
    # replaces per-row Python loops and id-sized lookup lists with native
    # code, and also copes with empty input.
    _, user_codes = np.unique(ratings["userId"].to_numpy(), return_inverse=True)
    _, movie_codes = np.unique(ratings["movieId"].to_numpy(), return_inverse=True)

    return pd.DataFrame(
        {
            "userId": user_codes,
            "movieId": movie_codes,
            "rating": np.ones(len(ratings), dtype=float),
        }
    )


def main():
    """Read the raw ratings CSV, relabel the ids, and write the processed CSV."""
    # Only the two id columns are needed: ratings are overwritten with 1.0
    # and timestamps are dropped, so neither is loaded.
    df = pd.read_csv(INPUT_PATH, usecols=["userId", "movieId"])

    df1 = relabel_ratings(df)

    # NOTE(review): these are the largest *raw* ids from the input file, not
    # the largest relabeled ids (those would be df1[...].max()). Kept as-is
    # to preserve the script's output; confirm which one is wanted.
    print(df["userId"].max())
    print(df["movieId"].max())

    df1.to_csv(OUTPUT_PATH, index=False)


if __name__ == "__main__":
    main()
