import pandas as pd
import numpy as np
import random

# Split the processed ratings into one training file and two complementary
# test files.
#
# * Rows whose user id (column 0) is below ``threshold`` are copied unchanged
#   into the training set.
# * Every remaining row is written to BOTH test sets with its user id re-based
#   by subtracting ``threshold``. One coin flip per row decides which of the
#   two sets gets 1 in its "rating" column; the other set gets 0.
#
# NOTE(review): the test files do not carry the original rating value -- their
# "rating" column holds only the 0/1 indicator. Confirm this is intended.
# NOTE(review): ``random`` is not seeded in this script, so the split differs
# between runs unless it is seeded elsewhere.

# Column labels and dtypes shared by all three output files.
COLUMNS = ["userId", "movieId", "rating"]
DTYPES = {"userId": int, "movieId": int, "rating": float}

df = pd.read_csv("./ratings_processed.csv")

arr = df.to_numpy()
num = df.shape[0]

# Columns are addressed by position below and the outputs are labelled with
# exactly three names, so reject any other layout up front with a clear
# message instead of failing on a shape mismatch after the split is computed.
if arr.shape[1] != len(COLUMNS):
    raise ValueError(
        "expected {} columns {}, got {}: {}".format(
            len(COLUMNS), COLUMNS, arr.shape[1], list(df.columns)
        )
    )

# Users with an id below 70% of the largest id form the training set.
threshold = int(df["userId"].max() * 0.7)

# Boolean mask over rows; anything not strictly below the threshold
# (including NaN ids, for which the comparison is False) is held out.
is_train = arr[:, 0] < threshold
held_out = arr[~is_train]

a1 = int(is_train.sum())  # number of training rows
a2 = num - a1             # number of held-out (test) rows

# One random.random() draw per held-out row, in row order.
in_t1 = np.array([random.random() > 0.5 for _ in range(a2)], dtype=bool)

arr_tr = arr[is_train].astype(float)

# Both test sets share the re-based user id and the movie id; only the 0/1
# indicator in the last column differs, and it is complementary.
shifted_user = held_out[:, 0] - threshold
arr_t1 = np.column_stack((shifted_user, held_out[:, 1], in_t1)).astype(float)
arr_t2 = np.column_stack((shifted_user, held_out[:, 1], ~in_t1)).astype(float)

df_tr = pd.DataFrame(arr_tr, columns=COLUMNS).astype(DTYPES)
df_t1 = pd.DataFrame(arr_t1, columns=COLUMNS).astype(DTYPES)
df_t2 = pd.DataFrame(arr_t2, columns=COLUMNS).astype(DTYPES)

df_tr.to_csv("./ratings_processed_train.csv", index=False)
df_t1.to_csv("./ratings_processed_test1.csv", index=False)
df_t2.to_csv("./ratings_processed_test2.csv", index=False)
