# %% region import statements
import os  
import sys
print(sys.path)

import json 

import numpy as np
import random

from utils.data_functions import load_data_from_file, write_data_to_file
from utils.helper_functions import get_repo_path
# end region

# %% region general variables
seed_number = 42  # seed applied to both `random` and NumPy below
save_flag = True  # when True, the config JSON and the data splits are written to disk

# Seed both generators so the shuffle/split further down is reproducible.
random.seed(seed_number)
np.random.seed(seed_number)

cluster_case = 'SC' # or LC (SC: small clusters, LC: large clusters)

# Validate before the value is baked into any file name, so a typo fails here
# instead of surfacing later as a missing-file error.
if cluster_case not in ('SC', 'LC'):
    raise ValueError('Invalid cluster_case value')

# Input file holding the combined train/val data for the chosen cluster case.
file_path = f'data/train_val_data_{cluster_case}.txt'

# end region

# %% ################################# directories #################################
# Resolve to an absolute path first: `__file__` can be relative (e.g. when the
# script is launched as `python script.py` on Python < 3.9), in which case
# `os.path.dirname(__file__)` alone would be an empty string.
script_dir = os.path.dirname(os.path.abspath(__file__))
print("script_dir: ", script_dir)

# NOTE(review): presumably climbs 2 directory levels from script_dir to reach
# the repository root — confirm against utils.helper_functions.get_repo_path.
repo_path = get_repo_path(script_dir, 2)
print("repo_path: ", repo_path)

# Location of the data package inside the repository.
data_pkg_dir = os.path.join(repo_path, 'src', 'data_pkg')
print("data_pkg_dir: ", data_pkg_dir)

# %% ################################# general variables #################################
# NOTE(review): presumably lower/upper bounds on a Levenshtein distance; in this
# script they are only recorded in the config — confirm where they are consumed.
lev_dist_lb, lev_dist_ub = 5, 13
split_ratio = 0.9  # fraction of the shuffled data that goes to the training set

# Snapshot of the settings used for this run, in the order they are serialized.
config = {
    'lev_dist_lb': lev_dist_lb,
    'lev_dist_ub': lev_dist_ub,
    'seed_number': seed_number,
    'split_ratio': split_ratio,
    'file_path': file_path,
}

# Persist the settings as JSON next to the data files.
config_json_path = f'data/config_prepare_{cluster_case}.json'
if save_flag:
    with open(config_json_path, 'w') as json_file:
        json_file.write(json.dumps(config, indent=4))

# %%
# Load the combined train/val data and shuffle it in place with the `random`
# module's global generator (seeded near the top of the script).
train_val_data = load_data_from_file(file_path)
random.shuffle(train_val_data)

# %% ################################## Create data sets ##################################
n = len(train_val_data)

# The first `split_ratio` share of the shuffled items is the training set;
# everything after that index is the validation set.
split_index = int(n*split_ratio)
train_data, val_data = train_val_data[:split_index], train_val_data[split_index:]

if save_flag:
    print('Saving data sets...')
    write_data_to_file(f'data/train_data_{cluster_case}.txt', train_data)
    write_data_to_file(f'data/val_data_{cluster_case}.txt', val_data)
    print('Data sets saved!')