import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import pandas as pd
import numpy as np
from ucimlrepo import fetch_ucirepo
from utils import check_data

################################################################################
# (1) DATA LOADING
################################################################################
print("=" * 50, "STEP 1: DATA LOADING", "=" * 50, sep="\n")

# Input/output files live in the same directory as this script.
base_dir = os.path.dirname(__file__)
OUT_PATH = os.path.join(base_dir, 'cardiotocography.csv')
RAW_PATH = os.path.join(base_dir, 'raw.csv')

# Download dataset id 193 from the UCI repository.
data_instance = fetch_ucirepo(id=193)

# Split the fetched payload into its feature and target tables.
X, y = data_instance.data.features, data_instance.data.targets

# Join features and targets column-wise and keep an untouched copy on disk.
df = pd.concat([X, y], axis="columns")
df.to_csv(RAW_PATH, index=False)

print("STEP 1 COMPLETED: Data loaded and raw file saved", "=" * 50, sep="\n")


################################################################################
# (2) FORMAT
################################################################################
print("STEP 2: FORMAT")
print("=" * 50)

# Collapse the NSP codes into a binary 'label': code 1 -> 0, codes 2 and 3 -> 1.
NSP_TO_LABEL = {1: 0, 2: 1, 3: 1}
df['label'] = df['NSP'].map(NSP_TO_LABEL)

# Series.map silently yields NaN for any value absent from the dict.
# Fail loudly on unexpected codes instead of letting NaN labels flow
# downstream. Rows whose NSP was already missing are left as NaN, as before.
unexpected_nsp = df.loc[df['NSP'].notna() & df['label'].isna(), 'NSP'].unique()
if len(unexpected_nsp) > 0:
    raise ValueError(
        f"Unexpected 'NSP' values that cannot be mapped to a label: {unexpected_nsp.tolist()}"
    )

# The original target columns are no longer needed once 'label' exists.
df.drop(columns=['NSP', 'CLASS'], inplace=True)

print("STEP 2 COMPLETED: Data formatted and cleaned")
print("=" * 50)


################################################################################
# (3) VALIDATION
################################################################################
print("STEP 3: VALIDATION", "=" * 50, sep="\n")

# Hand the frame to the project's check_data helper and continue with
# whatever it returns (its exact checks are defined in utils, not here).
df = check_data(df)

print("STEP 3 COMPLETED: Data validation passed", "=" * 50, sep="\n")


################################################################################
# (4) POSTPROCESSING & SAVE
################################################################################
print("STEP 4: POSTPROCESSING & SAVE")
print("=" * 50)


# Map tendency values to text
TENDENCY_MAPPING = {
    -1: "left asymmetric",
     0: "symmetric",
     1: "right asymmetric",
}

# Guard clause: the column must exist before anything is rewritten.
if 'Tendency' not in df.columns:
    raise ValueError("'Tendency' column not found in the dataset.")

tendency_codes = df['Tendency']
tendency_text = tendency_codes.map(TENDENCY_MAPPING)

# Series.map silently yields NaN for values absent from the mapping. Refuse
# to write a file where real codes were turned into blanks; values that were
# already missing before the mapping are passed through unchanged.
unmapped_tendency = tendency_codes[tendency_codes.notna() & tendency_text.isna()].unique()
if len(unmapped_tendency) > 0:
    raise ValueError(
        f"Unexpected 'Tendency' values with no text mapping: {unmapped_tendency.tolist()}"
    )

df['Tendency'] = tendency_text
print("Mapped and overwritten 'Tendency' column with text values.")

# Save final processed data
df.to_csv(OUT_PATH, index=False)

print("STEP 4 COMPLETED: Final data saved")
print("=" * 50)
print("ALL PREPROCESSING STEPS COMPLETED!")
print("=" * 50)
