import torch.nn.functional as F
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split

# Load the raw CSV. low_memory=False asks pandas to read the file in one pass
# so each column gets a single inferred dtype instead of per-chunk guesses.
datasets = pd.read_csv('./SWaT_Dataset_Attack_v0.csv',
                       encoding='utf-8',
                       low_memory=False)

print(datasets.shape)

# Treat cells that contain only a single blank character as missing values.
datasets.replace(" ", np.nan, inplace=True)

# Drop every column whose values are all zero. A single drop() call removes
# them together instead of rebuilding the frame once per column.
zero_columns = datasets.columns[(datasets == 0).all()]
datasets = datasets.drop(columns=zero_columns)

print(datasets.shape)

# Report whether any missing values are present in the frame.
q = np.any(pd.isnull(datasets))
print(q)

print(datasets.columns)

# Show the raw label distribution before encoding, so unexpected label
# spellings are visible in the output.
counts = datasets['Normal/Attack'].value_counts()

print(counts)

# Integer id assigned to each textual class label.
ATTACK_CAT_TO_ID = {
    'Normal': 0,
    'Attack': 1,
}

# Remove spaces from the label text before the lookup so that variants with
# stray whitespace (e.g. 'A ttack' or ' Attack') map to the intended id rather
# than silently becoming a missing label.
# NOTE(review): assumes whitespace is the only label variation; confirm
# against the value counts printed above. Labels that still do not match a
# key are left as missing values.
label_text = datasets['Normal/Attack'].astype(str).str.replace(' ', '', regex=False)
datasets['Normal/Attack'] = label_text.map(ATTACK_CAT_TO_ID)

print(datasets)

# Take the label column by name rather than by position, so the right column
# is used even if the label is not the last column of the frame.
labels = datasets['Normal/Attack']

# Keep only feature columns: remove the timestamp and the label.
# (The timestamp header is spelled with a leading space.)
datasets.drop(columns=[' Timestamp', 'Normal/Attack'], inplace=True)

# Discard columns whose standard deviation is not positive; a zero standard
# deviation would make a later division by it undefined.
datasets = datasets.loc[:, datasets.std() > 0]

print("df_cleaned.shape:", datasets.shape)

# Standardise every feature column: subtract the column mean and divide by the
# column (sample) standard deviation. The label column has already been
# removed from `datasets`, so all remaining columns are features and none may
# be skipped (the previous loop over cols[0:-1] left the last feature
# unnormalised).
result = (datasets - datasets.mean()) / datasets.std()

print(result)
print(result.shape)

# Re-attach the labels as the final column; concat aligns rows on the index.
df_merged = pd.concat([result, labels], axis=1)

print(df_merged)

# Persist the processed table without writing the row index.
df_merged.to_csv('datasets_swat1_afterProcess.csv', index=False)
