import torch.nn.functional as F
import torch.nn as nn
import torch
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

# Load the raw ETTh1 table from the working directory.
datasets = pd.read_csv('./ETTh1.csv', encoding='utf-8', low_memory=False)
print(datasets.shape)

# Drop the 'date' column; only the remaining columns are processed below.
datasets = datasets.drop(columns='date')

# Treat cells that hold a single space as missing values.
datasets = datasets.replace(" ", np.nan)

# Remove every column whose values are all exactly zero, in one call.
zero_columns = datasets.columns[datasets.eq(0).all()]
datasets = datasets.drop(columns=zero_columns)
print(datasets.shape)

# Report whether any missing value is left anywhere in the table.
q = datasets.isnull().to_numpy().any()
print(q)

# Dump the surviving column labels and the cleaned table for inspection.
print(datasets.columns)
print(datasets)

# Standardize only the numeric columns (per-column z-score).
df_numeric = datasets.select_dtypes(include='number')
column_std = df_numeric.std()
# A constant column has a standard deviation of exactly zero; dividing by it
# would produce NaN (0 / 0) and the PCA step further down would then raise.
# Scale such columns by 1 instead so they simply become all zeros, which is
# also how scikit-learn's StandardScaler treats zero-variance features.
df_standardized = (df_numeric - df_numeric.mean()) / column_std.replace(0, 1)
print(df_standardized)

# Plain ndarray view of the standardized table for scikit-learn.
data = df_standardized.to_numpy()


print(data)


# Reduce the standardized features to one dimension with PCA (projection onto
# the first principal component).
# random_state is pinned because, depending on the installed scikit-learn
# version and the size of the data, the default 'auto' solver may choose the
# randomized SVD; without a fixed seed the exported values could then differ
# slightly from run to run. Solvers that are not randomized ignore it.
pca = PCA(n_components=1, random_state=0)
fused_features = pca.fit_transform(data)  # output shape: [n_samples, 1]

print(fused_features)
# Wrap the result in a DataFrame; the single column gets the default label 0.
df = pd.DataFrame(fused_features)

# counts = datasets['Normal/Attack'].value_counts()

# print(counts)

# ATTACK_CAT_TO_ID = {
#     'Normal': 0,
#     'Attack': 1,
# }
#
# datasets['Normal/Attack'] = datasets['Normal/Attack'].apply(func=(lambda x: ATTACK_CAT_TO_ID.get(x)))
#
# print(datasets)
#
# labels = datasets.iloc[:, -1]
# # print(labels.shape)
# # print(labels)
# #
# datasets.drop(columns=[' Timestamp', 'Normal/Attack'], inplace=True)
# # datasets.drop(columns=[' Timestamp'], inplace=True)
# #
# # print(datasets.shape)
# #
# # # Find the index of every row that contains a string value
# # mask = datasets.applymap(lambda x: isinstance(x, str)).any(axis=1)
# #
# # # Drop those rows
# # df_cleaned = datasets[~mask]
# #
# # print("df_cleaned.shape:", df_cleaned.shape)
#
#
# datasets = datasets.loc[:, datasets.std() > 0]
#
# print("df_cleaned.shape:", datasets.shape)
#
# # Normalize the feature columns
# result1 = datasets.copy()
# cols = datasets.columns
# # Standardization (z-score): subtract the column mean, divide by the column std
# result = result1.copy()
# for feature_name in cols[0:-1]:
#     mean_value = result1[feature_name].mean()
#     std_value = result1[feature_name].std()
#     result[feature_name] = (result1[feature_name] - mean_value) / std_value
#
# print(result)
# print(result.shape)
#
# df_merged = pd.concat([result, labels], axis=1)
#
# print(df_merged)
#
# df_merged.to_csv('datasets_swat1_afterProcess.csv', index=False)

# Show the fused one-dimensional result, then write it to disk as CSV without
# the row index.
print(df)
df.to_csv(path_or_buf='datasets_ETT_afterProcess.csv', index=False)
