{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the raw CSV export into a DataFrame.\n",
    "df = pd.read_csv(filepath_or_buffer='./data/最终数据.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lift pandas' display truncation so wide / long frames are rendered in full.\n",
    "pd.options.display.max_columns = None\n",
    "pd.options.display.max_rows = None\n",
    "pd.options.display.max_colwidth = None\n",
    "pd.options.display.width = 1000"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the first ten rows of the freshly loaded frame.\n",
    "df.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the rows whose class label is missing: keep only rows where the label column is present.\n",
    "df = df[df['疼痛缓解情况及用药后疼痛评分（1.完全缓解2.部分缓解3.轻度缓解4.无效）'].notna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill every remaining missing value with 0.\n",
    "df = df.fillna(value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: report whether any missing value is still present anywhere in the frame.\n",
    "print(\"DataFrame 中含有 NaN 值\" if df.isna().to_numpy().any() else \"DataFrame 中不含有 NaN 值\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '性别 ' column to float64 (note: the column name carries a trailing space).\n",
    "df = df.astype({'性别 ': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '年龄' column to float64.\n",
    "df = df.astype({'年龄': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '吸烟史（是1，否0）' column to float64.\n",
    "df = df.astype({'吸烟史（是1，否0）': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '饮酒史（是1，否0）' column to float64.\n",
    "df = df.astype({'饮酒史（是1，否0）': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '过敏史（是1，否）' column to float64.\n",
    "df = df.astype({'过敏史（是1，否）': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '心血管风险（1是，0否）' column to float64.\n",
    "df = df.astype({'心血管风险（1是，0否）': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast the '胃肠道风险（1是，0否）' column to float64.\n",
    "df = df.astype({'胃肠道风险（1是，0否）': 'float64'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast all of the columns listed below to float64 with a single astype call\n",
    "# (one column -> dtype mapping instead of one assignment statement per column).\n",
    "df = df.astype(dict.fromkeys([\n",
    "    '缓释强阿片', '即释强阿片', '缓释弱阿片', '即释弱阿片', '非甾体抗炎药',\n",
    "    '抗惊厥/抗抑郁类药物', '其他', '是否阿片耐受（是1，否0）',\n",
    "    '便秘（是1,否0）', '恶心或呕吐（是1,否0）', '其他（是1,否0）', '不良反应用药（是1,否0）',\n",
    "    # The '.1'-suffixed names below are a second set of same-named columns.\n",
    "    '缓释强阿片.1', '即释强阿片.1', '缓释弱阿片.1', '即释弱阿片.1', '非甾体抗炎药.1',\n",
    "    '抗惊厥/抗抑郁类药物.1', '其他.1',\n",
    "], 'float64'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empty or single-space strings cannot be parsed as numbers, so map them to 0 first\n",
    "# (the same handling the second pipeline further down applies to this column),\n",
    "# then cast '镇痛药物控制时长' to float64.\n",
    "df['镇痛药物控制时长'] = df['镇痛药物控制时长'].replace(['', ' '], 0).astype('float64')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the '入组日期' strings (strict 'YYYY/MM/DD HH:MM' format) into datetime values.\n",
    "df['入组日期'] = pd.to_datetime(df['入组日期'], format='%Y/%m/%d %H:%M')\n",
    "\n",
    "# Convert the datetimes to seconds since the Unix epoch, stored as float64.\n",
    "# Vectorized arithmetic replaces the per-row Python lambda; the parsed values are\n",
    "# timezone-naive and are measured against a naive 1970-01-01 reference.\n",
    "df['入组日期'] = (df['入组日期'] - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Largest number of rows that share a single value in the '姓名' column.\n",
    "df['姓名'].value_counts().max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Select every float64 column except the class-label column.\n",
    "float_cols = df.select_dtypes(include='float64').columns.drop(\n",
    "    '疼痛缓解情况及用药后疼痛评分（1.完全缓解2.部分缓解3.轻度缓解4.无效）'\n",
    ")\n",
    "\n",
    "# Standardize the selected columns: fit the scaler on them, then write the\n",
    "# transformed values back into the same columns.\n",
    "scaler = StandardScaler().fit(df[float_cols])\n",
    "df.loc[:, float_cols] = scaler.transform(df[float_cols])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the class labels start from 0 (subtract 1) and store them as integers.\n",
    "# NOTE: this cell is not idempotent - running it again subtracts 1 a second time.\n",
    "df['疼痛缓解情况及用药后疼痛评分（1.完全缓解2.部分缓解3.轻度缓解4.无效）'] = (\n",
    "    df['疼痛缓解情况及用药后疼痛评分（1.完全缓解2.部分缓解3.轻度缓解4.无效）'].sub(1).astype('int')\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check before saving: report whether any missing value is present in the frame.\n",
    "print(\"DataFrame 中含有 NaN 值\" if df.isna().to_numpy().any() else \"DataFrame 中不含有 NaN 值\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the prepared frame as a pickle next to the notebook.\n",
    "df.to_pickle(path='./df_class.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-read the same raw CSV; this rebinds `df`, discarding the frame prepared above.\n",
    "df = pd.read_csv(filepath_or_buffer='./data/最终数据.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill every missing value with 0 (no rows are dropped in this pipeline).\n",
    "df = df.fillna(value=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity check: report whether any missing value is still present anywhere in the frame.\n",
    "print(\"DataFrame 中含有 NaN 值\" if df.isna().to_numpy().any() else \"DataFrame 中不含有 NaN 值\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cast all of the columns listed below to float64 with a single astype call\n",
    "# (one column -> dtype mapping instead of one assignment statement per column).\n",
    "# Note: the '性别 ' column name carries a trailing space.\n",
    "df = df.astype(dict.fromkeys([\n",
    "    '性别 ', '年龄', '吸烟史（是1，否0）', '饮酒史（是1，否0）', '过敏史（是1，否）',\n",
    "    '心血管风险（1是，0否）', '胃肠道风险（1是，0否）',\n",
    "    '缓释强阿片', '即释强阿片', '缓释弱阿片', '即释弱阿片', '非甾体抗炎药',\n",
    "    '抗惊厥/抗抑郁类药物', '其他', '是否阿片耐受（是1，否0）',\n",
    "    '便秘（是1,否0）', '恶心或呕吐（是1,否0）', '其他（是1,否0）', '不良反应用药（是1,否0）',\n",
    "    # The '.1'-suffixed names below are a second set of same-named columns.\n",
    "    '缓释强阿片.1', '即释强阿片.1', '缓释弱阿片.1', '即释弱阿片.1', '非甾体抗炎药.1',\n",
    "    '抗惊厥/抗抑郁类药物.1', '其他.1',\n",
    "], 'float64'))\n",
    "\n",
    "# Replace empty and single-space strings in '镇痛药物控制时长' with 0, then cast to float64.\n",
    "df['镇痛药物控制时长'] = df['镇痛药物控制时长'].replace(['', ' '], 0).astype('float64')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the '入组日期' strings (strict 'YYYY/MM/DD HH:MM' format) into datetime values.\n",
    "df['入组日期'] = pd.to_datetime(df['入组日期'], format='%Y/%m/%d %H:%M')\n",
    "\n",
    "# Convert the datetimes to seconds since the Unix epoch, stored as float64.\n",
    "# Vectorized arithmetic replaces the per-row Python lambda; the parsed values are\n",
    "# timezone-naive and are measured against a naive 1970-01-01 reference.\n",
    "df['入组日期'] = (df['入组日期'] - pd.Timestamp('1970-01-01')) / pd.Timedelta(seconds=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Largest number of rows that share a single value in the '姓名' column.\n",
    "df['姓名'].value_counts().max()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on an independent deep copy so `df` stays unscaled for the datasets built below.\n",
    "df_re_1 = df.copy(deep=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# Select every float64 column except '缓释强阿片.1', which keeps its original scale\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "float_cols = df_re_1.select_dtypes(include='float64').columns.drop('缓释强阿片.1')\n",
    "\n",
    "# Standardize the selected columns: fit the scaler, then write the transformed values back.\n",
    "scaler = StandardScaler().fit(df_re_1[float_cols])\n",
    "df_re_1.loc[:, float_cols] = scaler.transform(df_re_1[float_cols])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the first scaled dataset as a pickle next to the notebook.\n",
    "df_re_1.to_pickle(path='./df_re_1.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 2: same preparation, but '即释强阿片.1' is the column left unscaled\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "df_re_2 = df.copy(deep=True)\n",
    "float_cols = df_re_2.select_dtypes(include='float64').columns.drop('即释强阿片.1')\n",
    "\n",
    "# Standardize the selected columns, then save the result.\n",
    "scaler = StandardScaler().fit(df_re_2[float_cols])\n",
    "df_re_2.loc[:, float_cols] = scaler.transform(df_re_2[float_cols])\n",
    "df_re_2.to_pickle(path='./df_re_2.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 3: same preparation, but '缓释弱阿片.1' is the column left unscaled\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "df_re_3 = df.copy(deep=True)\n",
    "float_cols = df_re_3.select_dtypes(include='float64').columns.drop('缓释弱阿片.1')\n",
    "\n",
    "# Standardize the selected columns, then save the result.\n",
    "scaler = StandardScaler().fit(df_re_3[float_cols])\n",
    "df_re_3.loc[:, float_cols] = scaler.transform(df_re_3[float_cols])\n",
    "df_re_3.to_pickle(path='./df_re_3.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 4: same preparation, but '即释弱阿片.1' is the column left unscaled\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "df_re_4 = df.copy(deep=True)\n",
    "float_cols = df_re_4.select_dtypes(include='float64').columns.drop('即释弱阿片.1')\n",
    "\n",
    "# Standardize the selected columns, then save the result.\n",
    "scaler = StandardScaler().fit(df_re_4[float_cols])\n",
    "df_re_4.loc[:, float_cols] = scaler.transform(df_re_4[float_cols])\n",
    "df_re_4.to_pickle(path='./df_re_4.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 5: same preparation, but '非甾体抗炎药.1' is the column left unscaled\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "df_re_5 = df.copy(deep=True)\n",
    "float_cols = df_re_5.select_dtypes(include='float64').columns.drop('非甾体抗炎药.1')\n",
    "\n",
    "# Standardize the selected columns, then save the result.\n",
    "scaler = StandardScaler().fit(df_re_5[float_cols])\n",
    "df_re_5.loc[:, float_cols] = scaler.transform(df_re_5[float_cols])\n",
    "df_re_5.to_pickle(path='./df_re_5.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 6: same preparation, but '抗惊厥/抗抑郁类药物.1' is the column left unscaled\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "df_re_6 = df.copy(deep=True)\n",
    "float_cols = df_re_6.select_dtypes(include='float64').columns.drop('抗惊厥/抗抑郁类药物.1')\n",
    "\n",
    "# Standardize the selected columns, then save the result.\n",
    "scaler = StandardScaler().fit(df_re_6[float_cols])\n",
    "df_re_6.loc[:, float_cols] = scaler.transform(df_re_6[float_cols])\n",
    "df_re_6.to_pickle(path='./df_re_6.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset 7: same preparation, but '其他.1' is the column left unscaled\n",
    "# (presumably the target of this dataset - TODO confirm).\n",
    "df_re_7 = df.copy(deep=True)\n",
    "float_cols = df_re_7.select_dtypes(include='float64').columns.drop('其他.1')\n",
    "\n",
    "# Standardize the selected columns, then save the result.\n",
    "scaler = StandardScaler().fit(df_re_7[float_cols])\n",
    "df_re_7.loc[:, float_cols] = scaler.transform(df_re_7[float_cols])\n",
    "df_re_7.to_pickle(path='./df_re_7.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round-trip check: reload the pickle written earlier in this notebook and preview it.\n",
    "restored_df_A = pd.read_pickle(filepath_or_buffer='./df_class.pkl')\n",
    "restored_df_A.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Round-trip check for the first scaled dataset; note this reuses (overwrites) the\n",
    "# `restored_df_A` name from the previous cell.\n",
    "restored_df_A = pd.read_pickle(filepath_or_buffer='./df_re_1.pkl')\n",
    "restored_df_A.head(n=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
