{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "is_executing": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(r'E:\\Program Files (x86)\\Desktop\\论文\\NIPS\\All_data.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', None)\n",
    "pd.set_option('display.max_colwidth', None)\n",
    "pd.set_option('display.width', 1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "df['Gender'] = df['Gender'].astype('float64')\n",
    "mean_value = df['Gender'].mean()"
   ],
   "metadata": {
    "collapsed": false
   },
   "execution_count": null
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "df['Gender'] = df['Gender'].fillna(mean_value)"
   ],
   "metadata": {
    "collapsed": false
   },
   "execution_count": null
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Age'] = df['Age'].astype('float64')\n",
    "mean_value = df['Age'].mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Age'] = df['Age'].fillna(mean_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Height'] = df['Height'].astype('float64')\n",
    "mean_value = df['Height'].mean()\n",
    "df['Height'] = df['Height'].fillna(mean_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['Weight'] = df['Weight'].astype('float64')\n",
    "mean_value = df['Weight'].mean()\n",
    "df['Weight'] = df['Weight'].fillna(mean_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df['BMI'] = df['BMI'].astype('float64')\n",
    "mean_value = df['BMI'].mean()\n",
    "df['BMI'] = df['BMI'].fillna(mean_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.fillna(0, inplace=True)\n",
    "if df.isnull().values.any():\n",
    "    print(\"DataFrame 中含有 NaN 值\")\n",
    "else:\n",
    "    print(\"DataFrame 中不含有 NaN 值\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 将日期字符串转换为 datetime 类型\n",
    "df['Medical Record Date'] = pd.to_datetime(df['Medical Record Date'], format='%Y/%m/%d')\n",
    "\n",
    "# 转换 datetime 为时间戳（单位为秒），并转换为浮点数\n",
    "df['Medical Record Date'] = df['Medical Record Date'].apply(lambda x: x.timestamp()).astype(float)"
   ]
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "df.head(5)"
   ],
   "metadata": {
    "collapsed": false
   },
   "execution_count": null
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df2 = df.drop(['Pain Relief Status','ERSO_Recom','IRSO_Recom','ERWO_Recom','IRWO_Recom','NSAIDs_Recom','A/A_Recom','Others_Recom'], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalized_df = (df2 - df2.mean()) / df2.std()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalized_df['Pain Relief Status']=df['Pain Relief Status']\n",
    "normalized_df['ERSO_Recom']=df['ERSO_Recom']\n",
    "normalized_df['IRSO_Recom']=df['IRSO_Recom']\n",
    "normalized_df['ERWO_Recom']=df['ERWO_Recom']\n",
    "normalized_df['IRWO_Recom']=df['IRWO_Recom']\n",
    "normalized_df['NSAIDs_Recom']=df['NSAIDs_Recom']\n",
    "normalized_df['A/A_Recom']=df['A/A_Recom']\n",
    "normalized_df['Others_Recom']=df['Others_Recom']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalized_df.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalized_df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if normalized_df.isnull().values.any():\n",
    "    print(\"DataFrame 中含有 NaN 值\")\n",
    "else:\n",
    "    print(\"DataFrame 中不含有 NaN 值\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "normalized_df.to_pickle('./df_A.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "restored_df_A = pd.read_pickle('./df_A.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "restored_df_A.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "restored_df_A.head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
