{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4ec6760a-808f-4917-9b9a-4a711290d16a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from mpl_toolkits.mplot3d import Axes3D  # 导入3D绘图工具\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.compose import TransformedTargetRegressor\n",
    "from sklearn.ensemble import GradientBoostingRegressor\n",
    "from scipy.optimize import minimize\n",
    "\n",
    "def load_data(file_path):\n",
    "    df = pd.read_csv(file_path, sep=r'\\s+', engine='python', encoding='utf-8')\n",
    "    df.columns = ['INDEX', 'CODE', 'MATH', 'GENERAL', '平均RL-LOSS', '最大RL-LOSS','PPL']\n",
    "    df['平均RL-LOSS'] = df['平均RL-LOSS'].clip(lower=0)\n",
    "    df['最大RL-LOSS'] = df['最大RL-LOSS'].clip(lower=0)\n",
    "    df['比例总和'] = df['CODE'] + df['MATH'] + df['GENERAL']\n",
    "    df = df[np.isclose(df['比例总和'], 1.0, atol=0.01)].copy()\n",
    "    return df[['CODE', 'MATH', 'GENERAL', '平均RL-LOSS', '最大RL-LOSS', 'PPL']]\n",
    "\n",
    "def train_model(X, y):\n",
    "    \"\"\"Fit a gradient-boosting regressor on a log1p-transformed target.\n",
    "\n",
    "    Features pass through a ``StandardScaler`` before the regressor. The\n",
    "    target is mapped with ``np.log1p`` for fitting and predictions are mapped\n",
    "    back with ``np.expm1`` by the ``TransformedTargetRegressor`` wrapper.\n",
    "\n",
    "    Args:\n",
    "        X: Feature table used for fitting.\n",
    "        y: Target values aligned with ``X``.\n",
    "\n",
    "    Returns:\n",
    "        The fitted ``TransformedTargetRegressor``.\n",
    "    \"\"\"\n",
    "    booster = GradientBoostingRegressor(\n",
    "        learning_rate=0.05,\n",
    "        max_depth=4,\n",
    "        n_estimators=200,\n",
    "        random_state=42,\n",
    "    )\n",
    "    scaled_booster = Pipeline(steps=[\n",
    "        ('scaler', StandardScaler()),\n",
    "        ('regressor', booster),\n",
    "    ])\n",
    "    fitted = TransformedTargetRegressor(\n",
    "        regressor=scaled_booster,\n",
    "        func=np.log1p,\n",
    "        inverse_func=np.expm1,\n",
    "    )\n",
    "    fitted.fit(X, y)\n",
    "    return fitted\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6ec04b9-c649-473d-8a68-3294662f3459",
   "metadata": {},
   "outputs": [],
   "source": [
    "def optimize_with_grid_and_refine(model, coarse_step=0.005):\n",
    "    # 粗网格搜索\n",
    "    grid = np.arange(0.0001, 1.01, coarse_step)\n",
    "    best_cfgs = []  # 保存最优的三个配置 (loss, (code, math, general))\n",
    "    worst_cfgs = []  # 保存最差的三个配置 (loss, (code, math, general))\n",
    "\n",
    "    for code in grid:\n",
    "        for math in grid:\n",
    "            general = 1.0 - code - math\n",
    "            if 0 <= general <= 1:\n",
    "                X_input = pd.DataFrame([[code, math, general]], columns=['CODE', 'MATH', 'GENERAL'])\n",
    "                pred = model.predict(X_input)[0]\n",
    "                candidate = (pred, (code, math, general))\n",
    "                \n",
    "                # 更新最优的三个点\n",
    "                if len(best_cfgs) < 3:\n",
    "                    best_cfgs.append(candidate)\n",
    "                    best_cfgs.sort(key=lambda x: x[0])  # 按损失升序排序\n",
    "                else:\n",
    "                    if pred < best_cfgs[2][0]:\n",
    "                        best_cfgs[2] = candidate\n",
    "                        best_cfgs.sort(key=lambda x: x[0])\n",
    "                \n",
    "                # 更新最差的三个点\n",
    "                if len(worst_cfgs) < 3:\n",
    "                    worst_cfgs.append(candidate)\n",
    "                    worst_cfgs.sort(key=lambda x: x[0], reverse=True)  # 按损失降序排序\n",
    "                else:\n",
    "                    if pred > worst_cfgs[2][0]:\n",
    "                        worst_cfgs[2] = candidate\n",
    "                        worst_cfgs.sort(key=lambda x: x[0], reverse=True)\n",
    "\n",
    "    # 打印粗网格搜索结果\n",
    "    print(\"🟡 粗网格搜索结果 - 最优的三个点:\")\n",
    "    for i, (loss, cfg) in enumerate(best_cfgs):\n",
    "        print(f\"  第 {i+1} 优: CODE={cfg[0]:.3f}, MATH={cfg[1]:.3f}, GENERAL={cfg[2]:.3f}, RL-LOSS={loss:.6f}\")\n",
    "    \n",
    "    print(\"🔴 粗网格搜索结果 - 最差的三个点:\")\n",
    "    for i, (loss, cfg) in enumerate(worst_cfgs):\n",
    "        print(f\"  第 {i+1} 差: CODE={cfg[0]:.3f}, MATH={cfg[1]:.3f}, GENERAL={cfg[2]:.3f}, RL-LOSS={loss:.6f}\")\n",
    "    \n",
    "    # 局部优化精修（对最优的三个点都进行精修）\n",
    "    def loss_fn(x):\n",
    "        code, math = x\n",
    "        general = 1.0 - code - math\n",
    "        if general < 0 or general > 1:\n",
    "            return 1e6\n",
    "        X = pd.DataFrame([[code, math, general]], columns=['CODE', 'MATH', 'GENERAL'])\n",
    "        return model.predict(X)[0]\n",
    "\n",
    "    bounds = [(0, 1), (0, 1)]\n",
    "    constraints = {'type': 'ineq', 'fun': lambda x: 1.0 - x[0] - x[1]}\n",
    "    \n",
    "    refined_results = []\n",
    "    for i, (init_loss, init_cfg) in enumerate(best_cfgs):\n",
    "        x0 = [init_cfg[0], init_cfg[1]]\n",
    "        result = minimize(loss_fn, x0, method='SLSQP', bounds=bounds, constraints=constraints)\n",
    "        \n",
    "        if result.success:\n",
    "            code, math = result.x\n",
    "            general = 1.0 - code - math\n",
    "            refined_loss = result.fun\n",
    "            refined_results.append((refined_loss, (code, math, general)))\n",
    "            print(f\"✅  优化 {i+1} 成功: CODE={code:.3f}, MATH={math:.3f}, GENERAL={general:.3f}, RL-LOSS={refined_loss:.6f}\")\n",
    "        else:\n",
    "            print(f\"⚠️ 优化 {i+1} 失败: {result.message}\")\n",
    "            # 如果优化失败，保留原始值\n",
    "            refined_results.append((init_loss, init_cfg))\n",
    "    \n",
    "    # 按优化后的损失排序\n",
    "    refined_results.sort(key=lambda x: x[0])\n",
    "\n",
    "    return refined_results[0][0], refined_results[0][1], worst_cfgs[0][0], worst_cfgs[0][1]\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a405a505-c354-4ed0-a0eb-2cbba1bf093c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_3d_surface(model, best_point=None, worst_point=None):\n",
    "    \"\"\"Plot the predicted PPL as a 3D surface over the CODE and GENERAL shares.\n",
    "\n",
    "    MATH is implied as 1 - CODE - GENERAL; grid cells where that value is not\n",
    "    positive are left as NaN. The figure is saved to\n",
    "    ``3d_visualization_clean.pdf`` and then shown.\n",
    "\n",
    "    Args:\n",
    "        model: Fitted model whose ``predict`` accepts a DataFrame with columns\n",
    "            CODE, MATH, GENERAL.\n",
    "        best_point: Optional ``(code, math, general)`` mixture to highlight\n",
    "            with a red marker.\n",
    "        worst_point: Accepted for call compatibility; it is not drawn.\n",
    "    \"\"\"\n",
    "    feature_names = ['CODE', 'MATH', 'GENERAL']\n",
    "\n",
    "    # Build the evaluation grid over CODE (x) and GENERAL (y)\n",
    "    resolution = 100\n",
    "    code_vals = np.linspace(0.01, 0.98, resolution)\n",
    "    general_vals = np.linspace(0.01, 0.98, resolution)\n",
    "    CODE, GENERAL = np.meshgrid(code_vals, general_vals)\n",
    "    MATH = 1.0 - CODE - GENERAL\n",
    "\n",
    "    # Score every feasible cell with one batched predict call; cells where\n",
    "    # MATH is not positive stay NaN\n",
    "    loss_grid = np.full(CODE.shape, np.nan)\n",
    "    feasible = MATH > 0\n",
    "    X_grid = pd.DataFrame(\n",
    "        np.column_stack([CODE[feasible], MATH[feasible], GENERAL[feasible]]),\n",
    "        columns=feature_names,\n",
    "    )\n",
    "    loss_grid[feasible] = model.predict(X_grid)\n",
    "\n",
    "    # Create the 3D figure\n",
    "    fig = plt.figure(figsize=(14, 10))\n",
    "    ax = fig.add_subplot(111, projection='3d')\n",
    "\n",
    "    # Draw the surface\n",
    "    surf = ax.plot_surface(CODE, GENERAL, loss_grid, cmap='viridis',\n",
    "                           alpha=0.8, rstride=2, cstride=2, edgecolor='none')\n",
    "\n",
    "    # Add the colour bar\n",
    "    fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10, label='PPL')\n",
    "\n",
    "    # Mark the optimum; `is not None` avoids ambiguous truthiness for array inputs\n",
    "    if best_point is not None:\n",
    "        code, math, general = best_point\n",
    "        loss = model.predict(pd.DataFrame([[code, math, general]],\n",
    "                                          columns=feature_names))[0]\n",
    "        ax.scatter([code], [general], [loss],\n",
    "                   s=200, c='red', marker='o', edgecolor='black',\n",
    "                   label='Minimum PPL')\n",
    "        ax.legend()\n",
    "\n",
    "    def percent_format(val, pos):\n",
    "        # round() rather than int(): 0.29 * 100 is 28.999..., which int() would show as 28%\n",
    "        return f\"{int(round(float(val) * 100))}%\"\n",
    "\n",
    "    # Reverse X/Y so both axes read from 100% down to 0%\n",
    "    ax.set_xlim(1, 0)\n",
    "    ax.set_ylim(1, 0)\n",
    "    ax.xaxis.set_major_formatter(plt.FuncFormatter(percent_format))\n",
    "    ax.yaxis.set_major_formatter(plt.FuncFormatter(percent_format))\n",
    "\n",
    "    # Axis labels; the Y axis carries GENERAL, matching the plotted surface\n",
    "    ax.set_xlabel('CODE', fontsize=12, labelpad=10)\n",
    "    ax.set_ylabel('GENERAL', fontsize=12, labelpad=10)\n",
    "    ax.set_zlabel('PPL', fontsize=12, labelpad=10)\n",
    "\n",
    "    # Camera angle\n",
    "    ax.view_init(elev=30, azim=45)\n",
    "\n",
    "    plt.title('Impact of CODE MATH and GENERAL Proportions on PPL', fontsize=14, pad=20)\n",
    "\n",
    "    # Apply the layout before saving so the PDF matches what is shown\n",
    "    plt.tight_layout()\n",
    "    plt.savefig('3d_visualization_clean.pdf', dpi=300, bbox_inches='tight')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c20db38a-661e-4c4b-8ca8-45739e6ce1e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(file_path='1M_data_config_ppl.txt', target='PPL'):\n",
    "    \"\"\"Fit a model for one target column, search for the best mixture and plot it.\n",
    "\n",
    "    Args:\n",
    "        file_path: Text file passed to ``load_data``.\n",
    "        target: Name of the column in the loaded table to predict.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no rows remain after ``load_data`` filtering.\n",
    "    \"\"\"\n",
    "    df = load_data(file_path)\n",
    "    if df.empty:\n",
    "        raise ValueError(f\"no usable rows loaded from {file_path!r}\")\n",
    "    X = df[['CODE', 'MATH', 'GENERAL']]\n",
    "\n",
    "    # Only the selected target column is modelled\n",
    "    y = df[target]\n",
    "    # Summary statistics instead of dumping the whole column\n",
    "    print(y.describe())\n",
    "    print(f\"\\n📌  正在拟合模型预测 {target} ...\")\n",
    "    model = train_model(X, y)\n",
    "\n",
    "    # Locate the best (and worst) mixture\n",
    "    best_loss, best_cfg, worst_loss, worst_cfg = optimize_with_grid_and_refine(model)\n",
    "\n",
    "    # Draw the 3D surface\n",
    "    print(\"\\n📊 创建简洁的3D曲面图...\")\n",
    "    create_3d_surface(model, best_point=best_cfg, worst_point=worst_cfg)\n",
    "\n",
    "    print(f\"\\n🔍  [{target}] 最优配比结果:\")\n",
    "    print(f\"  CODE    = {best_cfg[0]:.4f}\")\n",
    "    print(f\"  MATH    = {best_cfg[1]:.4f}\")\n",
    "    print(f\"  GENERAL = {best_cfg[2]:.4f}\")\n",
    "    print(f\"  预测{target} = {best_loss:.6f}\")\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d231761f-cde6-43d0-9afb-2676a2d3d42c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a414cec-3253-44b6-b422-480b82f85bae",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cmsft",
   "language": "python",
   "name": "cmsft"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
