{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e538dded",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from metagen.benchmarks import *\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f937ab8",
   "metadata": {},
   "outputs": [],
   "source": [
    "def _inference_entry(model_name, task):\n",
    "    \"\"\"Build the config dict for a run stored under /workspace/inference_data.\n",
    "\n",
    "    Args:\n",
    "        model_name: Value of 'model_name'; also the name of the run directory.\n",
    "        task: Benchmark path relative to '/benchmark', e.g. 'omnitask'.\n",
    "\n",
    "    Returns:\n",
    "        dict with the same five keys as the hand-written LLaVA entries below;\n",
    "        'index_at_one' is False for every run built this way.\n",
    "    \"\"\"\n",
    "    run_dir = f'/workspace/inference_data/{model_name}/{task}'\n",
    "    return {\n",
    "        'test_path': f'/benchmark/{task}/test.jsonl',\n",
    "        'prediction_path': f'{run_dir}/predicted_code',\n",
    "        'processed_path': f'{run_dir}/predicted_successes',\n",
    "        'model_name': model_name,\n",
    "        'index_at_one': False,\n",
    "    }\n",
    "\n",
    "\n",
    "# LLaVA runs use irregular directory names (and index_at_one=True), so they\n",
    "# are spelled out in full.\n",
    "llava_omnitask = {\n",
    "    'test_path': '/benchmark/omnitask/test.jsonl',\n",
    "    'prediction_path': '/workspace/benchmark_inference/omni/evaluate_test_18500_sysp_omni_test/predicted_code',\n",
    "    'processed_path': '/workspace/benchmark_inference/omni/evaluate_test_18500_sysp_omni_test/predicted_successes',\n",
    "    'model_name': 'LLaVAOmniTask',\n",
    "    'index_at_one': True\n",
    "}\n",
    "\n",
    "# reconstruction task\n",
    "llava_reconstruction = {\n",
    "    'test_path': '/benchmark/reconstruction/4_view_reconstruction/test.jsonl',\n",
    "    'prediction_path': '/workspace/benchmark_inference/reconstruction/llava/evaluate_test_6500_raw_sysp_4view_newdata/predicted_code',\n",
    "    'processed_path': '/workspace/benchmark_inference/reconstruction/llava/evaluate_test_6500_raw_sysp_4view_newdata/predicted_successes',\n",
    "    'model_name': 'LLaVASingleTask',\n",
    "    'index_at_one': True\n",
    "}\n",
    "\n",
    "# inverse design task\n",
    "llava_inverse_design = {\n",
    "    'test_path': '/benchmark/inverse_design/4_target_inverse_design/test.jsonl',\n",
    "    'prediction_path': '/workspace/benchmark_inference/inverse_design/llava/evaluate_test_9000_raw_sysp_4view/predicted_code',\n",
    "    'processed_path': '/workspace/benchmark_inference/inverse_design/llava/evaluate_test_9000_raw_sysp_4view/predicted_successes',\n",
    "    'model_name': 'LLaVASingleTask',\n",
    "    'index_at_one': True\n",
    "}\n",
    "\n",
    "# material understanding task\n",
    "llava_material_understanding = {\n",
    "    'test_path': '/benchmark/material_understanding/multiview_and_code_material_understanding/test.jsonl',\n",
    "    'prediction_path': '/workspace/benchmark_inference/material_understanding/llava/evaluate_test_7000_sysp_raw_4view/predicted_code',\n",
    "    'processed_path': None, # no need to process\n",
    "    'model_name': 'LLaVASingleTask',\n",
    "    'index_at_one': True\n",
    "}\n",
    "\n",
    "# Runs that follow the regular /workspace/inference_data/<model>/<task> layout.\n",
    "nova_omnitask = _inference_entry('NovaOmniTask', 'omnitask')\n",
    "novalite_omnitask = _inference_entry('NovaLite', 'omnitask')\n",
    "nova_inverse_design = _inference_entry('NovaSingleTask', 'inverse_design/4_target_inverse_design')\n",
    "nova_material_understanding = _inference_entry(\n",
    "    'NovaSingleTask', 'material_understanding/multiview_and_code_material_understanding'\n",
    ")\n",
    "nova_reconstruction = _inference_entry('NovaSingleTask', 'reconstruction/4_view_reconstruction')\n",
    "o3_omnitask = _inference_entry('OpenAIO3', 'omnitask')\n",
    "\n",
    "# Every run that the full evaluation iterates over.\n",
    "model_results = [\n",
    "    nova_omnitask,\n",
    "    novalite_omnitask,\n",
    "    nova_inverse_design,\n",
    "    nova_material_understanding,\n",
    "    nova_reconstruction,\n",
    "    llava_omnitask,\n",
    "    llava_reconstruction,\n",
    "    llava_inverse_design,\n",
    "    llava_material_understanding,\n",
    "    o3_omnitask\n",
    "]\n",
    "db = Database('/data/metagen-data/v3/')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "158b2ee9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from metagen.benchmarks import *\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def group_table(bmr):\n",
    "    \"\"\"Return the mean `value` per (Model, Category, metric) for the headline metrics.\n",
    "\n",
    "    Rows whose `metric` is not one of the four headline metrics are ignored.\n",
    "    \"\"\"\n",
    "    headline_metrics = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "    group_keys = ['Model', 'Category', 'metric']\n",
    "    selected = bmr.loc[bmr.metric.isin(headline_metrics), group_keys + ['value']]\n",
    "    return selected.groupby(group_keys).mean()\n",
    "# Evaluate every configured run; the resulting frame is inspected by the\n",
    "# debug_results cells further down the notebook.\n",
    "debug_results = evaluate_benchmarks(db, '/workspace/benchmark_results.parquet', model_results)\n",
    "#group_table(debug_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b3fc8e7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate only the OpenAI o3 omnitask run, with its own parquet path.\n",
    "o3_only_results = evaluate_benchmarks(db, '/workspace/benchmark_results_just_o3.parquet', [o3_omnitask])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5a670f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): make_pivot is defined in a later cell of this notebook; that\n",
    "# cell has to be run first on a fresh kernel.\n",
    "make_pivot(o3_only_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d6ab0764",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from metagen.benchmarks import *\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a20a27a6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the main results on top of the filtered NovaLite rows.\n",
    "# NOTE(review): benchmark_results and filtered_nl2 are both assigned in later\n",
    "# cells, so this cell fails on a top-to-bottom run of a fresh kernel.\n",
    "combined_data = pd.concat([benchmark_results, filtered_nl2], axis=0).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d68d992",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the combined results as both CSV and parquet.\n",
    "combined_data.to_csv('/data/metagen-data/v3/workspace/benchmark_results.csv')\n",
    "combined_data.to_parquet('/data/metagen-data/v3/workspace/benchmark_results.parquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "431b752a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "18fa3331",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary table (Model x Category/Metric) for the combined results.\n",
    "make_pivot(combined_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63f9b7eb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate the NovaLite omnitask run on its own, with a separate parquet path.\n",
    "benchmark_results_nova_lite = evaluate_benchmarks(db, '/workspace/benchmark_results_nova_lite.parquet', [novalite_omnitask])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c879da5a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Evaluate all configured runs (same arguments as the debug_results cell).\n",
    "benchmark_results = evaluate_benchmarks(db, '/workspace/benchmark_results.parquet', model_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "95f87a9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Export the full evaluation results to CSV.\n",
    "benchmark_results.to_csv('/data/metagen-data/v3/workspace/benchmark_results.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e5bf884",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary table for the NovaLite-only evaluation.\n",
    "make_pivot(benchmark_results_nova_lite)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "716ff87b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def group_table(bmr):\n",
    "    \"\"\"Return the mean `value` per (Model, Category, metric) for the headline metrics.\n",
    "\n",
    "    Rows whose `metric` is not one of the four headline metrics are ignored.\n",
    "    \"\"\"\n",
    "    headline_metrics = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "    group_keys = ['Model', 'Category', 'metric']\n",
    "    selected = bmr.loc[bmr.metric.isin(headline_metrics), group_keys + ['value']]\n",
    "    return selected.groupby(group_keys).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1e4b034b",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_pivot(df):\n",
    "    \"\"\"Pivot the headline metrics into a Model x (Category, Metric) table of means.\n",
    "\n",
    "    Rows whose `metric` is not one of the four headline metrics are dropped.\n",
    "    Category keys and long metric names are replaced by their display labels,\n",
    "    and the `metric` column is exposed as `Metric` in the column index.\n",
    "    \"\"\"\n",
    "    headline_metrics = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "    category_labels = {\n",
    "        'inverse_design': 'Inverse Design',\n",
    "        'material_understanding': 'Material Understanding',\n",
    "        'reconstruction': 'Reconstruction',\n",
    "    }\n",
    "    metric_labels = {\n",
    "        'Average Normalized Error': 'Error',\n",
    "        'Chamfer Distance': 'CD',\n",
    "    }\n",
    "\n",
    "    tidy = (\n",
    "        df[df.metric.isin(headline_metrics)]\n",
    "        .assign(\n",
    "            Category=lambda frame: frame['Category'].replace(category_labels),\n",
    "            metric=lambda frame: frame['metric'].replace(metric_labels),\n",
    "        )\n",
    "        .rename(columns={'metric': 'Metric'})\n",
    "    )\n",
    "    return tidy.pivot_table(\n",
    "        index='Model',\n",
    "        columns=['Category', 'Metric'],\n",
    "        values='value',\n",
    "        aggfunc='mean',\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "d9de4dbb",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Define which metrics should be minimized or maximized\n",
    "minimize_metrics = {'Error', 'CD'}\n",
    "maximize_metrics = {'IoU', 'Valid'}\n",
    "\n",
    "\n",
    "def _format_cell(x, metric_name, best_val):\n",
    "    \"\"\"Render one cell as LaTeX text, in bold when it equals `best_val`.\"\"\"\n",
    "    if pd.isna(x):\n",
    "        return r'\\textemdash{}'  # Represent NaNs as a dash\n",
    "    if metric_name == 'Valid':\n",
    "        # 'Valid' is scaled by 100 and printed as a percentage.\n",
    "        percent = x * 100\n",
    "        formatted = '100\\\\%' if percent == 100 else f'{percent:.1f}\\\\%'\n",
    "    else:\n",
    "        formatted = f'{x:.3f}'\n",
    "    return f'\\\\textbf{{{formatted}}}' if x == best_val else formatted\n",
    "\n",
    "\n",
    "# Function to bold best values\n",
    "def highlight_best(df):\n",
    "    \"\"\"Format a pivot table as LaTeX strings and bold the best value per column.\n",
    "\n",
    "    Args:\n",
    "        df: Numeric pivot table. Column labels are either tuples whose second\n",
    "            entry is the metric name, e.g. (Category, Metric), or plain metric\n",
    "            names.\n",
    "\n",
    "    Returns:\n",
    "        A copy of `df` in which every column whose metric is listed in\n",
    "        `minimize_metrics` or `maximize_metrics` holds formatted strings;\n",
    "        columns with any other metric are left untouched. Ties for the best\n",
    "        value are all bolded.\n",
    "    \"\"\"\n",
    "    formatted_df = df.copy()\n",
    "\n",
    "    for col in df.columns:\n",
    "        values = df[col]\n",
    "        # Indexing a flat string label with [1] would pick its second character,\n",
    "        # so only tuple labels are unpacked.\n",
    "        metric_name = col[1] if isinstance(col, tuple) else col\n",
    "\n",
    "        # Determine best value (ignoring NaNs)\n",
    "        if metric_name in minimize_metrics:\n",
    "            best_val = values.min(skipna=True)\n",
    "        elif metric_name in maximize_metrics:\n",
    "            best_val = values.max(skipna=True)\n",
    "        else:\n",
    "            continue\n",
    "\n",
    "        formatted_df[col] = values.apply(lambda x: _format_cell(x, metric_name, best_val))\n",
    "\n",
    "    return formatted_df\n",
    "\n",
    "def to_latex(formatted_pivot):\n",
    "    \"\"\"Render a formatted pivot table as a LaTeX tabular with centered columns.\n",
    "\n",
    "    Args:\n",
    "        formatted_pivot: DataFrame whose cells already hold LaTeX-ready text\n",
    "            (cell contents are not escaped).\n",
    "\n",
    "    Returns:\n",
    "        The LaTeX source produced by DataFrame.to_latex.\n",
    "    \"\"\"\n",
    "    # One 'c' per row-index level plus one per data column, so the column\n",
    "    # spec also matches frames with a multi-level row index.\n",
    "    n_cols = formatted_pivot.index.nlevels + formatted_pivot.shape[1]\n",
    "\n",
    "    return formatted_pivot.to_latex(\n",
    "        escape=False,\n",
    "        multicolumn=True,\n",
    "        multicolumn_format='c',\n",
    "        column_format='c' * n_cols,\n",
    "    )\n",
    "# Apply the formatting\n",
    "#formatted_pivot = highlight_best(pivot_table)\n",
    "\n",
    "# Convert to LaTeX\n",
    "#latex_str = formatted_pivot.to_latex(escape=False, multicolumn=True, multicolumn_format='c')\n",
    "\n",
    "# Output LaTeX string\n",
    "#print(latex_str)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdca6b6d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install fastparquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "592a928b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the saved benchmark results with the fastparquet engine.\n",
    "bmr = pd.read_parquet('/data/metagen-data/v3/workspace/benchmark_results.parquet', engine='fastparquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e8a1c48b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of result rows per Task value.\n",
    "bmr.Task.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "dd82022a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th>Task</th>\n",
       "      <th colspan=\"2\" halign=\"left\">1 Target</th>\n",
       "      <th colspan=\"2\" halign=\"left\">2 Target</th>\n",
       "      <th colspan=\"2\" halign=\"left\">3 Target</th>\n",
       "      <th colspan=\"2\" halign=\"left\">4 Target</th>\n",
       "      <th colspan=\"2\" halign=\"left\">5 Target</th>\n",
       "      <th colspan=\"2\" halign=\"left\">6 Target</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Metric</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "      <th>Error</th>\n",
       "      <th>Valid</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Model</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>LLaVAOmniTask</th>\n",
       "      <td>0.022806</td>\n",
       "      <td>0.990329</td>\n",
       "      <td>0.011242</td>\n",
       "      <td>0.942639</td>\n",
       "      <td>0.007164</td>\n",
       "      <td>0.931227</td>\n",
       "      <td>0.009968</td>\n",
       "      <td>0.896797</td>\n",
       "      <td>0.007997</td>\n",
       "      <td>0.882771</td>\n",
       "      <td>0.007951</td>\n",
       "      <td>0.879218</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>LLaVASingleTask</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.036247</td>\n",
       "      <td>0.818505</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaLite</th>\n",
       "      <td>0.036171</td>\n",
       "      <td>0.021277</td>\n",
       "      <td>0.049251</td>\n",
       "      <td>0.045889</td>\n",
       "      <td>0.043128</td>\n",
       "      <td>0.020446</td>\n",
       "      <td>0.077813</td>\n",
       "      <td>0.032028</td>\n",
       "      <td>0.083109</td>\n",
       "      <td>0.012433</td>\n",
       "      <td>0.071955</td>\n",
       "      <td>0.028419</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaOmniTask</th>\n",
       "      <td>0.020252</td>\n",
       "      <td>0.903288</td>\n",
       "      <td>0.018464</td>\n",
       "      <td>0.906310</td>\n",
       "      <td>0.023943</td>\n",
       "      <td>0.905204</td>\n",
       "      <td>0.028749</td>\n",
       "      <td>0.927046</td>\n",
       "      <td>0.034508</td>\n",
       "      <td>0.902309</td>\n",
       "      <td>0.028477</td>\n",
       "      <td>0.939609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>NovaSingleTask</th>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.032133</td>\n",
       "      <td>0.791815</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>OpenAIO3</th>\n",
       "      <td>0.044609</td>\n",
       "      <td>0.305221</td>\n",
       "      <td>0.035470</td>\n",
       "      <td>0.201980</td>\n",
       "      <td>0.023259</td>\n",
       "      <td>0.230769</td>\n",
       "      <td>0.045222</td>\n",
       "      <td>0.205176</td>\n",
       "      <td>0.037045</td>\n",
       "      <td>0.282051</td>\n",
       "      <td>0.042319</td>\n",
       "      <td>0.259124</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "Task             1 Target            2 Target            3 Target            \\\n",
       "Metric              Error     Valid     Error     Valid     Error     Valid   \n",
       "Model                                                                         \n",
       "LLaVAOmniTask    0.022806  0.990329  0.011242  0.942639  0.007164  0.931227   \n",
       "LLaVASingleTask       NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "NovaLite         0.036171  0.021277  0.049251  0.045889  0.043128  0.020446   \n",
       "NovaOmniTask     0.020252  0.903288  0.018464  0.906310  0.023943  0.905204   \n",
       "NovaSingleTask        NaN       NaN       NaN       NaN       NaN       NaN   \n",
       "OpenAIO3         0.044609  0.305221  0.035470  0.201980  0.023259  0.230769   \n",
       "\n",
       "Task             4 Target            5 Target            6 Target            \n",
       "Metric              Error     Valid     Error     Valid     Error     Valid  \n",
       "Model                                                                        \n",
       "LLaVAOmniTask    0.009968  0.896797  0.007997  0.882771  0.007951  0.879218  \n",
       "LLaVASingleTask  0.036247  0.818505       NaN       NaN       NaN       NaN  \n",
       "NovaLite         0.077813  0.032028  0.083109  0.012433  0.071955  0.028419  \n",
       "NovaOmniTask     0.028749  0.927046  0.034508  0.902309  0.028477  0.939609  \n",
       "NovaSingleTask   0.032133  0.791815       NaN       NaN       NaN       NaN  \n",
       "OpenAIO3         0.045222  0.205176  0.037045  0.282051  0.042319  0.259124  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def make_category_pivot(df, category):\n",
    "    \"\"\"Pivot one benchmark category into a Model x (Task, Metric) table of means.\n",
    "\n",
    "    Args:\n",
    "        df: Long-format results with Model, Category, Task, metric and value columns.\n",
    "        category: Raw category key to keep, e.g. 'inverse_design'.\n",
    "\n",
    "    Returns:\n",
    "        Pivot table of mean `value`, restricted to the four headline metrics,\n",
    "        with task and metric names replaced by their display labels.\n",
    "    \"\"\"\n",
    "    headline_metrics = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "    category_labels = {\n",
    "        'inverse_design': 'Inverse Design',\n",
    "        'material_understanding': 'Material Understanding',\n",
    "        'reconstruction': 'Reconstruction',\n",
    "    }\n",
    "    metric_labels = {\n",
    "        'Average Normalized Error': 'Error',\n",
    "        'Chamfer Distance': 'CD',\n",
    "    }\n",
    "    task_labels = {\n",
    "        'multiview_and_code_material_understanding': '4 View + Code',\n",
    "        'single_view_material_understanding': '1 View',\n",
    "        '4_target_inverse_design': '4 Target',\n",
    "        '2_view_reconstruction': '2 View',\n",
    "        '6_target_inverse_design': '6 Target',\n",
    "        '3_view_reconstruction': '3 View',\n",
    "        '1_view_reconstruction': '1 View',\n",
    "        '5_target_inverse_design': '5 Target',\n",
    "        '3_target_inverse_design': '3 Target',\n",
    "        '2_target_inverse_design': '2 Target',\n",
    "        '4_view_reconstruction': '4 View',\n",
    "        '1_target_inverse_design': '1 Target',\n",
    "    }\n",
    "\n",
    "    # The category filter is applied to the raw key, before relabelling.\n",
    "    wanted = df.metric.isin(headline_metrics) & (df.Category == category)\n",
    "    tidy = (\n",
    "        df[wanted]\n",
    "        .assign(\n",
    "            Category=lambda frame: frame['Category'].replace(category_labels),\n",
    "            metric=lambda frame: frame['metric'].replace(metric_labels),\n",
    "            Task=lambda frame: frame['Task'].replace(task_labels),\n",
    "        )\n",
    "        .rename(columns={'metric': 'Metric'})\n",
    "    )\n",
    "    return tidy.pivot_table(\n",
    "        index='Model',\n",
    "        columns=['Task', 'Metric'],\n",
    "        values='value',\n",
    "        aggfunc='mean',\n",
    "    )\n",
    "\n",
    "# Per-task breakdown of the inverse-design results currently held in bmr.\n",
    "make_category_pivot(bmr, 'inverse_design')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15eb4df2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-evaluate only the NovaLite omnitask run.\n",
    "# NOTE(review): this passes the same parquet path as the full evaluation of\n",
    "# model_results - confirm that sharing the path is intended.\n",
    "novalitenewfilt = evaluate_benchmarks(db, '/workspace/benchmark_results.parquet', [novalite_omnitask])\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3ed63e38",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary table for the re-evaluated NovaLite results.\n",
    "make_pivot(novalitenewfilt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1226fec4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the unfiltered results, then keep only rows with a non-NaN value below e**10.\n",
    "bmr = pd.read_parquet('/data/metagen-data/v3/workspace/benchmark_results_unfiltered.parquet', engine='fastparquet')\n",
    "# NaN < threshold already evaluates to False; the isna() term spells that out.\n",
    "# NOTE(review): the e**10 cutoff is hard-coded without a stated rationale - TODO document.\n",
    "bmr = bmr[(bmr.value < np.e**10) & (~bmr.value.isna())].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3a3eb9ef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Summary table for the results currently held in bmr.\n",
    "make_pivot(bmr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b5567e5e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{ccccc}\n",
      "\\toprule\n",
      "Task & \\multicolumn{2}{c}{1 View} & \\multicolumn{2}{c}{4 View + Code} \\\\\n",
      "Metric & Error & Valid & Error & Valid \\\\\n",
      "Model &  &  &  &  \\\\\n",
      "\\midrule\n",
      "LLaVAOmniTask & \\textbf{0.026} & \\textbf{100\\%} & 0.023 & \\textbf{100\\%} \\\\\n",
      "LLaVASingleTask & \\textemdash{} & \\textemdash{} & \\textbf{0.018} & \\textbf{100\\%} \\\\\n",
      "NovaLite & 0.208 & \\textbf{100\\%} & 0.192 & \\textbf{100\\%} \\\\\n",
      "NovaOmniTask & 0.031 & \\textbf{100\\%} & 0.032 & \\textbf{100\\%} \\\\\n",
      "NovaSingleTask & \\textemdash{} & \\textemdash{} & 0.153 & \\textbf{100\\%} \\\\\n",
      "OpenAIO3 & 0.084 & \\textbf{100\\%} & 0.071 & \\textbf{100\\%} \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# LaTeX table for the material-understanding tasks, best value per column in bold.\n",
    "print(to_latex(highlight_best(make_category_pivot(bmr, 'material_understanding'))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "483b7a9a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{ccccccccccccc}\n",
      "\\toprule\n",
      "Task & \\multicolumn{3}{c}{1 View} & \\multicolumn{3}{c}{2 View} & \\multicolumn{3}{c}{3 View} & \\multicolumn{3}{c}{4 View} \\\\\n",
      "Metric & CD & IoU & Valid & CD & IoU & Valid & CD & IoU & Valid & CD & IoU & Valid \\\\\n",
      "Model &  &  &  &  &  &  &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "LLaVAOmniTask & \\textbf{0.036} & \\textbf{0.458} & 82.3\\% & \\textbf{0.033} & \\textbf{0.497} & 83.0\\% & \\textbf{0.032} & \\textbf{0.509} & 83.2\\% & 0.033 & 0.497 & 83.2\\% \\\\\n",
      "LLaVASingleTask & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textbf{0.029} & \\textbf{0.524} & 83.8\\% \\\\\n",
      "NovaLite & 0.119 & 0.049 & 18.7\\% & 0.117 & 0.050 & 17.0\\% & 0.118 & 0.053 & 22.0\\% & 0.125 & 0.050 & 25.0\\% \\\\\n",
      "NovaOmniTask & 0.047 & 0.307 & \\textbf{87.5\\%} & 0.044 & 0.338 & \\textbf{87.5\\%} & 0.043 & 0.350 & \\textbf{86.2\\%} & 0.044 & 0.346 & \\textbf{87.8\\%} \\\\\n",
      "NovaSingleTask & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & 0.059 & 0.205 & 84.8\\% \\\\\n",
      "OpenAIO3 & 0.052 & 0.150 & 36.8\\% & 0.055 & 0.141 & 58.9\\% & 0.052 & 0.151 & 62.6\\% & 0.052 & 0.155 & 68.5\\% \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# LaTeX table for the reconstruction tasks, best value per column in bold.\n",
    "print(to_latex(highlight_best(make_category_pivot(bmr, 'reconstruction'))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "e1a5ffe1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\begin{tabular}{ccccccccccccc}\n",
      "\\toprule\n",
      "Task & \\multicolumn{2}{c}{1 Target} & \\multicolumn{2}{c}{2 Target} & \\multicolumn{2}{c}{3 Target} & \\multicolumn{2}{c}{4 Target} & \\multicolumn{2}{c}{5 Target} & \\multicolumn{2}{c}{6 Target} \\\\\n",
      "Metric & Error & Valid & Error & Valid & Error & Valid & Error & Valid & Error & Valid & Error & Valid \\\\\n",
      "Model &  &  &  &  &  &  &  &  &  &  &  &  \\\\\n",
      "\\midrule\n",
      "LLaVAOmniTask & 0.023 & \\textbf{99.0\\%} & \\textbf{0.011} & \\textbf{94.3\\%} & \\textbf{0.007} & \\textbf{93.1\\%} & \\textbf{0.010} & 89.7\\% & \\textbf{0.008} & 88.3\\% & \\textbf{0.008} & 87.9\\% \\\\\n",
      "LLaVASingleTask & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & 0.036 & 81.9\\% & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} \\\\\n",
      "NovaLite & 0.036 & 2.1\\% & 0.049 & 4.6\\% & 0.043 & 2.0\\% & 0.078 & 3.2\\% & 0.083 & 1.2\\% & 0.072 & 2.8\\% \\\\\n",
      "NovaOmniTask & \\textbf{0.020} & 90.3\\% & 0.018 & 90.6\\% & 0.024 & 90.5\\% & 0.029 & \\textbf{92.7\\%} & 0.035 & \\textbf{90.2\\%} & 0.028 & \\textbf{94.0\\%} \\\\\n",
      "NovaSingleTask & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} & 0.032 & 79.2\\% & \\textemdash{} & \\textemdash{} & \\textemdash{} & \\textemdash{} \\\\\n",
      "OpenAIO3 & 0.045 & 30.5\\% & 0.035 & 20.2\\% & 0.023 & 23.1\\% & 0.045 & 20.5\\% & 0.037 & 28.2\\% & 0.042 & 25.9\\% \\\\\n",
      "\\bottomrule\n",
      "\\end{tabular}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# LaTeX table for the inverse-design tasks, best value per column in bold.\n",
    "print(to_latex(highlight_best(make_category_pivot(bmr, 'inverse_design'))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33ce3b23",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the evaluated results frame.\n",
    "benchmark_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6cb9c38c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop all NovaLite rows from the results (rebinds benchmark_results).\n",
    "benchmark_results = benchmark_results[benchmark_results.Model != 'NovaLite'].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8b7e92e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the current benchmark_results to the shared CSV and parquet files.\n",
    "# These are the same paths the combined_data cell writes to.\n",
    "benchmark_results.to_csv('/data/metagen-data/v3/workspace/benchmark_results.csv')\n",
    "benchmark_results.to_parquet('/data/metagen-data/v3/workspace/benchmark_results.parquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c16ad34",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Element-wise predicates over metric names. otypes is given explicitly because\n",
    "# np.vectorize cannot infer the output dtype from an empty input and raises.\n",
    "# True for names containing 'Normalized' but not 'Average'.\n",
    "is_normalized = np.vectorize(lambda x: 'Normalized' in x and 'Average' not in x, otypes=[bool])\n",
    "# True for names containing 'Absolute' but not 'Average'.\n",
    "is_absolute = np.vectorize(lambda x: 'Absolute' in x and 'Average' not in x, otypes=[bool])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "405fe129",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Average Normalized Error values of NovaOmniTask on the inverse-design category.\n",
    "benchmark_results.loc[\n",
    "    (benchmark_results.Model == 'NovaOmniTask')\n",
    "    & (benchmark_results.metric == 'Average Normalized Error')\n",
    "    & (benchmark_results.Category == 'inverse_design'),\n",
    "    'value',\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1eb7277",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install fastparquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9dd076b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# Reload the unfiltered results; no engine argument and no value filter here.\n",
    "bmr = pd.read_parquet('/data/metagen-data/v3/workspace/benchmark_results_unfiltered.parquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e77c6890",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# One JSON document per line; blank lines (e.g. a trailing newline) are skipped\n",
    "# so they do not reach json.loads.\n",
    "with open('/data/metagen-data/v3/benchmark/omnitask/test.jsonl', 'r') as f:\n",
    "    test_data = [json.loads(line) for line in f if line.strip()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba47d425",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Omnitask test examples whose task_category is inverse_design.\n",
    "idts = [t for t in test_data if t['task_category'] == 'inverse_design']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bc20588e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# target_type of the first 'data' entry of the first inverse-design example.\n",
    "idts[0]['data'][0]['target_type']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5257142d",
   "metadata": {},
   "outputs": [],
   "source": [
    "from collections import Counter\n",
    "\n",
    "# Frequency of each target_type over the 'data' entries of all inverse-design examples.\n",
    "Counter(target['target_type'] for example in idts for target in example['data'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8871551a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inspect the frame currently bound to bmr.\n",
    "bmr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1da28187",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Position-wise comparison of the two value columns.\n",
    "# NOTE(review): NaN != NaN is True, so positions that are NaN in both frames\n",
    "# are flagged as well; the arrays are also assumed to have equal length.\n",
    "inconsistencies = bmr.value.values != benchmark_results.value.values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "46f70262",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep the benchmark_results rows for which the frame previously bound to bmr\n",
    "# has a non-NaN value, then rebind bmr to that selection.\n",
    "# NOTE(review): the mask comes from a different frame than the one it indexes,\n",
    "# so this relies on both frames sharing the same index - confirm.\n",
    "bmr = benchmark_results[~bmr.value.isna()].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "494081b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column names of the current bmr frame.\n",
    "bmr.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30ababd4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2671e2d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "137f6349",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5e7c4c3",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ccb96685",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1db4ee4",
   "metadata": {},
   "outputs": [],
   "source": [
    "debug_results[is_normalized(debug_results.metric)].value.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "199ad0ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "debug_results[is_normalized(debug_results.metric)].groupby(['Label']).agg({'value':'mean'}).reset_index().value.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1fc230b",
   "metadata": {},
   "outputs": [],
   "source": [
    "1457 / sum(is_normalized(debug_results.metric))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1033b117",
   "metadata": {},
   "outputs": [],
   "source": [
    "group_table(debug_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1dfd1fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the Labels that have no missing value anywhere in the Nova Lite results.\n",
    "labels_with_nan = benchmark_results_nova_lite.loc[benchmark_results_nova_lite.value.isna(), 'Label'].unique()\n",
    "keep_mask = ~benchmark_results_nova_lite.Label.isin(labels_with_nan)\n",
    "filtered_nl = benchmark_results_nova_lite[keep_mask].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6337cee",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_pivot(filtered_nl)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ed120e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d7edd816",
   "metadata": {},
   "outputs": [],
   "source": [
    "likely_asymmetric = filtered_nl[filtered_nl.value > np.e**10].Label.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f3c077b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "filtered_nl2 = filtered_nl[~filtered_nl.Label.isin(likely_asymmetric)].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a145d9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "make_pivot(filtered_nl2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d4f2ec80",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7a937f10",
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.hist(np.log(1+filtered_nl[filtered_nl.metric == 'Average Normalized Error'].value))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e0ab0ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "np.log(np.e)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c524f37",
   "metadata": {},
   "outputs": [],
   "source": [
    "bmr[(bmr.Model =='NovaOmniTask') & (bmr.metric == 'IoU') & (bmr.Category == 'reconstruction')].sort_values(by='value', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5e2c042",
   "metadata": {},
   "outputs": [],
   "source": [
    "from PIL import Image\n",
    "import json\n",
    "import os\n",
    "from metagen.benchmarks import *\n",
    "# Load the omnitask test split (one JSON object per line). Blank lines are skipped\n",
    "# so a trailing newline in the file cannot crash json.loads.\n",
    "with open('/data/metagen-data/v3/benchmark/omnitask/test.jsonl', 'r') as f:\n",
    "    test_data = [json.loads(line) for line in f if line.strip()]\n",
    "# Lookups keyed by task label: the task record itself and its position in test_data.\n",
    "test_tasks = {task['label']: task for task in test_data}\n",
    "task_indices = {task['label']: i for i, task in enumerate(test_data)}\n",
    "db = Database('/data/metagen-data/v3/')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "282f34c9",
   "metadata": {},
   "outputs": [],
   "source": [
    "id_tasks = [t for t in test_data if t['task_category'] == 'inverse_design']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "40f75fd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "stasks = bmr[(bmr.Model =='NovaOmniTask') & (bmr.metric == 'Average Normalized Error') & (bmr.Category == 'inverse_design') & (bmr.Task == '4_target_inverse_design')].sort_values(by='value', ascending=True)\n",
    "stasks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4ea3c1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "top_indices = stasks.index.values[:10]\n",
    "top_examples = stasks.Label.values[:10]\n",
    "top_ex_indices = [task_indices[label] for label in top_examples]\n",
    "top_ex_tasks = [test_tasks[label] for label in top_examples]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d68744ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "for i, task in zip(top_indices, top_ex_tasks):\n",
    "    print(task['query'])\n",
    "    plot_from_iloc(i).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74eca8cc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# stasks is a filtered, sorted slice of bmr that keeps bmr's index labels, so the row\n",
    "# is looked up by label. NOTE(review): presumably 32028 is an index label read off the\n",
    "# stasks display; a positional .iloc[32028] only works if stasks has more than 32028 rows.\n",
    "stasks.loc[32028]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f3cd403f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "for t in random.sample(id_tasks, 20):\n",
    "    print(t['query'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a5f99fc5",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6f0bb8b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "df5e5487",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_from_iloc(iloc):\n",
    "    \"\"\"Return the ground-truth 'top_right' render for one row of the global `bmr` frame.\n",
    "\n",
    "    iloc: integer position of the row in `bmr`.\n",
    "    Returns the PIL image opened from the path `db` resolves for that task's source.\n",
    "    Raises KeyError when the row's Label is not a key of `test_tasks`.\n",
    "    \"\"\"\n",
    "    label = bmr.iloc[iloc].Label\n",
    "    gt_source = test_tasks[label]['source']\n",
    "    render_path = db.path(db.renders(gt_source)['top_right'])\n",
    "    return Image.open(render_path)\n",
    "    # Disabled side-by-side comparison with the model prediction, kept for reference:\n",
    "    #idx = task_indices[label]\n",
    "    #pred_image = Image.open(f'/data/metagen-data/v3/workspace/inference_data/NovaOmniTask/omnitask/predicted_successes/{idx}/top_right.png')\n",
    "    #return Image.fromarray(np.concatenate([np.asarray(gt_image), np.asarray(pred_image)], axis=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "608fec14",
   "metadata": {},
   "outputs": [],
   "source": [
    "is_top_right = np.vectorize(lambda x: 'top_right' in x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7734123c",
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.min_rows = 20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d5ed331c",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_from_iloc(bmr[(bmr.Model =='NovaOmniTask') & (bmr.metric == 'Chamfer Distance') & (bmr.Category == 'reconstruction') & (bmr.Task == '1_view_reconstruction') & (is_top_right(bmr.Label))].sort_values(by='value', ascending=True).index[11])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11dd2c32",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the ground-truth renders of the lowest-Chamfer-distance 1-view reconstructions.\n",
    "# The loop header previously had no colon and iterated over the single label `.index[11]`;\n",
    "# NOTE(review): the first 11 rows (`.index[:11]`) are assumed to be the intent.\n",
    "best_rows = bmr[(bmr.Model =='NovaOmniTask') & (bmr.metric == 'Chamfer Distance') & (bmr.Category == 'reconstruction') & (bmr.Task == '1_view_reconstruction') & (is_top_right(bmr.Label))].sort_values(by='value', ascending=True).index[:11]\n",
    "images = [np.asarray(plot_from_iloc(i)) for i in best_rows]\n",
    "Image.fromarray(np.concatenate(images, axis=0))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ebb68050",
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_from_iloc(13187)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c586b0f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "from metagen.benchmarks import Database\n",
    "db = Database('/data/metagen-data/v3/')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "74f8cc9d",
   "metadata": {},
   "outputs": [],
   "source": [
    "len(db.sources)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b8ba3a1",
   "metadata": {},
   "outputs": [],
   "source": [
    "len([x for x in db.sources if '/hybrid/' in x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fccdb928",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.patches as patches\n",
    "\n",
    "def plot_inverse_design_results(properties):\n",
    "    \"\"\"\n",
    "    Draw one horizontal number line per property, shading the target region\n",
    "    and marking the achieved value in red.\n",
    "\n",
    "    properties: List of dicts with each property having:\n",
    "        - name: str\n",
    "        - target: dict with 'type' ('value', 'upper', 'lower', or 'range'), and 'value' or 'min'/'max'\n",
    "        - actual: float\n",
    "        - range: tuple (min, max) for display purposes\n",
    "\n",
    "    An actual value outside `range` is drawn as a red arrow pointing off that\n",
    "    end of the axis. An empty list draws nothing.\n",
    "    \"\"\"\n",
    "    if not properties:\n",
    "        # A figure with zero height has nothing to show.\n",
    "        return\n",
    "\n",
    "    n_rows = len(properties)\n",
    "    fig, ax = plt.subplots(figsize=(8, n_rows * 1.5))\n",
    "    ax.set_xlim(0, 1)\n",
    "    ax.axis('off')\n",
    "\n",
    "    # Vertical pitch of the stacked axes in figure coordinates. 0.12 is the usual\n",
    "    # spacing; it shrinks only when that many rows would run past the top of the figure.\n",
    "    pitch = min(0.12, 0.85 / n_rows)\n",
    "    row_height = pitch * 5 / 6\n",
    "\n",
    "    for i, prop in enumerate(properties):\n",
    "        y = n_rows - i - 1\n",
    "        lo, hi = prop['range']\n",
    "        span = hi - lo\n",
    "        target = prop['target']\n",
    "\n",
    "        # Create axes for this property\n",
    "        ax_sub = fig.add_axes([0.15, 0.1 + y * pitch, 0.7, row_height])\n",
    "        ax_sub.set_yticks([])\n",
    "        ax_sub.set_xlim(lo, hi)\n",
    "        # Fixed y-limits keep the marker and the out-of-range arrows (both at y=0) mid-axis.\n",
    "        ax_sub.set_ylim(-1, 1)\n",
    "        ax_sub.set_title(prop['name'], loc='left')\n",
    "\n",
    "        # Draw target region\n",
    "        if target['type'] == 'value':\n",
    "            t = target['value']\n",
    "            ax_sub.axvspan(t - 0.01 * span, t + 0.01 * span,\n",
    "                           color='lightblue', alpha=0.5)\n",
    "        elif target['type'] == 'upper':\n",
    "            ax_sub.axvspan(lo, target['value'], color='lightblue', alpha=0.5)\n",
    "        elif target['type'] == 'lower':\n",
    "            ax_sub.axvspan(target['value'], hi, color='lightblue', alpha=0.5)\n",
    "        elif target['type'] == 'range':\n",
    "            ax_sub.axvspan(target['min'], target['max'], color='lightblue', alpha=0.5)\n",
    "\n",
    "        # Plot actual result. The arrow length is a fraction of the axis span so it\n",
    "        # stays visible whatever the units (a fixed 0.05 vanishes on a 50-250 axis).\n",
    "        actual = prop['actual']\n",
    "        arrow_len = 0.05 * span\n",
    "        if actual < lo:\n",
    "            ax_sub.annotate('', xy=(lo, 0), xytext=(lo + arrow_len, 0),\n",
    "                            arrowprops=dict(arrowstyle='-|>', color='red'))\n",
    "        elif actual > hi:\n",
    "            ax_sub.annotate('', xy=(hi, 0), xytext=(hi - arrow_len, 0),\n",
    "                            arrowprops=dict(arrowstyle='-|>', color='red'))\n",
    "        else:\n",
    "            ax_sub.plot(actual, 0, 'ro')\n",
    "\n",
    "    # No tight_layout(): the axes above are positioned by hand with add_axes, which\n",
    "    # tight_layout cannot manage and only warns about.\n",
    "    plt.show()\n",
    "\n",
    "properties = [\n",
    "    {\n",
    "        'name': \"Young's Modulus\",\n",
    "        'target': {'type': 'range', 'min': 100, 'max': 200},\n",
    "        'actual': 180,\n",
    "        'range': (50, 250)\n",
    "    },\n",
    "    {\n",
    "        'name': \"Poisson's Ratio\",\n",
    "        'target': {'type': 'upper', 'value': 0.3},\n",
    "        'actual': 0.35,\n",
    "        'range': (0.0, 0.5)\n",
    "    },\n",
    "    {\n",
    "        'name': \"Density\",\n",
    "        'target': {'type': 'value', 'value': 5.0},\n",
    "        'actual': 5.5,\n",
    "        'range': (4.0, 6.0)\n",
    "    }\n",
    "]\n",
    "\n",
    "plot_inverse_design_results(properties)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1d6fdd55",
   "metadata": {},
   "outputs": [],
   "source": [
    "import plotly.graph_objects as go\n",
    "import plotly.io as pio\n",
    "\n",
    "def plot_inverse_design_plotly(properties, filename_base=\"inverse_design_results\"):\n",
    "    \"\"\"Plotly rendering of inverse-design results: one number line per property.\n",
    "\n",
    "    properties: list of dicts with 'name', 'target' ('type' plus 'value' or 'min'/'max'),\n",
    "        'actual' and 'range' (min, max).\n",
    "    filename_base: path prefix; the figure is written to <filename_base>.html, .svg and .pdf\n",
    "        before being shown.\n",
    "\n",
    "    Every row is drawn on the same hidden x-axis, so each property's values are\n",
    "    rescaled to put its display range on [0, 1]. Without that, properties with\n",
    "    very different ranges (say 50-250 next to 0-0.5) cannot be read side by side.\n",
    "    Hover text still reports the raw value.\n",
    "    \"\"\"\n",
    "    def to_unit(value, lo, hi):\n",
    "        # Position of `value` on the shared axis; a degenerate range maps to the middle.\n",
    "        return (value - lo) / (hi - lo) if hi != lo else 0.5\n",
    "\n",
    "    fig = go.Figure()\n",
    "\n",
    "    height_per_row = 100\n",
    "    total_height = len(properties) * height_per_row\n",
    "    band_half_width = 0.01  # half-width of a point target, as a fraction of the display range\n",
    "\n",
    "    for i, prop in enumerate(properties):\n",
    "        y_base = (len(properties) - i - 1) * height_per_row\n",
    "        y_mid = y_base + 50\n",
    "        lo, hi = prop['range']\n",
    "\n",
    "        # Draw background number line\n",
    "        fig.add_shape(type=\"line\",\n",
    "                      x0=0, x1=1,\n",
    "                      y0=y_mid, y1=y_mid,\n",
    "                      line=dict(color=\"lightgray\", width=2))\n",
    "\n",
    "        # Work out the target band in axis units\n",
    "        target = prop['target']\n",
    "        band = None\n",
    "        if target['type'] == 'value':\n",
    "            centre = to_unit(target['value'], lo, hi)\n",
    "            band = (centre - band_half_width, centre + band_half_width)\n",
    "        elif target['type'] == 'upper':\n",
    "            band = (0, to_unit(target['value'], lo, hi))\n",
    "        elif target['type'] == 'lower':\n",
    "            band = (to_unit(target['value'], lo, hi), 1)\n",
    "        elif target['type'] == 'range':\n",
    "            band = (to_unit(target['min'], lo, hi), to_unit(target['max'], lo, hi))\n",
    "\n",
    "        # Add target shading\n",
    "        if band is not None:\n",
    "            fig.add_shape(type=\"rect\",\n",
    "                          x0=band[0], x1=band[1],\n",
    "                          y0=y_base + 20, y1=y_base + 80,\n",
    "                          fillcolor=\"lightblue\", opacity=0.4, line_width=0)\n",
    "\n",
    "        # Add actual result (dot or arrow)\n",
    "        actual = prop['actual']\n",
    "        if actual < lo:\n",
    "            fig.add_annotation(x=0, y=y_mid,\n",
    "                               text=\"←\", showarrow=False,\n",
    "                               font=dict(size=20, color=\"red\"))\n",
    "        elif actual > hi:\n",
    "            fig.add_annotation(x=1, y=y_mid,\n",
    "                               text=\"→\", showarrow=False,\n",
    "                               font=dict(size=20, color=\"red\"))\n",
    "        else:\n",
    "            fig.add_trace(go.Scatter(\n",
    "                x=[to_unit(actual, lo, hi)],\n",
    "                y=[y_mid],\n",
    "                mode='markers',\n",
    "                marker=dict(size=12, color='red'),\n",
    "                text=[f\"{prop['name']}: {actual:.3f}\"],\n",
    "                hoverinfo='text'\n",
    "            ))\n",
    "\n",
    "        # Add property label just left of the number line\n",
    "        fig.add_annotation(x=-0.05,\n",
    "                           y=y_mid,\n",
    "                           text=f\"<b>{prop['name']}</b>\",\n",
    "                           showarrow=False, xanchor=\"right\", font=dict(size=12))\n",
    "\n",
    "    # Explicit ranges: autorange only looks at the marker traces, which would crop\n",
    "    # the shapes and labels (and leaves nothing sensible when every value is off-scale).\n",
    "    fig.update_layout(\n",
    "        height=total_height + 100,\n",
    "        margin=dict(l=80, r=30, t=30, b=30),\n",
    "        xaxis=dict(visible=False, range=[-0.3, 1.05]),\n",
    "        yaxis=dict(visible=False, range=[0, max(total_height, height_per_row)]),\n",
    "        plot_bgcolor=\"white\",\n",
    "        showlegend=False\n",
    "    )\n",
    "\n",
    "    # Save to HTML (interactive) and SVG/PDF\n",
    "    pio.write_html(fig, f\"{filename_base}.html\")\n",
    "    pio.write_image(fig, f\"{filename_base}.svg\")\n",
    "    pio.write_image(fig, f\"{filename_base}.pdf\")\n",
    "\n",
    "    fig.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cae9cd17",
   "metadata": {},
   "outputs": [],
   "source": [
    "properties = [\n",
    "    {\n",
    "        'name': \"Young's Modulus\",\n",
    "        'target': {'type': 'range', 'min': 100, 'max': 200},\n",
    "        'actual': 180,\n",
    "        'range': (50, 250)\n",
    "    },\n",
    "    {\n",
    "        'name': \"Poisson's Ratio\",\n",
    "        'target': {'type': 'upper', 'value': 0.3},\n",
    "        'actual': 0.35,\n",
    "        'range': (0.0, 0.5)\n",
    "    },\n",
    "    {\n",
    "        'name': \"Density\",\n",
    "        'target': {'type': 'value', 'value': 5.0},\n",
    "        'actual': 5.5,\n",
    "        'range': (4.0, 6.0)\n",
    "    }\n",
    "]\n",
    "\n",
    "plot_inverse_design_plotly(properties)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a63ef729",
   "metadata": {},
   "outputs": [],
   "source": [
    "%pip install -q kaleido"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "05cf010e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "df = pd.read_parquet('/data/metagen-data/v3/workspace/benchmark_results.parquet', engine='fastparquet')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "92dc0a7c",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e5aa961",
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_pivot(df):\n",
    "    \"\"\"Mean of each headline metric per model, laid out as Model x (Category, Metric).\n",
    "\n",
    "    df: long-format results with Model, Category, metric and value columns.\n",
    "    Only 'Valid', 'IoU', 'Chamfer Distance' and 'Average Normalized Error' rows are\n",
    "    kept; categories and the two long metric names are replaced by display labels.\n",
    "    The input frame is not modified.\n",
    "    \"\"\"\n",
    "    headline_metrics = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "    category_labels = {\n",
    "        'inverse_design': 'Inverse Design',\n",
    "        'material_understanding': 'Material Understanding',\n",
    "        'reconstruction': 'Reconstruction'\n",
    "    }\n",
    "    metric_labels = {\n",
    "        'Average Normalized Error': 'Error',\n",
    "        'Chamfer Distance': 'CD'\n",
    "    }\n",
    "\n",
    "    table = df[df.metric.isin(headline_metrics)].copy()\n",
    "    # Swap raw keys for display labels, then expose the metric column as 'Metric'\n",
    "    table['Category'] = table['Category'].replace(category_labels)\n",
    "    table['metric'] = table['metric'].replace(metric_labels)\n",
    "    table = table.rename(columns={'metric': 'Metric'})\n",
    "\n",
    "    return pd.pivot_table(\n",
    "        table,\n",
    "        index='Model',\n",
    "        columns=['Category', 'Metric'],\n",
    "        values='value',\n",
    "        aggfunc='mean'\n",
    "    )\n",
    "\n",
    "# Which direction counts as better for each display metric name\n",
    "minimize_metrics = {'Error', 'CD'}\n",
    "maximize_metrics = {'IoU', 'Valid'}\n",
    "\n",
    "def _latex_cell(x, metric_name, best_val):\n",
    "    \"\"\"Format one pivot entry as a LaTeX string; the entry equal to best_val is bolded.\"\"\"\n",
    "    if pd.isna(x):\n",
    "        return r'\\textemdash{}'  # missing entries become a dash\n",
    "    if metric_name == 'Valid':\n",
    "        percent = x * 100\n",
    "        text = \"100\\\\%\" if percent == 100 else f\"{percent:.1f}\\\\%\"\n",
    "    else:\n",
    "        text = f\"{x:.3f}\"\n",
    "    if x == best_val:\n",
    "        return f\"\\\\textbf{{{text}}}\"\n",
    "    return text\n",
    "\n",
    "def highlight_best(df):\n",
    "    \"\"\"Return a copy of a pivot with LaTeX-formatted cells and each column's best value in bold.\n",
    "\n",
    "    df: pivot whose columns are a MultiIndex with the metric name at level 1.\n",
    "    Columns whose metric is in neither minimize_metrics nor maximize_metrics are\n",
    "    copied through unformatted.\n",
    "    \"\"\"\n",
    "    result = df.copy()\n",
    "\n",
    "    for column in df.columns:\n",
    "        series = df[column]\n",
    "        metric_name = column[1]\n",
    "\n",
    "        # Best value per column, NaNs ignored\n",
    "        if metric_name in minimize_metrics:\n",
    "            best = series.min(skipna=True)\n",
    "        elif metric_name in maximize_metrics:\n",
    "            best = series.max(skipna=True)\n",
    "        else:\n",
    "            continue\n",
    "\n",
    "        result[column] = series.apply(_latex_cell, args=(metric_name, best))\n",
    "\n",
    "    return result\n",
    "\n",
    "def to_latex(formatted_pivot):\n",
    "    \"\"\"Serialize a formatted pivot to LaTeX with every column, index included, centred.\"\"\"\n",
    "    # One 'c' per data column plus one for the index column\n",
    "    column_spec = 'c' * (formatted_pivot.shape[1] + 1)\n",
    "    return formatted_pivot.to_latex(\n",
    "        escape=False,\n",
    "        multicolumn=True,\n",
    "        multicolumn_format='c',\n",
    "        column_format=column_spec\n",
    "    )\n",
    "\n",
    "def make_category_pivot(df, category):\n",
    "    \"\"\"Mean of each headline metric per model for one category, as Model x (Task, Metric).\n",
    "\n",
    "    df: long-format results with Model, Category, Task, metric and value columns.\n",
    "    category: raw category key to keep (compared with df.Category before relabelling).\n",
    "    The input frame is not modified.\n",
    "    \"\"\"\n",
    "    headline_metrics = ['Valid', 'IoU', 'Chamfer Distance', 'Average Normalized Error']\n",
    "    category_labels = {\n",
    "        'inverse_design': 'Inverse Design',\n",
    "        'material_understanding': 'Material Understanding',\n",
    "        'reconstruction': 'Reconstruction'\n",
    "    }\n",
    "    metric_labels = {\n",
    "        'Average Normalized Error': 'Error',\n",
    "        'Chamfer Distance': 'CD'\n",
    "    }\n",
    "    task_labels = {\n",
    "        'multiview_and_code_material_understanding': '4 View + Code',\n",
    "        'single_view_material_understanding': '1 View',\n",
    "        '4_target_inverse_design': '4 Target',\n",
    "        '2_view_reconstruction': '2 View',\n",
    "        '6_target_inverse_design': '6 Target',\n",
    "        '3_view_reconstruction': '3 View',\n",
    "        '1_view_reconstruction': '1 View',\n",
    "        '5_target_inverse_design': '5 Target',\n",
    "        '3_target_inverse_design': '3 Target',\n",
    "        '2_target_inverse_design': '2 Target',\n",
    "        '4_view_reconstruction': '4 View',\n",
    "        '1_target_inverse_design': '1 Target'\n",
    "    }\n",
    "\n",
    "    # Headline metrics of the requested category only\n",
    "    wanted = df.metric.isin(headline_metrics) & (df.Category == category)\n",
    "    table = df[wanted].copy()\n",
    "\n",
    "    # Swap raw keys for display labels, then expose the metric column as 'Metric'\n",
    "    table['Category'] = table['Category'].replace(category_labels)\n",
    "    table['metric'] = table['metric'].replace(metric_labels)\n",
    "    table['Task'] = table['Task'].replace(task_labels)\n",
    "    table = table.rename(columns={'metric': 'Metric'})\n",
    "\n",
    "    return pd.pivot_table(\n",
    "        table,\n",
    "        index='Model',\n",
    "        columns=['Task', 'Metric'],\n",
    "        values='value',\n",
    "        aggfunc='mean'\n",
    "    )\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eb338287",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "# Define metric behavior (keyed by the short display names)\n",
    "minimize_metrics = {'Error', 'CD'}\n",
    "maximize_metrics = {'IoU', 'Valid'}\n",
    "ci_metrics = {'Error', 'CD', 'IoU'}\n",
    "\n",
    "# The results frames in this notebook carry the long metric names, so translate them\n",
    "# to the display names first. Otherwise 'Average Normalized Error' and 'Chamfer Distance'\n",
    "# never match minimize_metrics / ci_metrics and silently lose their bolding and CI.\n",
    "metric_mapping = {\n",
    "    'Average Normalized Error': 'Error',\n",
    "    'Chamfer Distance': 'CD'\n",
    "}\n",
    "results = df.assign(metric=df['metric'].replace(metric_mapping))\n",
    "\n",
    "# Compute aggregates\n",
    "group_cols = ['Model', 'Category', 'metric']\n",
    "agg = results.groupby(group_cols)['value'].agg(['mean', 'std', 'count']).reset_index()\n",
    "\n",
    "# Compute 95% CI (normal approximation: 1.96 standard errors of the mean)\n",
    "agg['ci'] = 1.96 * agg['std'] / np.sqrt(agg['count'])\n",
    "\n",
    "# Pivot each value separately\n",
    "mean_pivot = agg.pivot(index='Model', columns=['Category', 'metric'], values='mean')\n",
    "ci_pivot = agg.pivot(index='Model', columns=['Category', 'metric'], values='ci')\n",
    "\n",
    "# Format the table with value + CI + bolding\n",
    "def format_with_ci(mean_df, ci_df):\n",
    "    \"\"\"Turn matching mean / CI pivots into a frame of LaTeX cell strings.\n",
    "\n",
    "    mean_df, ci_df: pivots with identical index and (Category, metric) columns.\n",
    "    Per column the best mean (min or max, by metric) is bolded; 'Valid' is shown as a\n",
    "    percentage without CI; metrics in ci_metrics get the CI on a second line.\n",
    "    Missing means become a dash.\n",
    "    \"\"\"\n",
    "    formatted = mean_df.copy()\n",
    "\n",
    "    for col in mean_df.columns:\n",
    "        metric = col[1]\n",
    "        mean_values = mean_df[col]\n",
    "        ci_values = ci_df[col]\n",
    "\n",
    "        # Get best value (min or max) for bolding\n",
    "        if metric in minimize_metrics:\n",
    "            best_val = mean_values.min(skipna=True)\n",
    "        elif metric in maximize_metrics:\n",
    "            best_val = mean_values.max(skipna=True)\n",
    "        else:\n",
    "            best_val = None\n",
    "\n",
    "        def fmt(val, ci):\n",
    "            if pd.isna(val):\n",
    "                return r'\\textemdash{}'\n",
    "\n",
    "            # Format the base value\n",
    "            if metric == 'Valid':\n",
    "                percent = val * 100\n",
    "                val_str = \"100\\\\%\" if percent == 100 else f\"{percent:.1f}\\\\%\"\n",
    "            else:\n",
    "                val_str = f\"{val:.3f}\"\n",
    "\n",
    "            # Bold only the number, never the line break or the CI\n",
    "            if best_val is not None and val == best_val:\n",
    "                val_str = r'\\textbf{' + val_str + '}'\n",
    "\n",
    "            if metric in ci_metrics and not pd.isna(ci):\n",
    "                # A bare \\\\ inside a tabular cell ends the table row, so the two\n",
    "                # lines are stacked inside a \\shortstack instead.\n",
    "                ci_str = r'{\\scriptsize ($\\pm$' + f\"{ci:.3f}\" + ')}'\n",
    "                return r'\\shortstack{' + val_str + r'\\\\ ' + ci_str + '}'\n",
    "\n",
    "            return val_str\n",
    "\n",
    "        formatted[col] = [\n",
    "            fmt(val, ci) for val, ci in zip(mean_values, ci_values)\n",
    "        ]\n",
    "\n",
    "    return formatted\n",
    "\n",
    "# Apply formatting\n",
    "formatted_table = format_with_ci(mean_pivot, ci_pivot)\n",
    "\n",
    "# Build column format string (centered, including index)\n",
    "col_format = 'c' * (formatted_table.shape[1] + 1)\n",
    "\n",
    "# Export to LaTeX\n",
    "latex = formatted_table.to_latex(\n",
    "    escape=False,\n",
    "    multicolumn=True,\n",
    "    multicolumn_format='c',\n",
    "    column_format=col_format\n",
    ")\n",
    "\n",
    "print(latex)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c0f58ee",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "06ff1e8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c3fba33",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/data/metagen-data/v3/models/authored/cubic_plus_octet_foam/code.py','r') as f:\n",
    "    code = f.read()\n",
    "print(code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a19becef",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('/data/metagen-data/v3/models/hybrid/cubic_plus_octet_foam-discretely_assembled_auxetic-octet_truss/code.py','r') as f:\n",
    "    code = f.read()\n",
    "print(code)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7f09ef0",
   "metadata": {},
   "outputs": [],
   "source": [
    "from metagen import *\n",
    "\n",
    "def make_skeleton_cubic_plates(plate_thickness:float=0.03) -> LiftedSkeleton:\n",
    "    \"\"\"Build a UniformDirectShell of the given thickness over one closed three-vertex loop.\"\"\"\n",
    "    top_back = vertex(tet.corners.TOP_BACK)\n",
    "    bottom_back = vertex(tet.corners.BOTTOM_BACK)\n",
    "    bottom_left = vertex(tet.corners.BOTTOM_LEFT)\n",
    "\n",
    "    # The first vertex is repeated at the end so the polyline closes on itself\n",
    "    outline = Polyline([top_back, bottom_back, bottom_left, top_back])\n",
    "    return UniformDirectShell(skeleton([outline]), plate_thickness)\n",
    "\n",
    "def make_skeleton_octet_plates(plate_thickness:float=0.03) -> list[LiftedSkeleton]:\n",
    "    \"\"\"Build two UniformDirectShell plates of the given thickness that share one edge.\n",
    "\n",
    "    Returns [first_plate, second_plate]; both loops contain the BOTTOM_LEFT corner\n",
    "    and the BOTTOM_RIGHT edge vertex.\n",
    "    \"\"\"\n",
    "    back_top = vertex(tet.corners.BACK_TOP)\n",
    "    bottom_left = vertex(tet.corners.BOTTOM_LEFT)\n",
    "    bottom_right_mid = vertex(tet.edges.BOTTOM_RIGHT, [0.5])\n",
    "    top_right_pt = vertex(tet.edges.TOP_RIGHT, [2.0/3.0])\n",
    "\n",
    "    # Each loop repeats its first vertex at the end so the polyline closes on itself\n",
    "    first_loop = Polyline([back_top, bottom_left, bottom_right_mid, back_top])\n",
    "    first_plate = UniformDirectShell(skeleton([first_loop]), plate_thickness)\n",
    "\n",
    "    second_loop = Polyline([bottom_left, bottom_right_mid, top_right_pt, bottom_left])\n",
    "    second_plate = UniformDirectShell(skeleton([second_loop]), plate_thickness)\n",
    "\n",
    "    return [first_plate, second_plate]\n",
    "\n",
    "\n",
    "def make_structure(cubic_plate_thickness:float=0.03, octet_plate_thickness:float=0.03) -> Structure:\n",
    "    \"\"\"Combine the cubic plate and both octet plates into one tile and pattern it with TetFullMirror.\"\"\"\n",
    "    embedding = tet.embed(0.5)\n",
    "    pat = TetFullMirror()\n",
    "\n",
    "    cubic_plate = make_skeleton_cubic_plates(cubic_plate_thickness)\n",
    "    first_octet, second_octet = make_skeleton_octet_plates(octet_plate_thickness)\n",
    "\n",
    "    return Structure(Tile([cubic_plate, first_octet, second_octet], embedding), pat)\n",
    "\n",
    "make_structure()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2f11eb18",
   "metadata": {},
   "outputs": [],
   "source": [
    "from metagen import *\n",
    "\n",
    "def make_structure(cubic_plate_thickness: float = 0.03,\n",
    "                   octet_plate_thickness: float = 0.03,\n",
    "                   beam_radius: float = 0.1) -> Structure:\n",
    "    \"\"\"Single-function variant: one cubic plate, two octet plates and one beam in a tet tile.\n",
    "\n",
    "    cubic_plate_thickness: thickness passed to the cubic plate shell.\n",
    "    octet_plate_thickness: thickness passed to both octet plate shells.\n",
    "    beam_radius: radius passed to UniformBeams for the four-vertex beam path.\n",
    "    Returns the Structure built from the tile and a TetFullMirror pattern.\n",
    "    \"\"\"\n",
    "    embedding = tet.embed(0.5)\n",
    "    pat = TetFullMirror()\n",
    "\n",
    "    # Cubic plate: closed loop over three tet corners\n",
    "    top_back = vertex(tet.corners.TOP_BACK)\n",
    "    bottom_back = vertex(tet.corners.BOTTOM_BACK)\n",
    "    bottom_left = vertex(tet.corners.BOTTOM_LEFT)\n",
    "    cubic_outline = Polyline([top_back, bottom_back, bottom_left, top_back])\n",
    "    cubic_plate = UniformDirectShell(skeleton([cubic_outline]), cubic_plate_thickness)\n",
    "\n",
    "    # Two octet plates sharing the edge between the second and third vertices below\n",
    "    oct_a = vertex(tet.corners.BACK_TOP)\n",
    "    oct_b = vertex(tet.corners.BOTTOM_LEFT)\n",
    "    oct_c = vertex(tet.edges.BOTTOM_RIGHT, [0.5])\n",
    "    oct_d = vertex(tet.edges.TOP_RIGHT, [2.0/3.0])\n",
    "    first_outline = Polyline([oct_a, oct_b, oct_c, oct_a])\n",
    "    first_octet = UniformDirectShell(skeleton([first_outline]), octet_plate_thickness)\n",
    "    second_outline = Polyline([oct_b, oct_c, oct_d, oct_b])\n",
    "    second_octet = UniformDirectShell(skeleton([second_outline]), octet_plate_thickness)\n",
    "\n",
    "    # Open four-vertex beam path (not closed: the last vertex differs from the first)\n",
    "    beam_start = vertex(tet.corners.BOTTOM_LEFT)\n",
    "    beam_mid_a = vertex(tet.faces.LEFT, [0.7, 0.0])\n",
    "    beam_mid_b = vertex(tet.faces.LEFT, [0.15, 0.7])\n",
    "    beam_end = vertex(tet.edges.BACK, [0.7])\n",
    "    beam_line = Polyline([beam_start, beam_mid_a, beam_mid_b, beam_end])\n",
    "    beam = UniformBeams(skeleton([beam_line]), beam_radius)\n",
    "\n",
    "    # Assemble the tile and pattern it\n",
    "    members = [cubic_plate, first_octet, second_octet, beam]\n",
    "    return Structure(Tile(members, embedding), pat)\n",
    "\n",
    "make_structure()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44f41d5f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
