{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MCQ to open-ended question answering\n",
    "This notebook inspects multiple-choice question (MCQ) datasets to see whether they are \"trivially\" transferable to open-ended question-answering tasks, where an LLM-as-judge evaluates the correctness of the answers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import datetime\n",
    "import json\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
    "from typing import Union, Callable\n",
    "\n",
    "import datasets\n",
    "import yaml\n",
    "from datasets import load_dataset\n",
    "from openai import OpenAI\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configure the OpenAI-compatible client used as the LLM filtering judge.\n",
    "if not os.path.exists(\"configs/acd/default.yaml\"):\n",
    "    # No config file found: hardcode the url\n",
    "    model_url = \"http://<your IP address>:<Your port>/v1\"\n",
    "    model_name = \"Qwen/Qwen2.5-72B-Instruct\"\n",
    "else:\n",
    "    # Read the server address and the model name from the ACD config.\n",
    "    with open(\"configs/acd/default.yaml\", \"r\") as f:\n",
    "        acd_config = yaml.safe_load(f)\n",
    "    model_url = f\"http://{acd_config['vllm_server_host']}:{acd_config['vllm_server_port']}/v1\"\n",
    "    # Strip a leading \"vllm/\" from the configured model name, if present.\n",
    "    model_name = acd_config[\"scientist_model\"].removeprefix(\"vllm/\")\n",
    "\n",
    "client = OpenAI(api_key=\"empty\", base_url=model_url)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "JUDGE_SYSTEM_PROMPT = \"\"\"\n",
    "You are a professional educator. Your job is it to evaluate whether a question is unambiguous and can be answered without the multiple choice options. You need to determine whether it is clear what the question is asking.\n",
    "\n",
    "You will be given the parsed question that you need to evaluate.\n",
    "\n",
    "A valid question here means:\n",
    "- The standalone question can be answered without the multiple choice options.\n",
    "- It is clear what the question is asking.\n",
    "- Even if a question is posed as a text continuaiton task, if the continuation can be generated without the context of multiple choice options, then it is valid.\n",
    "- If the question contains anything along the lines of \"Which of the following...\", then the question is not valid.\n",
    "\n",
    "Respond precisely in the following format:\n",
    "\n",
    "THOUGHT:\n",
    "<THOUGHT>\n",
    "\n",
    "DECISION:\n",
    "<DECISION>\n",
    "\n",
    "In <THOUGHT>, briefly reason about the question and whether it can be answered without the multiple choice answers.\n",
    "\n",
    "In <DECISION>, provide your answer as either \"Yes\" or \"No\".\n",
    "\"\"\"\n",
    "\n",
    "JUDGE_USER_PROMPT = \"\"\"\n",
    "Question:\n",
    "{question}\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helpers for getting filtering judge decisions\n",
    "itoa_mapping = {\n",
    "    0: \"A\",\n",
    "    1: \"B\",\n",
    "    2: \"C\",\n",
    "    3: \"D\",\n",
    "    4: \"E\",\n",
    "    5: \"F\",\n",
    "    6: \"G\",\n",
    "    7: \"H\",\n",
    "    8: \"I\",\n",
    "    9: \"J\",\n",
    "    10: \"K\",\n",
    "    11: \"L\",\n",
    "    12: \"M\",\n",
    "    13: \"N\",\n",
    "    14: \"O\",\n",
    "    15: \"P\",\n",
    "    16: \"Q\",\n",
    "    17: \"R\",\n",
    "    18: \"S\",\n",
    "    19: \"T\",\n",
    "    20: \"U\",\n",
    "    21: \"V\",\n",
    "    22: \"W\",\n",
    "    23: \"X\",\n",
    "    24: \"Y\",\n",
    "    25: \"Z\",\n",
    "}\n",
    "\n",
    "def extract_judge_decision(response: str) -> bool:\n",
    "    \"\"\"\n",
    "    Extract the judge's decision from the response and return it as a boolean.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        decision = response.split(\"DECISION:\")[1].strip()\n",
    "        if \"Yes\" in decision:\n",
    "            return True\n",
    "        elif \"No\" in decision:\n",
    "            return False\n",
    "        else:\n",
    "            # If no decision is found, then return False\n",
    "            return False\n",
    "    except Exception as e:\n",
    "        print(f\"Error extracting judge decision: {e}\\nResponse: {response}\")\n",
    "        return False\n",
    "\n",
    "def ask_judge(\n",
    "    client: OpenAI,\n",
    "    model_name: str,\n",
    "    question: str,\n",
    "    choices: list[str],\n",
    "    target: Union[int, str],\n",
    "    subject: str,\n",
    "    logging_dict: dict = None,\n",
    "    id: int = None,\n",
    ") -> bool:\n",
    "    \"\"\"\n",
    "    Query the judge LLM on whether a former multiple-choice question works as an open-ended question.\n",
    "\n",
    "    Args:\n",
    "        client: Client used to send the chat completion request.\n",
    "        model_name: Name of the judge model to query.\n",
    "        question: Question text shown to the judge.\n",
    "        choices: Answer options of the original multiple-choice question.\n",
    "        target: Correct answer, either as an index into `choices` or as text.\n",
    "        subject: Subject/category label, stored in the log record.\n",
    "        logging_dict: Optional dict that receives a record of this sample under key `id`.\n",
    "        id: Key under which the record is stored in `logging_dict`.\n",
    "\n",
    "    Returns:\n",
    "        The judge's decision as parsed by `extract_judge_decision`.\n",
    "    \"\"\"\n",
    "    lettered_choices = []\n",
    "    for index, choice in enumerate(choices):\n",
    "        lettered_choices.append(f\"{itoa_mapping[index]}. {choice}\")\n",
    "    choices_str = \"\\n\".join(lettered_choices)\n",
    "\n",
    "    # An integer target is an index into `choices`; anything else is used as-is.\n",
    "    target_str = choices[target] if isinstance(target, int) else target\n",
    "\n",
    "    # The template decides which of these fields actually end up in the prompt.\n",
    "    prompt = JUDGE_USER_PROMPT.format(question=question, choices=choices_str, target=target_str)\n",
    "\n",
    "    messages = [\n",
    "        {\"role\": \"system\", \"content\": JUDGE_SYSTEM_PROMPT},\n",
    "        {\"role\": \"user\", \"content\": prompt},\n",
    "    ]\n",
    "    response = client.chat.completions.create(model=model_name, messages=messages)\n",
    "\n",
    "    judge_response = response.choices[0].message.content\n",
    "    judge_decision = extract_judge_decision(judge_response)\n",
    "\n",
    "    if logging_dict is None:\n",
    "        return judge_decision\n",
    "\n",
    "    # Keep the raw judge response next to the parsed verdict for later inspection.\n",
    "    logging_dict[id] = {\n",
    "        \"question\": question,\n",
    "        \"choices\": choices,\n",
    "        \"target\": target,\n",
    "        \"subject\": subject,\n",
    "        \"judge_response\": judge_response,\n",
    "        \"is_valid\": judge_decision,\n",
    "    }\n",
    "    return judge_decision"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _run_items_in_parallel(items, process_item_fn, answer_dict, max_workers, extra_args=()):\n",
    "    \"\"\"Call `process_item_fn(item, index, answer_dict, *extra_args)` for every item on a thread pool.\"\"\"\n",
    "    with ThreadPoolExecutor(max_workers=max_workers) as executor:\n",
    "        # Map each future back to its item index for error reporting\n",
    "        future_to_item = {\n",
    "            executor.submit(process_item_fn, item, i, answer_dict, *extra_args): i\n",
    "            for i, item in enumerate(items)\n",
    "        }\n",
    "\n",
    "        # Process results as they complete with progress bar\n",
    "        for future in tqdm(as_completed(future_to_item), total=len(future_to_item)):\n",
    "            try:\n",
    "                future.result()\n",
    "            except Exception as e:\n",
    "                # Best effort: report the failing item and keep processing the rest\n",
    "                print(f\"Error processing item {future_to_item[future]}: {e}\")\n",
    "\n",
    "\n",
    "def process_dataset(\n",
    "    dataset: Union[datasets.Dataset, dict[str, datasets.Dataset]],\n",
    "    output_dir: str,\n",
    "    process_item_fn: Callable,\n",
    "    max_workers: int = 64,\n",
    "):\n",
    "    \"\"\"\n",
    "    Run `process_item_fn` over a dataset in parallel and save the collected verdicts.\n",
    "\n",
    "    Args:\n",
    "        dataset: Either a single dataset, or a dict mapping a name to a dataset. For a dict,\n",
    "            the key is passed to `process_item_fn` as an extra positional argument.\n",
    "        output_dir: Directory for the JSON outputs; created if it does not exist.\n",
    "        process_item_fn: Called as `process_item_fn(item, index, answer_dict[, key])`. It is\n",
    "            expected to store a record with \"is_valid\" and \"subject\" entries in `answer_dict`.\n",
    "        max_workers: Number of worker threads per thread pool.\n",
    "\n",
    "    Writes `valid_samples.json`, `invalid_samples.json`, `subject_valid_percentages.json` and\n",
    "    `subject_invalid_percentages.json` to `output_dir`. The per-subject values are fractions\n",
    "    between 0 and 1.\n",
    "    \"\"\"\n",
    "    # Make sure the writes below cannot fail on a missing directory\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    # Sample details are added in place into the answer_dict by process_item_fn\n",
    "    answer_dict = {}\n",
    "    if isinstance(dataset, dict):\n",
    "        for subject in tqdm(dataset.keys()):\n",
    "            _run_items_in_parallel(\n",
    "                dataset[subject], process_item_fn, answer_dict, max_workers, extra_args=(subject,)\n",
    "            )\n",
    "    else:\n",
    "        _run_items_in_parallel(dataset, process_item_fn, answer_dict, max_workers)\n",
    "\n",
    "    # sort by id\n",
    "    answer_dict = dict(sorted(answer_dict.items(), key=lambda x: x[0]))\n",
    "\n",
    "    valid_samples = [item for item in answer_dict.values() if item[\"is_valid\"]]\n",
    "    invalid_samples = [item for item in answer_dict.values() if not item[\"is_valid\"]]\n",
    "\n",
    "    print(f\"Valid samples: {len(valid_samples)}\")\n",
    "    print(f\"Invalid samples: {len(invalid_samples)}\")\n",
    "\n",
    "    # save the valid and invalid samples\n",
    "    with open(f\"{output_dir}/valid_samples.json\", \"w\") as f:\n",
    "        json.dump(valid_samples, f)\n",
    "\n",
    "    with open(f\"{output_dir}/invalid_samples.json\", \"w\") as f:\n",
    "        json.dump(invalid_samples, f)\n",
    "\n",
    "    # Get all subjects\n",
    "    subjects = list(set([item[\"subject\"] for item in valid_samples + invalid_samples]))\n",
    "\n",
    "    # Count the number of valid and invalid samples per subject\n",
    "    subject_valid_counts = {subject: 0 for subject in subjects}\n",
    "    subject_invalid_counts = {subject: 0 for subject in subjects}\n",
    "\n",
    "    for item in valid_samples:\n",
    "        subject_valid_counts[item[\"subject\"]] += 1\n",
    "\n",
    "    for item in invalid_samples:\n",
    "        subject_invalid_counts[item[\"subject\"]] += 1\n",
    "\n",
    "    # sort the subjects by the number of valid samples\n",
    "    sorted_subjects = sorted(subject_valid_counts.keys(), key=lambda x: subject_valid_counts[x], reverse=True)\n",
    "\n",
    "    # Every subject comes from at least one sample, so the totals below are never zero\n",
    "    subject_totals = {\n",
    "        subject: subject_valid_counts[subject] + subject_invalid_counts[subject]\n",
    "        for subject in sorted_subjects\n",
    "    }\n",
    "\n",
    "    # fractions of valid samples per subject, sorted by value (decreasing)\n",
    "    subject_valid_percentages = {\n",
    "        subject: subject_valid_counts[subject] / subject_totals[subject] for subject in sorted_subjects\n",
    "    }\n",
    "    subject_valid_percentages = dict(sorted(subject_valid_percentages.items(), key=lambda x: x[1], reverse=True))\n",
    "\n",
    "    # fractions of invalid samples per subject, sorted by value (decreasing)\n",
    "    subject_invalid_percentages = {\n",
    "        subject: subject_invalid_counts[subject] / subject_totals[subject] for subject in sorted_subjects\n",
    "    }\n",
    "    subject_invalid_percentages = dict(sorted(subject_invalid_percentages.items(), key=lambda x: x[1], reverse=True))\n",
    "\n",
    "    # save the percentages\n",
    "    with open(f\"{output_dir}/subject_valid_percentages.json\", \"w\") as f:\n",
    "        json.dump(subject_valid_percentages, f)\n",
    "\n",
    "    with open(f\"{output_dir}/subject_invalid_percentages.json\", \"w\") as f:\n",
    "        json.dump(subject_invalid_percentages, f)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MMLU"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the \"all\" config of cais/mmlu and keep a handle on its test split.\n",
    "mmlu_dataset = load_dataset(\"cais/mmlu\", \"all\")\n",
    "ds = mmlu_dataset[\"test\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take a single timestamp so the date and time parts of the output path always agree.\n",
    "now = datetime.datetime.now()\n",
    "date_now = now.strftime(\"%Y_%m_%d\")\n",
    "time_now = now.strftime(\"%H_%M_%S\")\n",
    "\n",
    "output_dir = f\"dataset_filtering/mmlu/{date_now}/{time_now}\"\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "def process_item(item, i, answer_dict):\n",
    "    \"\"\"Ask the judge about one MMLU sample and log the outcome in `answer_dict` under index `i`.\"\"\"\n",
    "    return ask_judge(\n",
    "        client=client,\n",
    "        model_name=model_name,\n",
    "        question=item[\"question\"],\n",
    "        choices=item[\"choices\"],\n",
    "        target=item[\"answer\"],\n",
    "        subject=item[\"subject\"],\n",
    "        logging_dict=answer_dict,\n",
    "        id=i,\n",
    "    )\n",
    "\n",
    "# Pass the test split itself, not the whole `mmlu_dataset`: a DatasetDict is a dict, so\n",
    "# process_dataset would take its per-key branch and call process_item with a fourth\n",
    "# (split name) argument that this three-parameter function does not accept.\n",
    "process_dataset(\n",
    "    dataset=mmlu_dataset[\"test\"],\n",
    "    output_dir=output_dir,\n",
    "    process_item_fn=process_item,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Prompt v0** \\\n",
    "Valid samples: 13756 \\\n",
    "Invalid samples: 286\n",
    "\n",
    "**Prompt v1** \\\n",
    "Valid samples: 9616 \\\n",
    "Invalid samples: 4426\n",
    "\n",
    "**Prompt v2** \\\n",
    "Valid samples: 6619 \\\n",
    "Invalid samples: 7423"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Big Bench Hard"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dataset has only `input` and `target` fields.\n",
    "\n",
    "Not all tasks are MCQ tasks.\n",
    "Those that are have the options directly in the `input` field.\n",
    "\n",
    "The options are formatted as follows:\n",
    "```\n",
    "<question>\n",
    "Options:\n",
    "(A) <option_a>\n",
    "(B) <option_b>\n",
    "...\n",
    "```\n",
    "\n",
    "The target is the letter of the correct option.\n",
    "\n",
    "We can easily extract the options (and the target) from the `input` field and remove them from the `input` field.\n",
    "\n",
    "After that, we can ask the judge to evaluate whether the question is valid as an open-ended question."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the test split for every key returned by get_dataset_infos (presumably one per BBH\n",
    "# task config - TODO confirm) and index the splits by that key.\n",
    "dataset_path = \"SaylorTwift/bbh\"\n",
    "subset_to_dataset_mapping = {}\n",
    "for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):\n",
    "    # Downloads are cached locally under ./dataset_cache\n",
    "    ds = load_dataset(dataset_path, task, cache_dir=\"./dataset_cache\")[\"test\"]\n",
    "    subset_to_dataset_mapping[task] = ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Samples that raise during processing are recorded here by process_item\n",
    "failed_samples = {}\n",
    "\n",
    "# Take a single timestamp so the date and time parts of the output path always agree.\n",
    "now = datetime.datetime.now()\n",
    "date_now = now.strftime(\"%Y_%m_%d\")\n",
    "time_now = now.strftime(\"%H_%M_%S\")\n",
    "\n",
    "output_dir = f\"dataset_filtering/bbh/{date_now}/{time_now}\"\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "def extract_choices(input_text):\n",
    "    # find the line that starts with \"Options:\"\n",
    "    options = input_text.split(\"Options:\")[1].strip()\n",
    "    if options:\n",
    "        # extract the options after \"Options:\"\n",
    "        options = options.split(\"\\n\")\n",
    "        final_options = []\n",
    "        for option in options:\n",
    "            if \"Yes\" in option:\n",
    "                final_options.append(\"Yes\")\n",
    "            elif \"No\" in option:\n",
    "                final_options.append(\"No\")\n",
    "            else:\n",
    "                final_options.append(option.strip())\n",
    "        return final_options\n",
    "    else:\n",
    "        return []\n",
    "\n",
    "# iterate over the dataset\n",
    "def process_item(item, i, answer_dict, subject):\n",
    "    \"\"\"\n",
    "    Split one sample into question and options, then ask the judge about the bare question.\n",
    "\n",
    "    The record is logged in `answer_dict` under the key `{subject}_{i}`. Any exception is\n",
    "    printed and recorded in the global `failed_samples`; in that case None is returned.\n",
    "    \"\"\"\n",
    "    id = f\"{subject}_{i}\"\n",
    "    try:\n",
    "        input_text, target = item[\"input\"], item[\"target\"]\n",
    "\n",
    "        if \"Options:\" not in input_text:\n",
    "            # Not a multiple-choice sample: the whole input is the question\n",
    "            choices = []\n",
    "            question = input_text\n",
    "        else:\n",
    "            choices = extract_choices(input_text)\n",
    "            # A non-string target is used as an index into the extracted options\n",
    "            if not isinstance(target, str):\n",
    "                target = choices[target]\n",
    "            # Keep only the text in front of the options\n",
    "            question = input_text.split(\"Options:\")[0].strip()\n",
    "\n",
    "        return ask_judge(\n",
    "            client=client,\n",
    "            model_name=model_name,\n",
    "            question=question,\n",
    "            choices=choices,\n",
    "            target=target,\n",
    "            subject=subject,\n",
    "            logging_dict=answer_dict,\n",
    "            id=id,\n",
    "        )\n",
    "    except Exception as e:\n",
    "        print(f\"Error processing item {id}: {e}\")\n",
    "        failed_samples[id] = {\n",
    "            \"input\": item[\"input\"],\n",
    "            \"target\": item[\"target\"],\n",
    "            \"subject\": subject,\n",
    "            \"error\": str(e),\n",
    "        }\n",
    "        return None\n",
    "\n",
    "# The mapping is a dict, so process_dataset handles it key by key and passes each key to\n",
    "# process_item as `subject`.\n",
    "process_dataset(\n",
    "    dataset=subset_to_dataset_mapping,\n",
    "    output_dir=output_dir,\n",
    "    process_item_fn=process_item\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Prompt v2** \\\n",
    "Valid samples: 5381 \\\n",
    "Invalid samples: 1380"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MMLU Pro"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Dataset contains the following structure:\n",
    "```\n",
    "{\n",
    "    \"question_id\": int,\n",
    "    \"question\": str,\n",
    "    \"options\": list[str],\n",
    "    \"answer\": str,\n",
    "    \"answer_index\": int,\n",
    "    \"category\": str,\n",
    "    \"src\": str,\n",
    "}\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset once and reuse it for both splits\n",
    "dataset_path = \"TIGER-Lab/MMLU-Pro\"\n",
    "mmlu_pro_dataset = load_dataset(dataset_path)\n",
    "ds_test = mmlu_pro_dataset[\"test\"]\n",
    "# validation set is used for in context examples. There are 5 per subject.\n",
    "ds_val = mmlu_pro_dataset[\"validation\"]\n",
    "# Keyed by split name; process_dataset passes the key on to process_item\n",
    "ds = {\n",
    "    \"test\": ds_test,\n",
    "    \"validation\": ds_val,\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# process the dataset\n",
    "failed_samples = {}\n",
    "\n",
    "# Take a single timestamp so the date and time parts of the output path always agree.\n",
    "now = datetime.datetime.now()\n",
    "date_now = now.strftime(\"%Y_%m_%d\")\n",
    "time_now = now.strftime(\"%H_%M_%S\")\n",
    "\n",
    "output_dir = f\"dataset_filtering/mmlu_pro/{date_now}/{time_now}\"\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "def process_item(item, i, answer_dict, split, **kwargs):\n",
    "    \"\"\"\n",
    "    Ask the judge about one MMLU-Pro sample.\n",
    "\n",
    "    The record is logged in `answer_dict` under the key `{split}_{question_id}`; the\n",
    "    positional index `i` and any extra keyword arguments are not used.\n",
    "    \"\"\"\n",
    "    question = item[\"question\"]\n",
    "    choices = item[\"options\"]\n",
    "    target = item[\"answer_index\"]\n",
    "    subject = item[\"category\"]\n",
    "    id = item[\"question_id\"]\n",
    "    # Prefix with the split name so ids from different splits cannot collide\n",
    "    id = f\"{split}_{id}\"\n",
    "    return ask_judge(\n",
    "        client=client,\n",
    "        model_name=model_name,\n",
    "        question=question,\n",
    "        choices=choices,\n",
    "        target=target,\n",
    "        subject=subject,\n",
    "        logging_dict=answer_dict,\n",
    "        id=id,\n",
    "    )\n",
    "\n",
    "process_dataset(\n",
    "    dataset=ds,\n",
    "    output_dir=output_dir,\n",
    "    process_item_fn=process_item,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Prompt v2** \\\n",
    "Valid samples: 8599 \\\n",
    "Invalid samples: 3433"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GPQA"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Diamond subset has only 198 samples. \\\n",
    "Main subset has 448 samples."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the `train` split of the gpqa_main and gpqa_diamond configs\n",
    "dataset_path= \"Idavidrein/gpqa\"\n",
    "ds_main = load_dataset(dataset_path, \"gpqa_main\")[\"train\"]\n",
    "ds_diamond = load_dataset(dataset_path, \"gpqa_diamond\")[\"train\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# process the dataset\n",
    "\n",
    "# Take a single timestamp so the date and time parts of the output paths always agree.\n",
    "now = datetime.datetime.now()\n",
    "date_now = now.strftime(\"%Y_%m_%d\")\n",
    "time_now = now.strftime(\"%H_%M_%S\")\n",
    "\n",
    "def process_item(item, i, answer_dict):\n",
    "    \"\"\"\n",
    "    Ask the judge about one GPQA sample.\n",
    "\n",
    "    The record is logged in `answer_dict` under the key `{Subdomain}_{i}`.\n",
    "    \"\"\"\n",
    "    question = item[\"Question\"]\n",
    "    # The correct answer is always placed first (index 0)\n",
    "    choices = [\n",
    "        item[\"Correct Answer\"],\n",
    "        item[\"Incorrect Answer 1\"],\n",
    "        item[\"Incorrect Answer 2\"],\n",
    "        item[\"Incorrect Answer 3\"],\n",
    "    ]\n",
    "    target = item[\"Correct Answer\"]\n",
    "    subject = item[\"Subdomain\"]\n",
    "    id = f\"{subject}_{i}\"\n",
    "    return ask_judge(\n",
    "        client=client,\n",
    "        model_name=model_name,\n",
    "        question=question,\n",
    "        choices=choices,\n",
    "        target=target,\n",
    "        subject=subject,\n",
    "        logging_dict=answer_dict,\n",
    "        id=id,\n",
    "    )\n",
    "\n",
    "# Run the same pipeline for both GPQA subsets, each with its own output directory\n",
    "for subset_name, subset in ((\"gpqa_main\", ds_main), (\"gpqa_diamond\", ds_diamond)):\n",
    "    output_dir = f\"dataset_filtering/{subset_name}/{date_now}/{time_now}\"\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    process_dataset(\n",
    "        dataset=subset,\n",
    "        output_dir=output_dir,\n",
    "        process_item_fn=process_item,\n",
    "    )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**Prompt v2**\n",
    "\n",
    "**gpqa_main** \\\n",
    "Valid samples: 298 \\\n",
    "Invalid samples: 150\n",
    "\n",
    "**gpqa_diamond** \\\n",
    "Valid samples: 137 \\\n",
    "Invalid samples: 61"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "mm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
