{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "NW5GH0rvtIDd"
      },
      "source": [
        "# Model codes for 2nd and 3rd steps in LLM-augumented Statistic Causal Discovery(for Google Colaboratory)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "5NdB8DG3Z4bi"
      },
      "source": [
        "###Preparation of Libraries"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "oWw1GzgjQAFm"
      },
      "outputs": [],
      "source": [
        "!pip install numpy==1.25.0 #For maintaining the consistency of the versions among the packages tentatively."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "i59CJeENtDV8"
      },
      "outputs": [],
      "source": [
        "!pip install openai\n",
        "!pip install lingam\n",
        "!pip install factor_analyzer\n",
        "!pip install igraph\n",
        "!pip install pygam\n",
        "!pip install causal-learn"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QUi9yQgmKt_T"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "os.environ[\"OPENAI_API_KEY\"]=\"\"#API key for OpenAI is inserted here.\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import graphviz\n",
        "import lingam\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "from lingam.utils import print_causal_directions, print_dagc, make_dot, make_prior_knowledge\n",
        "import hashlib\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "from causallearn.utils.GraphUtils import GraphUtils\n",
        "import matplotlib.image as mpimg\n",
        "import io\n",
        "from scipy.stats import norm\n",
        "from copy import deepcopy\n",
        "from itertools import combinations\n",
        "\n",
        "from causallearn.search.ConstraintBased.PC import pc\n",
        "from causallearn.search.ScoreBased.GES import ges\n",
        "\n",
        "\n",
        "print(\"NumPy\",  \"ver:\", np.__version__)\n",
        "print(\"Pandas\", \"ver:\", pd.__version__)\n",
        "print(\"Graphviz\",   \"ver:\", graphviz.__version__)\n",
        "print(\"LiNGAM\", \"ver:\", lingam.__version__)\n",
        "\n",
        "np.set_printoptions(precision=3, suppress=True)\n",
        "\n",
        "# fixing the random seed of np for the repoductivity\n",
        "np.random.seed(203)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "T1dtUs8GbDVn"
      },
      "source": [
        "### importing basic information for LLM-KBCI and the results of SCD"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "fbcKm86xEG4g"
      },
      "source": [
        "terminological setting for LLM"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "HdulVoDfCbEB"
      },
      "outputs": [],
      "source": [
        "#example for health screening data\n",
        "#blank 1\n",
        "context_X = \"on health screening results\"\n",
        "\n",
        "#blank 2\n",
        "labels_X = [\"body mass index\",\"waist circumference\", \"systolic blood pressure\", \"diastolic blood pressure\", \"hemoglobin A1c\", \"low density lipoprotein cholesterol\", \"age\"]\n",
        "\n",
        "#blank 4\n",
        "dataset_explanation_X = \"health screening results among working-age(from 40 to 64 years old) population\""
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Importing the results of SCD"
      ],
      "metadata": {
        "id": "kxPSoYtPV_EQ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# for PC\n",
        "dag_est_pc = np.loadtxt('', delimiter=',')#loading the csv file of adjacency matrix calculated with PC\n",
        "prob0_pc_directed = np.loadtxt('', delimiter=',')#loading the csv file of bootstrap probability matrix for directed edges calculated with PC\n",
        "prob0_pc_undirected = np.loadtxt('', delimiter=',')#loading the csv file of bootstrap probability matrix for undirected edges calculated with PC"
      ],
      "metadata": {
        "id": "xv5mDwwyXGHD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# for Exact Search\n",
        "dag_est_es = np.loadtxt('', delimiter=',')#loading the csv file of adjacency matrix calculated with Exact Search\n",
        "prob0_es = np.loadtxt('', delimiter=',') #loading the csv file of bootstrap probability matrix calculated with ExactSearch"
      ],
      "metadata": {
        "id": "okaO-lalYAJ3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dhwno5y_U0H3"
      },
      "outputs": [],
      "source": [
        "#for DirectLiNGAM\n",
        "lingam0_adjacency_matrix_ = np.loadtxt('', delimiter=',')#loading the csv file of adjacency matrix calculated with DirectLiNGAM with causal coefficients\n",
        "prob0_lingam = np.loadtxt('', delimiter=',')#loading the csv file of bootstrap probability matrix calculated with DirectLiNGAM"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "epbsKYbiKERq"
      },
      "source": [
        "## Preparing the 1st prompting(for 2nd step)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "CK0G2Tg3UnVD"
      },
      "source": [
        "##For LiNGAM"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "k5vrvr4BUQrl"
      },
      "source": [
        "Preparation of functions for Pattern 1"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "pMotTu5dV5S1"
      },
      "outputs": [],
      "source": [
        "def all_edges_pattern1(adjacency_matrix, labels):\n",
        "  num_nodes = adjacency_matrix.shape[0]\n",
        "  text = \"\"\"All of the edges suggested by the statistical causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == 0:\n",
        "                continue\n",
        "            else:\n",
        "              text = text + f\"\"\"{labels[j]} → {labels[i]}\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "\"\"\"\n",
        "  return text"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "7QRBmfnz0sh4"
      },
      "outputs": [],
      "source": [
        "def create_causal_text_matrix1_pattern1(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    causal_text_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be no direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "            else:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "\n",
        "    return causal_text_matrix"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Preparation of functions for Pattern 2"
      ],
      "metadata": {
        "id": "nAxMQO8JZJ0v"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def all_edges_pattern2(boot_prob, labels):\n",
        "  num_nodes = boot_prob.shape[0]\n",
        "  text = \"\"\"All of the edges with non-zero bootstrap probabilities suggested by the statistical causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob[i, j] == 0:\n",
        "                continue\n",
        "            else:\n",
        "              text = text + f\"\"\"{labels[j]} → {labels[i]} (bootstrap probability = {boot_prob[i,j]})\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "\"\"\"\n",
        "  return text"
      ],
      "metadata": {
        "id": "DbjUa3ltZOn6"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "def create_causal_text_matrix1_pattern2(boot_prob, labels):\n",
        "    num_nodes = boot_prob.shape[0]\n",
        "    causal_text_matrix = np.empty(boot_prob.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be no direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "            else:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob[i, j]}.\"\n",
        "\n",
        "    return causal_text_matrix"
      ],
      "metadata": {
        "id": "q9ozLC3QZPCe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VqyZfNvXwEk1"
      },
      "source": [
        "Preparation of functions for Pattern 3"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "57zKbxgUwFJb"
      },
      "outputs": [],
      "source": [
        "def all_edges_pattern3(adjacency_matrix, labels):\n",
        "  num_nodes = adjacency_matrix.shape[0]\n",
        "  text = \"\"\"All of the edges and their coefficients of the structural causal model suggested by the statistical causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == 0:\n",
        "                continue\n",
        "            else:\n",
        "              text = text + f\"\"\"{labels[j]} → {labels[i]} (coefficient = {adjacency_matrix[i,j]})\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "\"\"\"\n",
        "  return text"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "-4oqdC3b2AWh"
      },
      "outputs": [],
      "source": [
        "def create_causal_text_matrix1_pattern3(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    causal_text_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be no direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "            else:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a causal coefficient of {adjacency_matrix[i, j]}.\"\n",
        "\n",
        "    return causal_text_matrix"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "FnQAmh7KypTN"
      },
      "source": [
        "Preparation of functions for Pattern 4"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "KgDuf0qnyqMR"
      },
      "outputs": [],
      "source": [
        "def all_edges_pattern4(adjacency_matrix, boot_prob, labels):\n",
        "  num_nodes = boot_prob.shape[0]\n",
        "  text = \"\"\"All of the edges with non-zero bootstrap probabilities and their coefficients of the structural causal model suggested by the statistical causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob[i, j] == 0:\n",
        "                continue\n",
        "            else:\n",
        "              text = text + f\"\"\"{labels[j]} → {labels[i]} (coefficient = {adjacency_matrix[i, j]}, bootstrap probability = {boot_prob[i,j]})\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "\"\"\"\n",
        "  return text"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "peXIfk8S32xG"
      },
      "outputs": [],
      "source": [
        "def create_causal_text_matrix1_pattern4(adjacency_matrix, boot_prob, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    causal_text_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be no direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "            else:\n",
        "              if adjacency_matrix[i, j] == 0:\n",
        "                  causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob[i, j]}, but the coefficient is likely to be {adjacency_matrix[i, j]}.\"\n",
        "\n",
        "              else:\n",
        "                  causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob[i, j]}, and the coefficient is likely to be {adjacency_matrix[i, j]}.\"\n",
        "\n",
        "\n",
        "    return causal_text_matrix"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8yRKzBm_9Z3F"
      },
      "outputs": [],
      "source": [
        "template_Q1_1 = \"We want to carry out causal inference {}, considering {} as variables.\"\n",
        "template_Q1_2 = \"First, we have conducted the statistical causal discovery with LiNGAM(Linear Non-Gaussian Acyclic Model) algorithm, using a fully standardized dataset on {}.\"\n",
        "\n",
        "variables_X = ', '.join(labels_X[:-1]) + ', and ' + labels_X[-1]\n",
        "\n",
        "\n",
        "Q1_1 = template_Q1_1.format(context_X, variables_X)\n",
        "Q1_2 = template_Q1_2.format(dataset_explanation_X)\n",
        "\n",
        "Q1_3 = f\"According to the results shown above, it has been determined that\"\n",
        "\n",
        "def create_1st_template_text_matrix(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    causal_1st_template_text_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            causal_1st_template_text_matrix[i, j] = f\"\"\"Then, your task is to interpret this result from a domain knowledge perspective and determine whether this statistically suggested hypothesis is plausible in the context of the domain.\n",
        "Please provide an explanation that leverages your expert knowledge on the causal relationship between {labels[j]} and {labels[i]}, and assess the naturalness of this causal discovery result.\n",
        "Your response should consider the relevant factors and provide a reasoned explanation based on your understanding of the domain.\"\"\"\n",
        "\n",
        "    return causal_1st_template_text_matrix\n",
        "\n",
        "#Pattern 0 is prepared from here.\n",
        "def create_1st_prompt_matrix_pattern0(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    first_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 + \"\\n\" + f\"\"\"If {labels[j]} is modified, will it have a direct impact on {labels[i]}?\n",
        "Please provide an explanation that leverages your expert knowledge on the causal relationship between {labels[j]} and {labels[i]}.\n",
        "Your response should consider the relevant factors and provide a reasoned explanation based on your understanding of the domain.\"\"\"\n",
        "\n",
        "    return first_prompt_matrix\n",
        "\n",
        "#Pattern 1\n",
        "def create_1st_prompt_matrix_pattern1(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    first_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern1(adjacency_matrix, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern1(adjacency_matrix, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(adjacency_matrix, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix\n",
        "\n",
        "#Pattern 2\n",
        "def create_1st_prompt_matrix_pattern2(boot_prob, labels):\n",
        "    num_nodes = boot_prob.shape[0]\n",
        "    first_prompt_matrix = np.empty(boot_prob.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern3(boot_prob, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern3(boot_prob, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(boot_prob, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix\n",
        "\n",
        "\n",
        "#Pattern 3\n",
        "def create_1st_prompt_matrix_pattern3(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    first_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern2(adjacency_matrix, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern2(adjacency_matrix, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(adjacency_matrix, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix\n",
        "\n",
        "#Pattern 4\n",
        "def create_1st_prompt_matrix_pattern4(adjacency_matrix, boot_prob, labels):\n",
        "    num_nodes = boot_prob.shape[0]\n",
        "    first_prompt_matrix = np.empty(boot_prob.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern4(adjacency_matrix, boot_prob, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern4(adjacency_matrix, boot_prob, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(adjacency_matrix, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "hzNb-XAg_RlF"
      },
      "outputs": [],
      "source": [
        "# comletion of 1st prompting matrices\n",
        "first_prompt_matrix_LiNGAM1_X_pattern0 = create_1st_prompt_matrix_pattern0(lingam0_adjacency_matrix_, labels_X)\n",
        "first_prompt_matrix_LiNGAM1_X_pattern1 = create_1st_prompt_matrix_pattern1(lingam0_adjacency_matrix_, labels_X)\n",
        "first_prompt_matrix_LiNGAM1_X_pattern2 = create_1st_prompt_matrix_pattern2(prob0_lingam, labels_X)\n",
        "first_prompt_matrix_LiNGAM1_X_pattern3 = create_1st_prompt_matrix_pattern3(lingam0_adjacency_matrix_, labels_X)\n",
        "first_prompt_matrix_LiNGAM1_X_pattern4 = create_1st_prompt_matrix_pattern4(lingam0_adjacency_matrix_, prob0_lingam, labels_X)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "##For Exact Search(Slight modification of patterns for DirectLiNGAM)"
      ],
      "metadata": {
        "id": "wquVq8reb4U1"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "template_Q1_1 = \"We want to carry out causal inference {}, considering {} as variables.\"\n",
        "template_Q1_2 = \"First, we have conducted the statistical causal discovery with Exact Search algorithm, using a fully standardized dataset on {}.\"\n",
        "\n",
        "variables_X = ', '.join(labels_X[:-1]) + ', and ' + labels_X[-1]\n",
        "\n",
        "Q1_1 = template_Q1_1.format(context_X, variables_X)\n",
        "Q1_2 = template_Q1_2.format(dataset_explanation_X)\n",
        "\n",
        "Q1_3 = f\"According to the results shown above, it has been determined that\"\n",
        "\n",
        "#Pattern1\n",
        "def create_1st_prompt_matrix_pattern1(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    first_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern1(adjacency_matrix, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern1(adjacency_matrix, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(adjacency_matrix, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix\n",
        "\n",
        "#Pattern2\n",
        "def create_1st_prompt_matrix_pattern2(boot_prob, labels):\n",
        "    num_nodes = boot_prob.shape[0]\n",
        "    first_prompt_matrix = np.empty(boot_prob.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern3(boot_prob, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern2(boot_prob, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(boot_prob, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix"
      ],
      "metadata": {
        "id": "vFpikCajcwbt"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "first_prompt_matrix_ES_X_pattern1 = create_1st_prompt_matrix_pattern1(dag_est_es, labels_X)\n",
        "first_prompt_matrix_ES_X_pattern2 = create_1st_prompt_matrix_pattern2(prob0_es, labels_X)"
      ],
      "metadata": {
        "id": "XkHiDkr8dNUD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## For PC"
      ],
      "metadata": {
        "id": "ySAaUGzwdeCT"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Preparation for Pattern 1"
      ],
      "metadata": {
        "id": "aQ_DBuyIdkou"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def all_edges_pattern1_PC(adjacency_matrix, labels):\n",
        "  num_nodes = adjacency_matrix.shape[0]\n",
        "  text = \"\"\"All of the directed edges suggested by the statistic causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == 1:\n",
        "              text = text + f\"\"\"{labels[j]} → {labels[i]}\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "  In additon to the directed edges above, all of the undirected edges suggested by the statistic causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(i+1, num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == -1:\n",
        "              text = text + f\"\"\"{labels[j]} － {labels[i]}\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "\"\"\"\n",
        "  return text"
      ],
      "metadata": {
        "id": "qSZzxjandgOk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def create_causal_text_matrix1_pattern1_PC(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    causal_text_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if adjacency_matrix[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be no direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "            if adjacency_matrix[i, j] == 1:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "            else:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct causal relationship between {labels[j]} and {labels[i]}, although the direction has not been determined.\"\n",
        "    return causal_text_matrix"
      ],
      "metadata": {
        "id": "FOetAYBGdrHF"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Preparation for Pattern 2"
      ],
      "metadata": {
        "id": "cUX9k-Fld7a2"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def all_edges_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels):\n",
        "  num_nodes = boot_prob0_directed.shape[0]\n",
        "  text = \"\"\"All of the directed edges with non-zero bootstrap probabilities suggested by the statistic causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob0_directed[i, j] == 0:\n",
        "                continue\n",
        "            else:\n",
        "              text = text + f\"\"\"{labels[j]} → {labels[i]} (bootstrap probability = {boot_prob0_directed[i,j]})\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "  In additon to the directed edges above, all of the undirected edges suggested by the statistic causal discovery are below:\n",
        "-----\n",
        "\"\"\"\n",
        "\n",
        "  for i in range(num_nodes):\n",
        "        for j in range(i+1, num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob0_undirected[i, j] == 0:\n",
        "                continue\n",
        "            else:\n",
        "              text = text + f\"\"\"{labels[j]} ― {labels[i]} (bootstrap probability = {boot_prob0_undirected[i,j]})\n",
        "\"\"\"\n",
        "  text = text +\"\"\"-----\n",
        "\"\"\"\n",
        "  return text"
      ],
      "metadata": {
        "id": "qrucbomRd-LD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def create_causal_text_matrix1_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels):\n",
        "    num_nodes = boot_prob0_directed.shape[0]\n",
        "    causal_text_matrix = np.empty(boot_prob0_directed.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            if boot_prob0_directed[i, j] == 0 and boot_prob0_undirected[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be no direct impact of a change in {labels[j]} on {labels[i]}.\"\n",
        "\n",
        "            if boot_prob0_directed[i, j] != 0 and boot_prob0_undirected[i, j] == 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob0_directed[i, j]}.\"\n",
        "\n",
        "            if boot_prob0_directed[i, j] == 0 and boot_prob0_undirected[i, j] != 0:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct causal relationship between {labels[j]} and {labels[i]} with a bootstrap probability of {boot_prob0_undirected[i, j]}, although the direction has not been determined.\"\n",
        "\n",
        "            else:\n",
        "                causal_text_matrix[i, j] = f\"there may be a direct impact of a change in {labels[j]} on {labels[i]} with a bootstrap probability of {boot_prob0_directed[i, j]}. In addition, it has also been shown above that there may be a direct causal relationship between {labels[j]} and {labels[i]} with a bootstrap probability of {boot_prob0_undirected[i, j]},although the direction has not completely been determined.\"\n",
        "\n",
        "    return causal_text_matrix"
      ],
      "metadata": {
        "id": "4UPkQACieLHe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "template_Q1_1 = \"We want to carry out causal inference {}, considering {} as variables.\"\n",
        "template_Q1_2 = \"First, we have conducted the statistical causal discovery with PC(Peter-Clerk) algorithm, using a fully standardized dataset on {}.\"\n",
        "\n",
        "variables_X = ', '.join(labels_X[:-1]) + ', and ' + labels_X[-1]\n",
        "\n",
        "Q1_1 = template_Q1_1.format(context_X, variables_X)\n",
        "Q1_2 = template_Q1_2.format(dataset_explanation_X)\n",
        "\n",
        "Q1_3 = f\"According to the results shown above, it has been determined that \"#LiNGAMの出力結果のテキストの直前部分。\n",
        "\n",
        "\n",
        "#Pattern 1\n",
        "def create_1st_prompt_matrix_pattern1_PC(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    first_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern1_PC(adjacency_matrix, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern1_PC(adjacency_matrix, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(adjacency_matrix, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix\n",
        "\n",
        "#パターン2\n",
        "def create_1st_prompt_matrix_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels):\n",
        "    num_nodes = boot_prob0_directed.shape[0]\n",
        "    first_prompt_matrix = np.empty(boot_prob0_directed.shape, dtype=object)\n",
        "\n",
        "    all_edges = all_edges_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels)\n",
        "    causal_texts = create_causal_text_matrix1_pattern2_PC(boot_prob0_directed, boot_prob0_undirected, labels)\n",
        "    causal_1st_template_texts = create_1st_template_text_matrix(boot_prob0_directed, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue # 対角成分にも、テキストが入ってしまう場合の例外処理\n",
        "            first_prompt_matrix[i, j] = Q1_1 +\"\\n\"+ Q1_2 +\"\\n\"+ all_edges + \"\\n\"+ Q1_3 + causal_texts[i, j] +\"\\n\"+ causal_1st_template_texts[i, j]\n",
        "\n",
        "    return first_prompt_matrix"
      ],
      "metadata": {
        "id": "NDeeYnkoecpk"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "first_prompt_matrix_PC_X_pattern1 = create_1st_prompt_matrix_pattern1_PC(dag_est_pc, labels_X)\n",
        "first_prompt_matrix_PC_X_pattern2 = create_1st_prompt_matrix_pattern2_PC(prob0_pc_directed, prob0_pc_undirected, labels_X)"
      ],
      "metadata": {
        "id": "_M9Ug1ANe0W7"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "2Si3EUTFCvh6"
      },
      "source": [
        "# Knowledge generation with 1st prompting"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "RzCoaqMmFFFK"
      },
      "outputs": [],
      "source": [
        "system_role = \"You are a helpful assistant for causal inference.\"\n",
        "from openai import OpenAI\n",
        "client = OpenAI()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "QgnO3zreeFJW"
      },
      "outputs": [],
      "source": [
        "generated_knowledge_matrix_0_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)\n",
        "generated_knowledge_matrix_1_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)\n",
        "generated_knowledge_matrix_2_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)\n",
        "generated_knowledge_matrix_3_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)\n",
        "generated_knowledge_matrix_4_L = np.empty(lingam0_adjacency_matrix_.shape, dtype=object)\n",
        "generated_knowledge_matrix_1_E = np.empty(dag_est_es.shape, dtype=object)\n",
        "generated_knowledge_matrix_2_E = np.empty(dag_est_es.shape, dtype=object)\n",
        "generated_knowledge_matrix_1_P = np.empty(dag_est_pc.shape, dtype=object)\n",
        "generated_knowledge_matrix_2_P = np.empty(dag_est_pc.shape, dtype=object)"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "For DirectLiNGAM"
      ],
      "metadata": {
        "id": "4trU1ww2rMYY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 0\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_LiNGAM1_X_pattern0[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_0_L[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_0_L_df = pd.DataFrame(generated_knowledge_matrix_0_L)\n",
        "generated_knowledge_matrix_0_L_df.to_csv(\"generated_knowledge_matrix_0_L.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_0_L_df.to_csv(\"generated_knowledge_matrix_0_L_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "HfuZWkdCrWyx"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "WpHOMJlNLUSi"
      },
      "outputs": [],
      "source": [
        "#pattern 1\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_LiNGAM1_X_pattern1[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_1_L[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_1_L_df = pd.DataFrame(generated_knowledge_matrix_1_L)\n",
        "generated_knowledge_matrix_1_L_df.to_csv(\"generated_knowledge_matrix_1_L.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_1_L_df.to_csv(\"generated_knowledge_matrix_1_L_for_excel.csv\", encoding='utf-8-sig')"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 2\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_LiNGAM1_X_pattern2[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_2_L[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_2_L_df = pd.DataFrame(generated_knowledge_matrix_2_L)\n",
        "generated_knowledge_matrix_2_L_df.to_csv(\"generated_knowledge_matrix_2_L.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_2_L_df.to_csv(\"generated_knowledge_matrix_2_L_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "DiqrbfmtjNpD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 3\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_LiNGAM1_X_pattern3[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_3_L[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_3_L_df = pd.DataFrame(generated_knowledge_matrix_3_L)\n",
        "generated_knowledge_matrix_3_L_df.to_csv(\"generated_knowledge_matrix_3_L.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_3_L_df.to_csv(\"generated_knowledge_matrix_3_L_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "-XDqofeqjhvT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 4\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_LiNGAM1_X_pattern4[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_4_L[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_4_L_df = pd.DataFrame(generated_knowledge_matrix_4_L)\n",
        "generated_knowledge_matrix_4_L_df.to_csv(\"generated_knowledge_matrix_4_L.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_4_L_df.to_csv(\"generated_knowledge_matrix_4_L_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "90vK0gzIqfcb"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "For Exact Search"
      ],
      "metadata": {
        "id": "lP918LyosMdI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 1\n",
        "for i in range(dag_est_es.shape[0]):\n",
        "    for j in range(dag_est_es.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_ES_X_pattern1[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_1_E[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_1_E_df = pd.DataFrame(generated_knowledge_matrix_1_E)\n",
        "generated_knowledge_matrix_1_E_df.to_csv(\"generated_knowledge_matrix_1_E.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_1_E_df.to_csv(\"generated_knowledge_matrix_1_E_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "lqLrp8bWsLwM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 2\n",
        "for i in range(dag_est_es.shape[0]):\n",
        "    for j in range(dag_est_es.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_ES_X_pattern2[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_2_E[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_2_E_df = pd.DataFrame(generated_knowledge_matrix_2_E)\n",
        "generated_knowledge_matrix_2_E_df.to_csv(\"generated_knowledge_matrix_2_E.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_2_E_df.to_csv(\"generated_knowledge_matrix_2_E_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "U090BNk5tPU9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "For PC"
      ],
      "metadata": {
        "id": "ZgV933i5tasY"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 1\n",
        "for i in range(dag_est_pc.shape[0]):\n",
        "    for j in range(dag_est_pc.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_PC_X_pattern1[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_1_P[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_1_P_df = pd.DataFrame(generated_knowledge_matrix_1_P)\n",
        "generated_knowledge_matrix_1_P_df.to_csv(\"generated_knowledge_matrix_1_P.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_1_P_df.to_csv(\"generated_knowledge_matrix_1_P_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "flFZJh1PtZPI"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#pattern 2\n",
        "for i in range(dag_est_pc.shape[0]):\n",
        "    for j in range(dag_est_pc.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      response1 = client.chat.completions.create(\n",
        "        model=\"gpt-4-1106-preview\",\n",
        "        messages=[\n",
        "          {\"role\": \"system\",\n",
        "           \"content\": system_role},\n",
        "          {\n",
        "           \"role\": \"user\",\n",
        "           \"content\": first_prompt_matrix_PC_X_pattern2[i,j]\n",
        "          }\n",
        "          ],\n",
        "        temperature=0.7,\n",
        "        max_tokens=3000\n",
        "        )\n",
        "      generated_knowledge_matrix_2_P[i, j]= response1.choices[0].message.content\n",
        "\n",
        "      print(str(i)+\",\"+str(j))\n",
        "\n",
        "generated_knowledge_matrix_2_P_df = pd.DataFrame(generated_knowledge_matrix_2_P)\n",
        "generated_knowledge_matrix_2_P_df.to_csv(\"generated_knowledge_matrix_2_P.csv\", encoding='utf-8')\n",
        "generated_knowledge_matrix_2_P_df.to_csv(\"generated_knowledge_matrix_2_P_for_excel.csv\", encoding='utf-8-sig')"
      ],
      "metadata": {
        "id": "oh2lr02Qtoa_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "an5JN4hSz7Yp"
      },
      "source": [
        "# Constructiong 2nd prompt(preparing for 3rd step)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "OG7o2uKSQMEt"
      },
      "outputs": [],
      "source": [
        "sen_eng_1 = f\"\"\"An expert was asked the question below:\n",
        "\"\"\"\n",
        "sen_eng_2 = f\"\"\"Then, the expert replied with its domain knowledge:\n",
        "\"\"\"\n",
        "sen_eng_3 = f\"\"\"\n",
        "Considering objectively this discussion above,\"\"\"\n",
        "\n",
        "sen_eng_4 = f\"\"\"\n",
        "Please answer this question with <yes> or <no>.\n",
        "No answers except these two responses are needed.\"\"\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "IqtetrhW2_bC"
      },
      "outputs": [],
      "source": [
        "def create_2nd_Question_matrix(adjacency_matrix, labels):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    second_Question_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            second_Question_matrix[i, j] = f\"if {labels[j]} is modified, will it have a direct or indirect impact on {labels[i]}?\"\n",
        "\n",
        "    return second_Question_matrix"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "UtUXoOnr5iJw"
      },
      "outputs": [],
      "source": [
        "def create_2nd_prompt_matrix(adjacency_matrix, labels, first_prompt_matrix, first_answer_matrix):\n",
        "    num_nodes = adjacency_matrix.shape[0]\n",
        "    second_prompt_matrix = np.empty(adjacency_matrix.shape, dtype=object)\n",
        "    second_quesiton_matrix =  create_2nd_Question_matrix(adjacency_matrix, labels)\n",
        "\n",
        "    for i in range(num_nodes):\n",
        "        for j in range(num_nodes):\n",
        "            if j == i:\n",
        "                continue\n",
        "            second_prompt_matrix[i, j] = sen_eng_1 + first_prompt_matrix[i, j] + sen_eng_2 + first_answer_matrix[i, j] + sen_eng_3 + second_quesiton_matrix[i, j] + sen_eng_4\n",
        "    return second_prompt_matrix"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "acS5_5eN8D9D"
      },
      "outputs": [],
      "source": [
        "#2nd prompt matrix generation\n",
        "#for LiNGAM\n",
        "causal_2nd_prompt_LiNGAM1_X_pattern0 = create_2nd_prompt_matrix(lingam0_adjacency_matrix_, labels_X, first_prompt_matrix_LiNGAM1_X_pattern0, generated_knowledge_matrix_0_L)\n",
        "causal_2nd_prompt_LiNGAM1_X_pattern1 = create_2nd_prompt_matrix(lingam0_adjacency_matrix_, labels_X, first_prompt_matrix_LiNGAM1_X_pattern1, generated_knowledge_matrix_1_L)\n",
        "causal_2nd_prompt_LiNGAM1_X_pattern2 = create_2nd_prompt_matrix(lingam0_adjacency_matrix_, labels_X, first_prompt_matrix_LiNGAM1_X_pattern2, generated_knowledge_matrix_2_L)\n",
        "causal_2nd_prompt_LiNGAM1_X_pattern3 = create_2nd_prompt_matrix(lingam0_adjacency_matrix_, labels_X, first_prompt_matrix_LiNGAM1_X_pattern3, generated_knowledge_matrix_3_L)\n",
        "causal_2nd_prompt_LiNGAM1_X_pattern4 = create_2nd_prompt_matrix(lingam0_adjacency_matrix_, labels_X, first_prompt_matrix_LiNGAM1_X_pattern4, generated_knowledge_matrix_4_L)\n",
        "\n",
        "#for Exact Search\n",
        "causal_2nd_prompt_ES_X_pattern1 = create_2nd_prompt_matrix(dag_est_es, labels_X, first_prompt_matrix_ES_X_pattern1, generated_knowledge_matrix_1_E)\n",
        "causal_2nd_prompt_ES_X_pattern2 = create_2nd_prompt_matrix(dag_est_es, labels_X, first_prompt_matrix_ES_X_pattern2, generated_knowledge_matrix_2_E)\n",
        "\n",
        "#for PC\n",
        "causal_2nd_prompt_PC_X_pattern1 = create_2nd_prompt_matrix(dag_est_pc, labels_X, first_prompt_matrix_PC_X_pattern1, generated_knowledge_matrix_1_P)\n",
        "causal_2nd_prompt_PC_X_pattern2 = create_2nd_prompt_matrix(dag_est_pc, labels_X, first_prompt_matrix_PC_X_pattern2, generated_knowledge_matrix_2_P)\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Pm5GiUpeDfWY"
      },
      "source": [
        "# Probability calculation from the responses to 2nd promptings"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# functions for means and standard deviations for probabilities\n",
        "def calculate_mean_std_matrices(measured_prob_table, adjacency_matrix):\n",
        "\n",
        "    mean_matrix = np.empty(adjacency_matrix.shape)\n",
        "    std_dev_matrix = np.empty(adjacency_matrix.shape)\n",
        "\n",
        "    for i in range(adjacency_matrix.shape[0]):\n",
        "        for j in range(adjacency_matrix.shape[0]):\n",
        "\n",
        "            values = measured_prob_table[(measured_prob_table['i'] == f\"{i}\") & (measured_prob_table['j'] == f\"{j}\")]['probability']\n",
        "\n",
        "            mean_matrix[i, j] = np.mean(values)\n",
        "            std_dev_matrix[i, j] = np.std(values)\n",
        "\n",
        "    return mean_matrix, std_dev_matrix"
      ],
      "metadata": {
        "id": "iQ-VAYiBfg3v"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8lMnIPk7ZGMZ"
      },
      "outputs": [],
      "source": [
        "system_role = \"You are a helpful assistant for causal inference.\"\n",
        "from openai import OpenAI\n",
        "client = OpenAI()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "HUtdo0PveY5I"
      },
      "outputs": [],
      "source": [
        "import math"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "For LiNGAM"
      ],
      "metadata": {
        "id": "3AGzJBkIxf7B"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern0\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5\n",
        "top_logprobs = 5\n",
        "\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "      count_yes_1 = 0\n",
        "      count_no_1 = 0\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_LiNGAM1_X_pattern0[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = 5\n",
        "          )\n",
        "        for m in range(top_logprobs):\n",
        "          if response.choices[0].logprobs.content[0].top_logprobs[m].token == 'Yes' or response.choices[0].logprobs.content[0].top_logprobs[m].token == 'yes':\n",
        "            prob_yes = prob_yes + math.e**(response.choices[0].logprobs.content[0].top_logprobs[m].logprob)\n",
        "          elif response.choices[0].logprobs.content[0].top_logprobs[m].token == 'No' or response.choices[0].logprobs.content[0].top_logprobs[m].token == 'no':\n",
        "            prob_no = prob_no + math.e**(response.choices[0].logprobs.content[0].top_logprobs[m].logprob)\n",
        "          else:\n",
        "            continue\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern0_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern0_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern0_yes_L_df.to_csv(\"pattern0_yes_L.csv\", encoding='utf-8')\n",
        "pattern0_no_L_df.to_csv(\"pattern0_no_L.csv\", encoding='utf-8')\n",
        "\n",
        "probability_X0_pattern0_L, stdev_X0_pattern0_L = calculate_mean_std_matrices(pattern0_yes_L_df, lingam0_adjacency_matrix_)\n",
        "probability_X0_pattern0_L_df = pd.DataFrame(probability_X0_pattern0_L)\n",
        "stdev_X0_pattern0_L_df = pd.DataFrame(stdev_X0_pattern0_L)\n",
        "probability_X0_pattern0_L_df.to_csv(\"probability_X0_pattern0_L.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern0_L_df.to_csv(\"stdev_X0_pattern0_L.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "3BQluj6Mwa_Y"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern1\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5\n",
        "top_logprobs = 5\n",
        "\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "      count_yes_1 = 0\n",
        "      count_no_1 = 0\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_LiNGAM1_X_pattern1[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = 5\n",
        "          )\n",
        "        for m in range(top_logprobs):\n",
        "          if response.choices[0].logprobs.content[0].top_logprobs[m].token == 'Yes' or response.choices[0].logprobs.content[0].top_logprobs[m].token == 'yes':\n",
        "            prob_yes = prob_yes + math.e**(response.choices[0].logprobs.content[0].top_logprobs[m].logprob)\n",
        "          elif response.choices[0].logprobs.content[0].top_logprobs[m].token == 'No' or response.choices[0].logprobs.content[0].top_logprobs[m].token == 'no':\n",
        "            prob_no = prob_no + math.e**(response.choices[0].logprobs.content[0].top_logprobs[m].logprob)\n",
        "          else:\n",
        "            continue\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern1_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern1_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern1_yes_L_df.to_csv(\"pattern1_yes_L.csv\", encoding='utf-8')\n",
        "pattern1_no_L_df.to_csv(\"pattern1_no_L.csv\", encoding='utf-8')\n",
        "\n",
        "probability_X0_pattern1_L, stdev_X0_pattern1_L = calculate_mean_std_matrices(pattern1_yes_L_df, lingam0_adjacency_matrix_)\n",
        "probability_X0_pattern1_L_df = pd.DataFrame(probability_X0_pattern1_L)\n",
        "stdev_X0_pattern1_L_df = pd.DataFrame(stdev_X0_pattern1_L)\n",
        "probability_X0_pattern1_L_df.to_csv(\"probability_X0_pattern1_L.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern1_L_df.to_csv(\"stdev_X0_pattern1_L.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "4Kr4PEfUfo1s"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern2\n",
        "# For every ordered pair (i, j) with i != j, send the pattern2 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_LiNGAM1_X_pattern2[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern2_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern2_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern2_yes_L_df.to_csv(\"pattern2_yes_L.csv\", encoding='utf-8')\n",
        "pattern2_no_L_df.to_csv(\"pattern2_no_L.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern2_L, stdev_X0_pattern2_L = calculate_mean_std_matrices(pattern2_yes_L_df, lingam0_adjacency_matrix_)\n",
        "probability_X0_pattern2_L_df = pd.DataFrame(probability_X0_pattern2_L)\n",
        "stdev_X0_pattern2_L_df = pd.DataFrame(stdev_X0_pattern2_L)\n",
        "probability_X0_pattern2_L_df.to_csv(\"probability_X0_pattern2_L.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern2_L_df.to_csv(\"stdev_X0_pattern2_L.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "-yMK2z1ngWiG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern3\n",
        "# For every ordered pair (i, j) with i != j, send the pattern3 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_LiNGAM1_X_pattern3[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern3_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern3_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern3_yes_L_df.to_csv(\"pattern3_yes_L.csv\", encoding='utf-8')\n",
        "pattern3_no_L_df.to_csv(\"pattern3_no_L.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern3_L, stdev_X0_pattern3_L = calculate_mean_std_matrices(pattern3_yes_L_df, lingam0_adjacency_matrix_)\n",
        "probability_X0_pattern3_L_df = pd.DataFrame(probability_X0_pattern3_L)\n",
        "stdev_X0_pattern3_L_df = pd.DataFrame(stdev_X0_pattern3_L)\n",
        "probability_X0_pattern3_L_df.to_csv(\"probability_X0_pattern3_L.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern3_L_df.to_csv(\"stdev_X0_pattern3_L.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "Xxt9lPZ0jc2o"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern4\n",
        "# For every ordered pair (i, j) with i != j, send the pattern4 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "    for j in range(lingam0_adjacency_matrix_.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_LiNGAM1_X_pattern4[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern4_yes_L_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern4_no_L_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern4_yes_L_df.to_csv(\"pattern4_yes_L.csv\", encoding='utf-8')\n",
        "pattern4_no_L_df.to_csv(\"pattern4_no_L.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern4_L, stdev_X0_pattern4_L = calculate_mean_std_matrices(pattern4_yes_L_df, lingam0_adjacency_matrix_)\n",
        "probability_X0_pattern4_L_df = pd.DataFrame(probability_X0_pattern4_L)\n",
        "stdev_X0_pattern4_L_df = pd.DataFrame(stdev_X0_pattern4_L)\n",
        "probability_X0_pattern4_L_df.to_csv(\"probability_X0_pattern4_L.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern4_L_df.to_csv(\"stdev_X0_pattern4_L.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "S79DOuH3_QQB"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "For Exact Search"
      ],
      "metadata": {
        "id": "kUx03MGOyhfZ"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern1\n",
        "# For every ordered pair (i, j) with i != j, send the pattern1 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(dag_est_es.shape[0]):\n",
        "    for j in range(dag_est_es.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_ES_X_pattern1[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern1_yes_E_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern1_no_E_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern1_yes_E_df.to_csv(\"pattern1_yes_E.csv\", encoding='utf-8')\n",
        "pattern1_no_E_df.to_csv(\"pattern1_no_E.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern1_E, stdev_X0_pattern1_E = calculate_mean_std_matrices(pattern1_yes_E_df, dag_est_es)\n",
        "probability_X0_pattern1_E_df = pd.DataFrame(probability_X0_pattern1_E)\n",
        "stdev_X0_pattern1_E_df = pd.DataFrame(stdev_X0_pattern1_E)\n",
        "probability_X0_pattern1_E_df.to_csv(\"probability_X0_pattern1_E.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern1_E_df.to_csv(\"stdev_X0_pattern1_E.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "Z4JorOwx_KmA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern2\n",
        "# For every ordered pair (i, j) with i != j, send the pattern2 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(dag_est_es.shape[0]):\n",
        "    for j in range(dag_est_es.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_ES_X_pattern2[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern2_yes_E_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern2_no_E_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern2_yes_E_df.to_csv(\"pattern2_yes_E.csv\", encoding='utf-8')\n",
        "pattern2_no_E_df.to_csv(\"pattern2_no_E.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern2_E, stdev_X0_pattern2_E = calculate_mean_std_matrices(pattern2_yes_E_df, dag_est_es)\n",
        "probability_X0_pattern2_E_df = pd.DataFrame(probability_X0_pattern2_E)\n",
        "stdev_X0_pattern2_E_df = pd.DataFrame(stdev_X0_pattern2_E)\n",
        "probability_X0_pattern2_E_df.to_csv(\"probability_X0_pattern2_E.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern2_E_df.to_csv(\"stdev_X0_pattern2_E.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "XPORWY79_DJ1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "For PC"
      ],
      "metadata": {
        "id": "8bToOEoZ-JOs"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern1\n",
        "# For every ordered pair (i, j) with i != j, send the pattern1 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(dag_est_pc.shape[0]):\n",
        "    for j in range(dag_est_pc.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_PC_X_pattern1[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern1_yes_P_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern1_no_P_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern1_yes_P_df.to_csv(\"pattern1_yes_P.csv\", encoding='utf-8')\n",
        "pattern1_no_P_df.to_csv(\"pattern1_no_P.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern1_P, stdev_X0_pattern1_P = calculate_mean_std_matrices(pattern1_yes_P_df, dag_est_pc)\n",
        "probability_X0_pattern1_P_df = pd.DataFrame(probability_X0_pattern1_P)\n",
        "stdev_X0_pattern1_P_df = pd.DataFrame(stdev_X0_pattern1_P)\n",
        "probability_X0_pattern1_P_df.to_csv(\"probability_X0_pattern1_P.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern1_P_df.to_csv(\"stdev_X0_pattern1_P.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "3EmIMipR-KkP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# pattern2\n",
        "# For every ordered pair (i, j) with i != j, send the pattern2 prompt `sample_number`\n",
        "# times and record, per trial, the probability mass of a Yes/No first answer token.\n",
        "temp_pattern_yes = []\n",
        "temp_pattern_no = []\n",
        "\n",
        "sample_number = 5  # trials per ordered pair\n",
        "top_logprobs = 5  # first-token candidates requested from the API\n",
        "\n",
        "for i in range(dag_est_pc.shape[0]):\n",
        "    for j in range(dag_est_pc.shape[0]):\n",
        "      if i == j:\n",
        "        continue\n",
        "\n",
        "      for t in range(sample_number):\n",
        "        prob_yes = 0\n",
        "        prob_no = 0\n",
        "        response = client.chat.completions.create(\n",
        "          model=\"gpt-4-1106-preview\",\n",
        "          messages=[\n",
        "            {\"role\": \"system\",\n",
        "             \"content\": system_role},\n",
        "            {\n",
        "             \"role\": \"user\",\n",
        "             \"content\": causal_2nd_prompt_PC_X_pattern2[i,j]\n",
        "            }\n",
        "            ],\n",
        "          temperature=0.7,\n",
        "          max_tokens=1500,\n",
        "          logprobs = True,\n",
        "          top_logprobs = top_logprobs  # keep the request in sync with the setting above\n",
        "          )\n",
        "        # Walk the candidates actually returned for the first generated token;\n",
        "        # indexing range(top_logprobs) would raise IndexError on a shorter list.\n",
        "        for candidate in response.choices[0].logprobs.content[0].top_logprobs:\n",
        "          if candidate.token in ('Yes', 'yes'):\n",
        "            prob_yes = prob_yes + math.e**(candidate.logprob)\n",
        "          elif candidate.token in ('No', 'no'):\n",
        "            prob_no = prob_no + math.e**(candidate.logprob)\n",
        "        temp_pattern_yes.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_yes])\n",
        "        temp_pattern_no.append([f\"{i}\", f\"{j}\", f\"Trial {t+1}\", prob_no])\n",
        "        print(\"i=\"+str(i)+\", j=\"+str(j)+\",Trial:\"+str(t+1), \"p_yes:\", prob_yes, \"p_no:\", prob_no)\n",
        "\n",
        "# Persist the raw per-trial probabilities.\n",
        "columns = ['i', 'j', 'Trial', 'probability']\n",
        "pattern2_yes_P_df = pd.DataFrame(temp_pattern_yes, columns=columns)\n",
        "pattern2_no_P_df = pd.DataFrame(temp_pattern_no, columns=columns)\n",
        "pattern2_yes_P_df.to_csv(\"pattern2_yes_P.csv\", encoding='utf-8')\n",
        "pattern2_no_P_df.to_csv(\"pattern2_no_P.csv\", encoding='utf-8')\n",
        "\n",
        "# Aggregate the 'Yes' trials into two matrices and save them\n",
        "# (presumably per-pair mean and standard deviation -- see calculate_mean_std_matrices).\n",
        "probability_X0_pattern2_P, stdev_X0_pattern2_P = calculate_mean_std_matrices(pattern2_yes_P_df, dag_est_pc)\n",
        "probability_X0_pattern2_P_df = pd.DataFrame(probability_X0_pattern2_P)\n",
        "stdev_X0_pattern2_P_df = pd.DataFrame(stdev_X0_pattern2_P)\n",
        "probability_X0_pattern2_P_df.to_csv(\"probability_X0_pattern2_P.csv\", encoding='utf-8')\n",
        "stdev_X0_pattern2_P_df.to_csv(\"stdev_X0_pattern2_P.csv\", encoding='utf-8')"
      ],
      "metadata": {
        "id": "_XftIAva-_Jq"
      },
      "execution_count": null,
      "outputs": []
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}