{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "E5l_zMN6uxsy"
      },
      "outputs": [],
      "source": [
        "!pip install numpy==1.25.0"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install lingam\n",
        "!pip install factor_analyzer\n",
        "!pip install igraph\n",
        "!pip install pygam"
      ],
      "metadata": {
        "id": "aAjytqXrvLHh"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import sys\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "import graphviz\n",
        "import lingam\n",
        "import semopy\n",
        "from sklearn.preprocessing import StandardScaler\n",
        "import hashlib\n",
        "import matplotlib.pyplot as plt\n",
        "import seaborn as sns\n",
        "import matplotlib.image as mpimg\n",
        "import io\n",
        "from scipy.stats import norm\n",
        "from copy import deepcopy\n",
        "import networkx as nx\n",
        "from networkx.algorithms import cycles\n",
        "from itertools import combinations\n",
        "from sklearn.linear_model import LassoLarsIC, LinearRegression\n",
        "from sklearn.utils import check_array, check_scalar\n",
        "\n",
        "print(\"NumPy\",  \"ver:\", np.__version__)\n",
        "print(\"Pandas\", \"ver:\", pd.__version__)\n",
        "print(\"Graphviz\",   \"ver:\", graphviz.__version__)\n",
        "print(\"LiNGAM\", \"ver:\", lingam.__version__)\n",
        "\n",
        "np.set_printoptions(precision=3, suppress=True)"
      ],
      "metadata": {
        "id": "szZDUGJrvclL"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Transformation of the Cyclic Matrix into Acyclic Matrices"
      ],
      "metadata": {
        "id": "nDAGzJlszOOA"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def remove_cycles_with_networkx(matrix):\n",
        "\n",
        "    negative_indices = np.where(matrix < 0)\n",
        "\n",
        "    temp_matrix = np.where(matrix < 0, 1, matrix).astype(int)\n",
        "\n",
        "    G = nx.DiGraph(temp_matrix)\n",
        "    cycles = list(nx.simple_cycles(G))\n",
        "\n",
        "    edges_to_remove = set()\n",
        "    for cycle in cycles:\n",
        "        for i in range(len(cycle)):\n",
        "            j = (i + 1) % len(cycle)\n",
        "            edges_to_remove.add((cycle[i], cycle[j]))\n",
        "\n",
        "    valid_solutions = []\n",
        "    for k in range(1, len(edges_to_remove) + 1):\n",
        "        for edges in combinations(edges_to_remove, k):\n",
        "            H = G.copy()\n",
        "            H.remove_edges_from(edges)\n",
        "            if nx.is_directed_acyclic_graph(H):\n",
        "                result_matrix = nx.to_numpy_array(H, dtype=int)\n",
        "\n",
        "                result_matrix[negative_indices] = matrix[negative_indices]\n",
        "                valid_solutions.append(result_matrix)\n",
        "        if valid_solutions:\n",
        "            break\n",
        "\n",
        "    return valid_solutions"
      ],
      "metadata": {
        "id": "q1EhTCjRvuBl"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Selection of the Optimal Matrix with BIC"
      ],
      "metadata": {
        "id": "8KWLoyXNz2_9"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#calculation of the stats of model fitting\n",
        "def evaluate_model_fit(adjacency_matrix, X, is_ordinal=None):\n",
        "    \"\"\" evaluate the given adjacency matrix and return fit indices\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    adjacency_matrix : array-like, shape (n_features, n_features)\n",
        "        Adjacency matrix representing a causal graph.\n",
        "        The i-th column and row correspond to the i-th column of X.\n",
        "    X : array-like, shape (n_samples, n_features)\n",
        "        Training data.\n",
        "    is_ordinal : array-like, shape (n_features,)\n",
        "        Binary list. The i-th element represents that the i-th column of X is ordinal or not.\n",
        "        0 means not ordinal, otherwise ordinal.\n",
        "\n",
        "    Return\n",
        "    ------\n",
        "    fit_indices : pandas.DataFrame\n",
        "        Fit indices. This API uses semopy's calc_stats(). See semopy's reference for details.\n",
        "    \"\"\"\n",
        "\n",
        "    # check inputs\n",
        "    adj = check_array(adjacency_matrix, force_all_finite=\"allow-nan\")\n",
        "    if adj.ndim != 2 or (adj.shape[0] != adj.shape[1]):\n",
        "        raise ValueError(\"adj must be an square matrix.\")\n",
        "\n",
        "    X = check_array(X)\n",
        "    if X.shape[1] != adj.shape[1]:\n",
        "        raise ValueError(\"X.shape[1] and adj.shape[1] must be the same.\")\n",
        "\n",
        "    if is_ordinal is None:\n",
        "        is_ordinal = np.zeros(X.shape[1])\n",
        "    else:\n",
        "        is_ordinal = check_array(is_ordinal, ensure_2d=False).flatten()\n",
        "    if is_ordinal.shape[0] != adj.shape[1]:\n",
        "        raise ValueError(\"is_ordinal.shape[0] and adj.shape[1] must be the same.\")\n",
        "\n",
        "    # build desc\n",
        "    desc = \"\"\n",
        "    eta_names = []\n",
        "\n",
        "    for i, row in enumerate(adj):\n",
        "        # exogenous\n",
        "        if np.sum(np.isnan(row)) == 0 and np.sum(np.isclose(row, 0)) == row.shape[0]:\n",
        "            continue\n",
        "\n",
        "        desc += f\"x{i:d} ~ \"\n",
        "\n",
        "        for j, elem in enumerate(row):\n",
        "            if np.isnan(elem):\n",
        "                eta_name = f\"eta_{i}_{j}\" if i < j else f\"eta_{j}_{i}\"\n",
        "                desc += f\"{eta_name} + \"\n",
        "                if eta_name not in eta_names:\n",
        "                    eta_names.append(eta_name)\n",
        "            elif not np.isclose(elem, 0):\n",
        "                desc += f\"x{j:d} + \"\n",
        "        desc = desc[:-len(\" * \")] + \"\\n\"\n",
        "\n",
        "    if len(eta_names) > 0:\n",
        "        desc += \"DEFINE(latent) \" + \" \".join(eta_names) + \"\\n\"\n",
        "\n",
        "    if sum(is_ordinal) > 0:\n",
        "        indices = np.argwhere(is_ordinal).flatten()\n",
        "\n",
        "        desc += \"DEFINE(ordinal)\"\n",
        "        for i in indices:\n",
        "            desc += f\" x{i}\"\n",
        "        desc += \"\\n\"\n",
        "\n",
        "    columns = [f\"x{i:d}\" for i in range(X.shape[1])]\n",
        "    X = pd.DataFrame(X, columns=columns)\n",
        "\n",
        "    m = semopy.Model(desc)\n",
        "    m.fit(X)\n",
        "\n",
        "    stats = semopy.calc_stats(m)\n",
        "\n",
        "    return stats"
      ],
      "metadata": {
        "id": "t-Seo3edusWA"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#Selection of the Optimal Matrix for LiNGAM with BIC\n",
        "def find_best_LiNGAM_result(X, acyclic_prior_knowledge):\n",
        "  # X: dataset(pd.dataframe)\n",
        "  # acyclic_prior_knowledge: list of acyclic matrices\n",
        "    best_bic = np.inf\n",
        "    best_model = None\n",
        "    best_prior_knowledge = None\n",
        "    for pk in acyclic_prior_knowledge:\n",
        "\n",
        "        model = lingam.DirectLiNGAM(X, prior_knowledge=pk)\n",
        "        model.fit(X)\n",
        "        B = model.adjacency_matrix_\n",
        "        bic = evaluate_model_fit(B, X, is_ordinal=None).BIC.iloc[0]\n",
        "\n",
        "        if bic < best_bic:\n",
        "            best_bic = bic\n",
        "            best_model = model\n",
        "            best_prior_knowledge = pk\n",
        "        else:\n",
        "            continue\n",
        "\n",
        "    return best_prior_knowledge, best_model"
      ],
      "metadata": {
        "id": "Lg_zRgYn0OXG"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}