{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "from pivotAlg import *\n",
    "from ourAlg import *\n",
    "from icml21Alg import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_pivotAlg(num_of_nodes, streaming_file, k_list, output_dir, repeated_times):\n",
    "    '''\n",
    "    Evaluate the Pivot algorithm for every k in `k_list` and summarise its cost.\n",
    "\n",
    "    Each k is run `repeated_times` times; trial t seeds both `random` and\n",
    "    `np.random` with t. All trials of one k use the same output file path\n",
    "    (pivotAlg_output_k_<k>.txt inside `output_dir`).\n",
    "\n",
    "    Returns a dict mapping k -> (mean cost, std of cost) over the trials.\n",
    "    '''\n",
    "    def single_trial(k, seed, output_file):\n",
    "        # Seed both generators before the algorithm object is built.\n",
    "        random.seed(seed)\n",
    "        np.random.seed(seed)\n",
    "\n",
    "        pivot = PivotAlgorithm(num_of_nodes, k)\n",
    "        pivot.run(streaming_file, output_file)\n",
    "        return alg_pay_cost(num_of_nodes, streaming_file, output_file)\n",
    "\n",
    "    summary = {}\n",
    "    for k in k_list:\n",
    "        output_file = os.path.join(output_dir, f'pivotAlg_output_k_{k}.txt')\n",
    "        trial_costs = [single_trial(k, seed, output_file) for seed in range(repeated_times)]\n",
    "        summary[k] = (np.mean(trial_costs), np.std(trial_costs))\n",
    "\n",
    "    return summary"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _run_ourAlg_trials(num_of_nodes, streaming_file, predictor, k, output_file1, output_file2, repeated_times):\n",
    "    '''\n",
    "    Run OurStreamingAlgorithm `repeated_times` times for one (k, predictor) pair.\n",
    "\n",
    "    Trial t seeds `random` and `np.random` with t. Each run produces two\n",
    "    output files; the cost of a trial is the smaller of the two costs\n",
    "    returned by alg_pay_cost. Returns (mean, std) of the trial costs.\n",
    "    '''\n",
    "    costs = []\n",
    "    for seed in range(repeated_times):\n",
    "        random.seed(seed)\n",
    "        np.random.seed(seed)\n",
    "\n",
    "        alg = OurStreamingAlgorithm(num_of_nodes, k, predictor)\n",
    "        alg.run(streaming_file, output_file1, output_file2)\n",
    "        cost1 = alg_pay_cost(num_of_nodes, streaming_file, output_file1)\n",
    "        cost2 = alg_pay_cost(num_of_nodes, streaming_file, output_file2)\n",
    "\n",
    "        costs.append(min(cost1, cost2))\n",
    "    return np.mean(costs), np.std(costs)\n",
    "\n",
    "\n",
    "def run_ourAlg(num_of_nodes, streaming_file, ground_truth_file, prediction_dir, pertubation_list, cluster_num_list, k_list, output_dir, repeated_times):\n",
    "    '''\n",
    "    Run our algorithm for every k in `k_list` and every available prediction.\n",
    "\n",
    "    The mode is selected by `cluster_num_list`:\n",
    "\n",
    "    * `cluster_num_list is None`: one prediction per entry of\n",
    "      `pertubation_list`, read from prediction_gt_<p>.txt, falling back to\n",
    "      prediction_opt_<p>.txt when the former does not exist. Returns\n",
    "      (beta_list, cost_dict) where beta_list[k][p] is the result of\n",
    "      calculate_beta for that prediction.\n",
    "    * otherwise: one prediction per entry of `cluster_num_list`, read from\n",
    "      prediction_se_<num>.txt; `ground_truth_file` and `pertubation_list`\n",
    "      are not used. Returns (cluster_num, cost_dict) with\n",
    "      cluster_num[k][num] == num.\n",
    "\n",
    "    In both modes cost_dict[k][key] is (mean cost, std cost) over\n",
    "    `repeated_times` seeded runs.\n",
    "    '''\n",
    "    cost_dict = {}\n",
    "\n",
    "    # `is None` rather than `== None`: the equality form is evaluated\n",
    "    # element-wise when a NumPy array is passed and cannot be used as a condition.\n",
    "    if cluster_num_list is None:\n",
    "        beta_list = {}\n",
    "        for k in k_list:\n",
    "            beta_list[k] = {}\n",
    "            cost_dict[k] = {}\n",
    "            for pertubation in pertubation_list:\n",
    "                prediction_file = os.path.join(prediction_dir, f'prediction_gt_{pertubation}.txt')\n",
    "                if not os.path.exists(prediction_file):\n",
    "                    prediction_file = os.path.join(prediction_dir, f'prediction_opt_{pertubation}.txt')\n",
    "\n",
    "                predictor = load_prediction(prediction_file)\n",
    "                beta_list[k][pertubation] = calculate_beta(streaming_file, ground_truth_file, prediction_file)\n",
    "\n",
    "                cost_dict[k][pertubation] = _run_ourAlg_trials(\n",
    "                    num_of_nodes, streaming_file, predictor, k,\n",
    "                    os.path.join(output_dir, f'ourAlg_output1_k_{k}_pertubation_{pertubation}.txt'),\n",
    "                    os.path.join(output_dir, f'ourAlg_output2_k_{k}_pertubation_{pertubation}.txt'),\n",
    "                    repeated_times)\n",
    "\n",
    "        return beta_list, cost_dict\n",
    "\n",
    "    cluster_num = {}\n",
    "    for k in k_list:\n",
    "        cluster_num[k] = {}\n",
    "        cost_dict[k] = {}\n",
    "        for num in cluster_num_list:\n",
    "            prediction_file = os.path.join(prediction_dir, f'prediction_se_{num}.txt')\n",
    "\n",
    "            predictor = load_prediction(prediction_file)\n",
    "            cluster_num[k][num] = num\n",
    "\n",
    "            cost_dict[k][num] = _run_ourAlg_trials(\n",
    "                num_of_nodes, streaming_file, predictor, k,\n",
    "                os.path.join(output_dir, f'ourAlg_output1_k_{k}_cluster_num_{num}.txt'),\n",
    "                os.path.join(output_dir, f'ourAlg_output2_k_{k}_cluster_num_{num}.txt'),\n",
    "                repeated_times)\n",
    "\n",
    "    return cluster_num, cost_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_icml21Alg(num_of_nodes, streaming_file, output_dir, repeated_times):\n",
    "    '''\n",
    "    Grid-search beta and lambda for the ICML 2021 agreement-based algorithm and return the best setting.\n",
    "\n",
    "    Both parameters are swept over 0, step, 2 * step, ..., 1 with step = 0.1.\n",
    "    Every run writes to icml21_output.txt inside `output_dir`.\n",
    "\n",
    "    NOTE: `repeated_times` is accepted for signature compatibility but is\n",
    "    not used: each grid point is evaluated with a single seeded run (the\n",
    "    std component of the returned cost is therefore 0).\n",
    "\n",
    "    Returns (best_beta, best_lambda, (mean cost, std cost)) for the grid\n",
    "    point with the lowest mean cost; the first such point wins on ties.\n",
    "    '''\n",
    "    step = 0.1\n",
    "    grid_size = int(1 / step) + 1\n",
    "    trials_per_setting = 1\n",
    "    output_file = os.path.join(output_dir, 'icml21_output.txt')\n",
    "\n",
    "    best_beta = 0\n",
    "    best_lambda = 0\n",
    "    # Start from +inf instead of a fixed sentinel such as 1000000: with a\n",
    "    # finite sentinel, inputs whose cost is always above it would return the\n",
    "    # sentinel itself together with beta = lambda = 0.\n",
    "    best_cost = (float('inf'), 0)\n",
    "\n",
    "    for beta in range(grid_size):\n",
    "        for lambda_ in range(grid_size):\n",
    "            beta_value = beta * step\n",
    "            lambda_value = lambda_ * step\n",
    "\n",
    "            cost = []\n",
    "            for time in range(trials_per_setting):\n",
    "                random.seed(time)\n",
    "                np.random.seed(time)\n",
    "\n",
    "                alg = AgreementBasedAlgorithm(num_of_nodes, beta_value, lambda_value)\n",
    "                alg.run(streaming_file, output_file)\n",
    "                cost.append(alg_pay_cost(num_of_nodes, streaming_file, output_file))\n",
    "\n",
    "            mean_cost = np.mean(cost)\n",
    "            if mean_cost < best_cost[0]:\n",
    "                best_beta = beta_value\n",
    "                best_lambda = lambda_value\n",
    "                best_cost = (mean_cost, np.std(cost))\n",
    "\n",
    "    return best_beta, best_lambda, best_cost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_synthetic_datasets(data_dir, output_dir, repeated_times):\n",
    "    '''\n",
    "    Benchmark the ICML'21 baseline, the Pivot algorithm and our algorithm on the synthetic SBM data.\n",
    "\n",
    "    For every (number of nodes, probability) pair the inputs are read from\n",
    "    nodes_<n>/prob_<p>/ below `data_dir`, and one CSV row per\n",
    "    (k, pertubation) is appended to SBM_statistics.csv in `output_dir`.\n",
    "    The CSV is truncated and its header rewritten on every call.\n",
    "    '''\n",
    "    num_of_nodes = [1000]\n",
    "    probability_list = [0.95]\n",
    "    # SBM n = 1000, k = 20, pertubation_list = [0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15]\n",
    "    pertubation_list = [0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15]\n",
    "    k_list = [20]\n",
    "\n",
    "    # The statistics file is opened before any per-dataset directory is\n",
    "    # created, so make sure the top-level output directory exists first.\n",
    "    if output_dir:\n",
    "        os.makedirs(output_dir, exist_ok=True)\n",
    "    statistics_file = os.path.join(output_dir, 'SBM_statistics.csv')\n",
    "    with open(statistics_file, 'w') as f:\n",
    "        f.write('num_of_nodes, ground_truth_cost, icml21_beta, icml21_lambda, icml21_alg_mean, icml21_alg_std, k, pivot_alg_mean, pivot_alg_std, our_alg_beta, our_alg_mean, our_alg_std\\n')\n",
    "\n",
    "    for num in num_of_nodes:\n",
    "        for probability in probability_list:\n",
    "            print(f'\\n\\nRunning SBM with {num} nodes.')\n",
    "            dataset_dir = os.path.join(data_dir, f'nodes_{num}/prob_{probability}')\n",
    "            streaming_file = os.path.join(dataset_dir, 'streaming.txt')\n",
    "            ground_truth_file = os.path.join(dataset_dir, 'ground_truth.txt')\n",
    "            result_dir = os.path.join(output_dir, f'SBM/nodes_{num}/prob_{probability}')\n",
    "            os.makedirs(result_dir, exist_ok=True)\n",
    "\n",
    "            # calculate the ground truth cost\n",
    "            ground_truth_cost = calculate_cost(streaming_file, ground_truth_file)\n",
    "            print('Ground truth cost calculated')\n",
    "\n",
    "            # run ICML'21 algorithm\n",
    "            icml21_beta, icml21_lambda, icml21_cost = run_icml21Alg(num, streaming_file, result_dir, repeated_times)\n",
    "            print('ICML21 cost calculated')\n",
    "\n",
    "            # run pivotAlg\n",
    "            pivot_alg_cost = run_pivotAlg(num, streaming_file, k_list, result_dir, repeated_times)\n",
    "            print('PivotAlg cost calculated')\n",
    "\n",
    "            # run LAA; the prediction files live next to the streaming file\n",
    "            our_alg_beta, our_alg_cost = run_ourAlg(num, streaming_file, ground_truth_file, dataset_dir, pertubation_list, None, k_list, result_dir, repeated_times)\n",
    "            print('OurAlg cost calculated')\n",
    "\n",
    "            # write results to file: the baseline columns repeat on every row\n",
    "            with open(statistics_file, 'a') as f:\n",
    "                for k in k_list:\n",
    "                    for pertubation in pertubation_list:\n",
    "                        f.write(f'{num}, {ground_truth_cost}, {icml21_beta}, {icml21_lambda}, {icml21_cost[0]}, {icml21_cost[1]}, {k}, {pivot_alg_cost[k][0]}, {pivot_alg_cost[k][1]}, {our_alg_beta[k][pertubation]}, {our_alg_cost[k][pertubation][0]}, {our_alg_cost[k][pertubation][1]}\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_first_type_datasets(data_dir, output_dir, repeated_times):\n",
    "    '''\n",
    "    Benchmark the ICML'21 baseline, the Pivot algorithm and our algorithm on the facebook datasets.\n",
    "\n",
    "    For each dataset the inputs (streaming.txt, OPT_sol.txt and the\n",
    "    prediction files) are read from <dataset>/ below `data_dir`, and one CSV\n",
    "    row per (k, pertubation) is appended to first_type_datasets_statistics.csv\n",
    "    in `output_dir`. The CSV is truncated and its header rewritten on every call.\n",
    "    '''\n",
    "    datasets = ['facebook3980']\n",
    "    # facebook0, k=50, pertubation_list = [0.002, 0.004, 0.006, 0.008, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02]\n",
    "    # facebook414, k=30, pertubation_list = [0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.011, 0.012, 0.013, 0.014]\n",
    "    # facebook3980, k=10, pertubation_list = [0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.011, 0.012, 0.013, 0.014]\n",
    "    # NOTE(review): the list below equals the facebook0 entry above, while\n",
    "    # `datasets` and `k_list` are set for facebook3980 - confirm this is intended.\n",
    "    pertubation_list = [0.002, 0.004, 0.006, 0.008, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02]\n",
    "    k_list = [10]\n",
    "\n",
    "    # The statistics file is opened before any per-dataset directory is\n",
    "    # created, so make sure the top-level output directory exists first.\n",
    "    if output_dir:\n",
    "        os.makedirs(output_dir, exist_ok=True)\n",
    "    statistics_file = os.path.join(output_dir, 'first_type_datasets_statistics.csv')\n",
    "    with open(statistics_file, 'w') as f:\n",
    "        f.write('dataset, num_of_nodes, opt_cost, icml21_beta, icml21_lambda, icml21_alg_mean, icml21_alg_std, k, pivot_alg_mean, pivot_alg_std, our_alg_beta, our_alg_mean, our_alg_std\\n')\n",
    "\n",
    "    for dataset in datasets:\n",
    "        print(f'\\n\\nRunning {dataset} dataset.')\n",
    "        streaming_file = os.path.join(data_dir, f'{dataset}/streaming.txt')\n",
    "        result_dir = os.path.join(output_dir, f'facebook/{dataset}')\n",
    "        os.makedirs(result_dir, exist_ok=True)\n",
    "\n",
    "        # calculate the cost of the stored OPT solution\n",
    "        opt_file = os.path.join(data_dir, f'{dataset}/OPT_sol.txt')\n",
    "        opt_cost = calculate_cost(streaming_file, opt_file)\n",
    "        print('Optimal cost calculated')\n",
    "\n",
    "        # the first line of the streaming file holds the number of nodes\n",
    "        with open(streaming_file, 'r') as f:\n",
    "            num = int(f.readline())\n",
    "\n",
    "        # run ICML'21 algorithm\n",
    "        icml21_beta, icml21_lambda, icml21_cost = run_icml21Alg(num, streaming_file, result_dir, repeated_times)\n",
    "        print('ICML21 cost calculated')\n",
    "\n",
    "        # run pivotAlg\n",
    "        pivot_alg_cost = run_pivotAlg(num, streaming_file, k_list, result_dir, repeated_times)\n",
    "        print('PivotAlg cost calculated')\n",
    "\n",
    "        # run LAA; the prediction files live in the dataset directory\n",
    "        prediction_dir = os.path.join(data_dir, f'{dataset}')\n",
    "        our_alg_beta, our_alg_cost = run_ourAlg(num, streaming_file, opt_file, prediction_dir, pertubation_list, None, k_list, result_dir, repeated_times)\n",
    "        print('OurAlg cost calculated')\n",
    "\n",
    "        # write results to file: the baseline columns repeat on every row\n",
    "        with open(statistics_file, 'a') as f:\n",
    "            for k in k_list:\n",
    "                for pertubation in pertubation_list:\n",
    "                    f.write(f'{dataset}, {num}, {opt_cost}, {icml21_beta}, {icml21_lambda}, {icml21_cost[0]}, {icml21_cost[1]}, {k}, {pivot_alg_cost[k][0]}, {pivot_alg_cost[k][1]}, {our_alg_beta[k][pertubation]}, {our_alg_cost[k][pertubation][0]}, {our_alg_cost[k][pertubation][1]}\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def run_second_type_datasets(data_dir, output_dir, repeated_times):\n",
    "    '''\n",
    "    Evaluate our algorithm on the second-type dataset(s) and append the results to a CSV file.\n",
    "\n",
    "    Only our algorithm is actually executed here. The ICML'21 and Pivot\n",
    "    columns are filled from the hard-coded values below (presumably taken\n",
    "    from an earlier run - TODO confirm); the calls that would recompute\n",
    "    them are left commented out.\n",
    "\n",
    "    One CSV row per (k, cluster_num) is appended to\n",
    "    second_type_datasets_statistics.csv in `output_dir`. The CSV is\n",
    "    truncated and its header rewritten on every call.\n",
    "    '''\n",
    "    # datasets = ['email-Eu-core', 'lastfm_asia']\n",
    "    # email-Eu-core: k = 20, cluster_num_list = [600, 650, 700, 750, 800, 850, 900, 950, 1000]\n",
    "    # lastfm_asia: k = 100, cluster_num_list = [2000, 2500, 3000, 3500, 4000]\n",
    "    datasets = ['lastfm_asia']\n",
    "    cluster_num_list = [3000]\n",
    "    k_list = [100]\n",
    "\n",
    "    # The statistics file is opened before any per-dataset directory is\n",
    "    # created, so make sure the top-level output directory exists first.\n",
    "    if output_dir:\n",
    "        os.makedirs(output_dir, exist_ok=True)\n",
    "    statistics_file = os.path.join(output_dir, 'second_type_datasets_statistics.csv')\n",
    "    with open(statistics_file, 'w') as f:\n",
    "        f.write('dataset, num_of_nodes, icml21_beta, icml21_lambda, icml21_alg_mean, icml21_alg_std, k, pivot_alg_mean, pivot_alg_std, cluster_num, our_alg_mean, our_alg_std\\n')\n",
    "\n",
    "    for dataset in datasets:\n",
    "        print(f'\\n\\nRunning {dataset} dataset.')\n",
    "        streaming_file = os.path.join(data_dir, f'{dataset}/streaming.txt')\n",
    "        result_dir = os.path.join(output_dir, f'{dataset}')\n",
    "        os.makedirs(result_dir, exist_ok=True)\n",
    "\n",
    "        # the first line of the streaming file holds the number of nodes\n",
    "        with open(streaming_file, 'r') as f:\n",
    "            vertices_num = int(f.readline())\n",
    "\n",
    "        # ICML'21 algorithm: hard-coded result instead of\n",
    "        # icml21_beta, icml21_lambda, icml21_cost = run_icml21Alg(vertices_num, streaming_file, result_dir, repeated_times)\n",
    "        print('ICML21 cost calculated')\n",
    "        icml21_beta = 1.0\n",
    "        icml21_lambda = 0.9\n",
    "        icml21_cost = [236897.0, 0.0]\n",
    "\n",
    "        # pivotAlg: hard-coded result instead of\n",
    "        # pivot_alg_cost = run_pivotAlg(vertices_num, streaming_file, k_list, result_dir, repeated_times)\n",
    "        print('PivotAlg cost calculated')\n",
    "        pivot_alg_cost = {}\n",
    "        for k in k_list:\n",
    "            pivot_alg_cost[k] = [31792.65, 2023.226588274284]\n",
    "            print(f'{k}, {pivot_alg_cost[k][0]}, {pivot_alg_cost[k][1]}\\n')\n",
    "\n",
    "        # run LAA; the prediction files live in the dataset directory\n",
    "        prediction_dir = os.path.join(data_dir, f'{dataset}')\n",
    "        cluster_num, our_alg_cost = run_ourAlg(vertices_num, streaming_file, None, prediction_dir, None, cluster_num_list, k_list, result_dir, repeated_times)\n",
    "        print('OurAlg cost calculated')\n",
    "        for k in k_list:\n",
    "            for num in cluster_num_list:\n",
    "                print(f'{k}, {cluster_num[k][num]}, {our_alg_cost[k][num][0]}, {our_alg_cost[k][num][1]}\\n')\n",
    "\n",
    "        # write results to file: the baseline columns repeat on every row\n",
    "        with open(statistics_file, 'a') as f:\n",
    "            for k in k_list:\n",
    "                for num in cluster_num_list:\n",
    "                    f.write(f'{dataset}, {vertices_num}, {icml21_beta}, {icml21_lambda}, {icml21_cost[0]}, {icml21_cost[1]}, {k}, {pivot_alg_cost[k][0]}, {pivot_alg_cost[k][1]}, {cluster_num[k][num]}, {our_alg_cost[k][num][0]}, {our_alg_cost[k][num][1]}\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dynamicAlg import *\n",
    "import os\n",
    "def _dynamic_trial_costs(num, degrees, predictor, original_graph, streaming_file, approx_ratio, output_file, repeated_times):\n",
    "    '''\n",
    "    Run DynamicAlgorithm `repeated_times` times and return the list of trial costs.\n",
    "\n",
    "    Trial t seeds `random` and `np.random` with t. Every trial uses the same\n",
    "    `output_file` path, which is then scored with alg_pay_cost.\n",
    "    '''\n",
    "    costs = []\n",
    "    for seed in range(repeated_times):\n",
    "        random.seed(seed)\n",
    "        np.random.seed(seed)\n",
    "        alg = DynamicAlgorithm(num, degrees, predictor)\n",
    "        alg.run_approx_algorithm(original_graph, streaming_file, approx_ratio, output_file)\n",
    "        costs.append(alg_pay_cost(num, streaming_file, output_file))\n",
    "    return costs\n",
    "\n",
    "\n",
    "def run_dynamic_alg(data_dir, prediction_dir, output_dir, repeated_times):\n",
    "    '''\n",
    "    Run the dynamic-algorithm experiment and write ./results/dynamic_lastfm_asia.csv.\n",
    "\n",
    "    For every dataset the CSV receives, in order:\n",
    "      * one row for the ICML'21 baseline (repeated_times column fixed to 1),\n",
    "      * one row for DynamicAlgorithm without a predictor (approx_ratio 3),\n",
    "      * per cluster_num: one row for DynamicAlgorithm with the prediction\n",
    "        prediction_se_<cluster_num>.txt (approx_ratio 2.06) and one 'min' row\n",
    "        holding the per-trial minimum of the two variants.\n",
    "\n",
    "    Paths are built with os.path.join, so the directory arguments work with\n",
    "    or without a trailing separator.\n",
    "    '''\n",
    "    # Configurations previously used with this driver (kept for reference):\n",
    "    #   SBM (nodes_<n>/prob_0.7): num_of_nodes = [100]  (also 100, 200, ..., 1000),\n",
    "    #       perturbation_list = [0.1, 0.12, 0.14, 0.16, 0.18, 0.2, 0.22, 0.24, 0.26, 0.28]\n",
    "    #   facebook/facebook414:\n",
    "    #       perturbation_list = [0.005, 0.006, 0.007, 0.008, 0.009, 0.01, 0.011, 0.012, 0.013, 0.014]\n",
    "    #   email-Eu-core (edge file email-Eu-core.txt):\n",
    "    #       cluster_num_list = [600, 650, 700, 750, 800, 850, 900, 950, 1000]\n",
    "    second_type_datasets = ['lastfm_asia']\n",
    "    cluster_num_list = [7000, 7100, 7200, 7300, 7400, 7500, 7600]\n",
    "\n",
    "    # NOTE(review): the statistics path is hard-coded and does not follow `output_dir`.\n",
    "    statistics_file = './results/dynamic_lastfm_asia.csv'\n",
    "    os.makedirs(os.path.dirname(statistics_file), exist_ok=True)\n",
    "    with open(statistics_file, 'w') as f:\n",
    "        f.write('dataset, nodes, repeated_times, cluster_num, approx_ratio, average_cost, deviation\\n')\n",
    "\n",
    "    for dataset in second_type_datasets:\n",
    "        graph_file = os.path.join(data_dir, dataset, 'lastfm_asia_edges.csv')\n",
    "        # NOTE(review): nx.read_edgelist splits lines on whitespace by default;\n",
    "        # if this CSV is comma-separated, delimiter=',' is required - verify the file format.\n",
    "        original_graph = nx.read_edgelist(graph_file, create_using=nx.Graph, nodetype=int)\n",
    "        # assumes the node ids are exactly 0 .. number_of_nodes() - 1 - TODO confirm\n",
    "        degrees = [original_graph.degree[i] for i in range(original_graph.number_of_nodes())]\n",
    "\n",
    "        # the first line of the streaming file holds the number of nodes\n",
    "        streaming_file = os.path.join(data_dir, dataset, 'streaming.txt')\n",
    "        with open(streaming_file, 'r') as f:\n",
    "            num = int(f.readline().strip())\n",
    "\n",
    "        # Create the output directories before anything is written into them.\n",
    "        result_dir = os.path.join(output_dir, dataset)\n",
    "        os.makedirs(result_dir, exist_ok=True)\n",
    "\n",
    "        icml21_beta, icml21_lambda, icml21_cost = run_icml21Alg(num, streaming_file, output_dir, repeated_times)\n",
    "        print('ICML21 cost calculated')\n",
    "        print(f'ICML21 beta: {icml21_beta}, ICML21 lambda: {icml21_lambda}, {icml21_cost[0]}, {icml21_cost[1]}\\n')\n",
    "        with open(statistics_file, 'a') as f:\n",
    "            # 701 in the approx_ratio column appears to be a tag for the ICML'21 baseline - TODO confirm\n",
    "            f.write(f'{dataset}, {num}, 1, N/A, 701, {icml21_cost[0]}, {icml21_cost[1]}\\n')\n",
    "\n",
    "        # Variant without predictions.\n",
    "        approx_ratio = 3\n",
    "        cost_3 = _dynamic_trial_costs(\n",
    "            num, degrees, None, original_graph, streaming_file, approx_ratio,\n",
    "            os.path.join(result_dir, f'dynamic_approx_{approx_ratio}.txt'), repeated_times)\n",
    "\n",
    "        with open(statistics_file, 'a') as f:\n",
    "            f.write(f'{dataset}, {num}, {repeated_times}, N/A, {approx_ratio}, {np.mean(cost_3)}, {np.std(cost_3)}\\n')\n",
    "\n",
    "        # Variant with predictions, one run per prediction file.\n",
    "        approx_ratio = 2.06\n",
    "        for cluster_num in cluster_num_list:\n",
    "            prediction_file = os.path.join(prediction_dir, dataset, f'prediction_se_{cluster_num}.txt')\n",
    "            predictor = load_prediction(prediction_file)\n",
    "\n",
    "            cost_2 = _dynamic_trial_costs(\n",
    "                num, degrees, predictor, original_graph, streaming_file, approx_ratio,\n",
    "                os.path.join(result_dir, f'approx_{approx_ratio}_cluster_num_{cluster_num}.txt'), repeated_times)\n",
    "\n",
    "            # Trial t of both variants uses seed t, so the costs are paired per trial.\n",
    "            cost_min = [min(a, b) for a, b in zip(cost_3, cost_2)]\n",
    "            with open(statistics_file, 'a') as f:\n",
    "                f.write(f'{dataset}, {num}, {repeated_times}, {cluster_num}, {approx_ratio}, {np.mean(cost_2)}, {np.std(cost_2)}\\n')\n",
    "                f.write(f'{dataset}, {num}, {repeated_times}, {cluster_num}, min, {np.mean(cost_min)}, {np.std(cost_min)}\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if __name__ == '__main__':\n",
    "    # Experiment driver. Every run_* call below is commented out; uncomment\n",
    "    # the one to execute. Each data_dir assignment is paired with the call\n",
    "    # directly after it, so keep the two together when enabling a run.\n",
    "    \n",
    "    # Shared settings: where results are written and how many seeded\n",
    "    # repetitions each configuration gets.\n",
    "    output_dir = \"./results/\"\n",
    "    repeated_times = 20\n",
    "\n",
    "    # Synthetic SBM graphs.\n",
    "    data_dir = \"../data preprocess/SBM\"\n",
    "    # run_synthetic_datasets(data_dir, output_dir, repeated_times)\n",
    "\n",
    "    # First-type (facebook) datasets.\n",
    "    data_dir = \"../data preprocess/facebook\"\n",
    "    # run_first_type_datasets(data_dir, output_dir, repeated_times)\n",
    "\n",
    "    # Second-type datasets; dataset folders sit directly under this directory.\n",
    "    data_dir = \"../data preprocess\"\n",
    "    # run_second_type_datasets(data_dir, output_dir, repeated_times)\n",
    "\n",
    "    # Dynamic algorithm; data and prediction directories are passed explicitly.\n",
    "    # run_dynamic_alg('../data preprocess/', '../data preprocess/', output_dir, repeated_times)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "CC",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
