{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6309ee0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as mtick\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import seaborn as sns\n",
    "from copy import *\n",
    "from decimal import Decimal\n",
    "import math\n",
    "import geopandas\n",
    "import copy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b5b2322",
   "metadata": {},
   "outputs": [],
   "source": [
    "# State list\n",
    "state_list = ['MD', 'MI', 'NC', 'PA', 'TX', 'WI']\n",
    "state_list_2012 = ['MD', 'NC', 'PA', 'TX'] \n",
    "\n",
    "# Shapefile table. This is what geopandas reads, could be different from what ReCom uses.\n",
    "# Difference is ReCom needs the underlying dual graph (so sometimes uses the .json graphs),\n",
    "# whereas geopandas (for compactness purpose) only needs the shape\n",
    "dictStateToShapefile = {\n",
    "    'NC' : 'Shapefiles/NC/NC_VTD.shp',\n",
    "    'PA' : 'Shapefiles/PA/PA.shp',\n",
    "    'TX' : 'Shapefiles/TX/TX_vtds.shp',\n",
    "    'WI' : 'Shapefiles/WI/WI.shp',\n",
    "    'MD' : 'Shapefiles/MD/MD-precincts_abs.shp',\n",
    "    'MI' : 'Shapefiles/MI/mi16_results.shp'\n",
    "}\n",
    "\n",
    "# Real Plan name table; their plan file is just 'XXXX-1'\n",
    "dictStateToRealPlan = {\n",
    "    'NC' : 'NC19',\n",
    "    'PA' : 'PA11',\n",
    "    'TX' : 'TXCD',\n",
    "    'WI' : 'WICD',\n",
    "    'MD' : 'MD11',\n",
    "    'MI' : 'MI11'    \n",
    "}\n",
    "\n",
    "# Plan Prefix table\n",
    "dictStateToPlanPrefix = {\n",
    "    'NC' : '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/District_files/NC_districts/plan_',\n",
    "    'PA' : '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/District_files/PA_districts/plan_',\n",
    "    'TX' : '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/District_files/TX_districts/plan_',\n",
    "    'WI' : '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/District_files/WI_districts/plan_',\n",
    "    'MD' : '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/District_files/MD_districts/plan_',\n",
    "    'MI' : '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/District_files/MI_districts/plan_',\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d09ede7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# function for computing compactness using geopandas\n",
    "# default setting = minimum compactness (change to mean for average compactness)\n",
    "def compute_compactness(state, shapefile_path, plan_prefix, num_plans, output_filename):\n",
    "    stategraph = geopandas.read_file(shapefile_path)\n",
    "    pp_score_series = np.zeros(num_plans)\n",
    "    for plan_no in range(num_plans):\n",
    "        # note that Julia is 1-indexed\n",
    "        plan = pd.read_csv(plan_prefix + str(plan_no+1) + '.csv', usecols=['Precinct', 'District']) \n",
    "        stategraph['partition'] = plan['District']\n",
    "        \n",
    "        # only keep district boundaries\n",
    "        plangraph = stategraph.dissolve(by='partition')\n",
    "        pp_scores = (4*math.pi*plangraph.area)/(plangraph.boundary.length**2)\n",
    "        \n",
    "        print(pp_scores)\n",
    "        score = pp_scores.min() #score = pp_scores.mean()\n",
    "        pp_score_series[plan_no] = score\n",
    "        \n",
    "    pd.DataFrame(pp_score_series).to_csv(output_filename, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ca2f237b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# compute ensemble plans' compactness and store in csvs; VERY SLOW, DO NOT RUN IF NOT NECESSARY\n",
    "\n",
    "# for state in ['NC']: # state_list:\n",
    "#     compute_compactness(state = state,\n",
    "#                         shapefile_path = dictStateToShapefile[state],\n",
    "#                         plan_prefix = dictStateToPlanPrefix[state],\n",
    "#                         num_plans = 1,\n",
    "#                         output_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/'+state+'_compactness_scores_avg.csv'\n",
    "#                         )\n",
    "\n",
    "\n",
    "# compute real plans' compactness and store in csvs\n",
    "\n",
    "# for state in state_list:\n",
    "#     compute_compactness(state = state,\n",
    "#                         shapefile_path = dictStateToShapefile[state],\n",
    "#                         plan_prefix = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Real_Ensembles/'+dictStateToRealPlan[state]+'_',\n",
    "#                         num_plans = 1,\n",
    "#                         output_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Real_Ensembles/'+dictStateToRealPlan[state]+'_compactness_scores_min.csv'\n",
    "#                         )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a1c29969",
   "metadata": {},
   "outputs": [],
   "source": [
    "# function for parsing maximum DG strength from a row in the DG info csv\n",
    "def parse_max_strength(strength_list):\n",
    "    if strength_list == 'Float16[]':\n",
    "        return 0.5\n",
    "    else:\n",
    "        strength = strength_list[8:-1].split(',')\n",
    "        return strength[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f757288b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# load the raw dataframes\n",
    "summary_data = {}\n",
    "dg_data = {}\n",
    "\n",
    "for state in state_list:\n",
    "    # Read Ensemble Summary Dataframes\n",
    "    summary_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/2016/' + state + '_2016_default_summary.csv'\n",
    "    DF = pd.read_csv(summary_filename)\n",
    "    DF['Type'] = 'Ensemble' # label all plans ensemble\n",
    "    \n",
    "    # read the ensemble compactness score from file as a column\n",
    "    PPscore = pd.read_csv('../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/' + state + '_compactness_scores_min.csv')\n",
    "    DF['compactness_min'] = PPscore\n",
    "    \n",
    "    PPscore = pd.read_csv('../../Redistricting_via_Local_Fairness/audit_by_ensemble/Default_Ensembles/' + state + '_compactness_scores.csv')\n",
    "    DF['compactness_avg'] = PPscore\n",
    "    \n",
    "    # convert # of competitive districts to % of competitive districts\n",
    "    DF['% competitive districts'] = DF['# competitive districts']/(DF['# blue districts'] + DF['# red districts'])\n",
    "    \n",
    "    # read deviating group info\n",
    "    dg_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/2016/' + state + '_2016_default_DGs.csv'\n",
    "    DG = pd.read_csv(dg_filename)\n",
    "    \n",
    "    # parse the maximum strength of deviating groups for each plan, because we did not put this in the summary\n",
    "    DF['max DG strength'] = DG['Unhappy_pct'].apply(parse_max_strength).apply(Decimal)\n",
    "    \n",
    "    #\n",
    "    #\n",
    "    #\n",
    "    \n",
    "    # Read Real Plan Summary Dataframes\n",
    "    real_plan_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Real_Ensembles/' + dictStateToRealPlan[state] + '_auditing_summary.csv'\n",
    "    realDF = pd.read_csv(real_plan_filename)\n",
    "    realDF['Type'] = 'Real' # label the plan real\n",
    "    \n",
    "    # read the ensemble compactness score from file as a column\n",
    "    realPPscore = pd.read_csv('../../Redistricting_via_Local_Fairness/audit_by_ensemble/Real_Ensembles/' + dictStateToRealPlan[state] +'_compactness_scores_min.csv')\n",
    "    realDF['compactness_min'] = realPPscore\n",
    "    \n",
    "    # read the ensemble compactness score from file as a column\n",
    "    realPPscore = pd.read_csv('../../Redistricting_via_Local_Fairness/audit_by_ensemble/Real_Ensembles/' + dictStateToRealPlan[state] +'_compactness_scores.csv')\n",
    "    realDF['compactness_avg'] = realPPscore\n",
    "    \n",
    "    # convert # of competitive districts to % of competitive districts;\n",
    "    realDF['% competitive districts'] = realDF['# competitive districts']/(realDF['# blue districts'] + realDF['# red districts'])\n",
    "\n",
    "    # read deviating group info\n",
    "    realdg_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/Real_Ensembles/' + dictStateToRealPlan[state] + '_deviating_group_info.csv'\n",
    "    realDG = pd.read_csv(realdg_filename)\n",
    "    \n",
    "    # parse the maximum strength of deviating groups for each plan, because we did not put this in the summary\n",
    "    realDF['max DG strength'] = realDG['Unhappy_pct'].apply(parse_max_strength).apply(Decimal)\n",
    "    \n",
    "    \n",
    "    # Merge two dataframes\n",
    "    DF = pd.concat([DF, realDF], ignore_index=True)\n",
    "    \n",
    "    # convert B/R seatshares and number of competitive districts into percentages\n",
    "    DF['B%'] = DF['# blue districts']/(DF['# blue districts'] + DF['# red districts'])\n",
    "    \n",
    "    # rank the plans\n",
    "    DF['Unfair rank'] = DF['% voters unhappy in DGs'].rank(method='min')\n",
    "    print(state + ': the rank of the real plan is')\n",
    "    print(DF[DF['Type'] == 'Real']['Unfair rank'])\n",
    "    \n",
    "    summary_data[state] = DF\n",
    "    dg_data[state] = DG\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "71f96090",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate the overview table in the paper (number of plans in each state's ensemble without c-deviating groups)\n",
    "# Float number rounding error happens between Julia auditing and csv, so it does not always agree with the neurips submission table\n",
    "# but the difference should be small enough\n",
    "c_values = [0.5, 0.51, 0.52, 0.55]\n",
    "cFairPlans = pd.DataFrame(columns = c_values, index = state_list)\n",
    "for state in state_list:\n",
    "    DF = summary_data[state]\n",
    "    for c in c_values:\n",
    "        cFairPlans[c][state] = DF[(DF['max DG strength'] <= c) & (DF['Type'] == 'Ensemble')].shape[0]\n",
    "print(cFairPlans)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8ab4328d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the palette from sns.color_palette(\"Paired\")\n",
    "myPalette = []\n",
    "for i in range(6):\n",
    "    myPalette.append('black') # for real plans\n",
    "    myPalette.append(sns.color_palette(\"Paired\")[2*i])\n",
    "    myPalette.append(sns.color_palette(\"Paired\")[2*i+1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b49f94fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# An abstract function for plotting a box plot\n",
    "def plotBox(data, measure, ylim, xlabel = '', ylabel = '', usePercentages = True, filename=None):\n",
    "    sns.set()\n",
    "    sns.set_style(\"white\")\n",
    "    sns.set_context(\"paper\", font_scale=2.5)\n",
    "\n",
    "    fig = sns.catplot(data=data, kind=\"box\", x='set', y=measure,\n",
    "            palette=myPalette, \n",
    "            order = [\n",
    "                'MD-real', 'MD-fair', 'MD-whole',\n",
    "                'MI-real', 'MI-fair', 'MI-whole',\n",
    "                'NC-real', 'NC-fair', 'NC-whole',\n",
    "                'PA-real', 'PA-fair', 'PA-whole',\n",
    "                'TX-real', 'TX-fair', 'TX-whole',\n",
    "                'WI-real', 'WI-fair', 'WI-whole',\n",
    "            ],\n",
    "            height=4, aspect=7.5, legend=True,\n",
    "            facet_kws={'sharey':True,'sharex':False}\n",
    "            )\n",
    "\n",
    "    for ax in fig.axes.flat:\n",
    "        if usePercentages:\n",
    "            ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))\n",
    "    fig.set(xlabel=xlabel, ylabel=ylabel)\n",
    "    fig.set(ylim=ylim)\n",
    "\n",
    "    if filename:\n",
    "        plt.tight_layout()\n",
    "        plt.savefig(filename, dpi=300, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64faf1ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate the box plots to compare fair plans vs. whole ensemble\n",
    "fairThreshold = 50 # top 5% fairest\n",
    "listMeasure = ['B%', 'avg partisanship', '% competitive districts', 'compactness_min', 'compactness_avg']\n",
    "\n",
    "lstDf = []\n",
    "for state in state_list:\n",
    "    DF = summary_data[state]\n",
    "    EnsembleSorted = DF[DF['Type'] == 'Ensemble'].sort_values(by = '% voters unhappy in DGs')[listMeasure].copy() # sort plans by fairest first and drop unnecessary columns\n",
    "    Real = DF[DF['Type'] == 'Real'].copy()\n",
    "    \n",
    "    # fair plans\n",
    "    fairPlans = EnsembleSorted.head(50).copy()\n",
    "    fairPlans['set'] = state+'-fair'\n",
    "    lstDf.append(fairPlans)\n",
    "    \n",
    "    # whole ensemble\n",
    "    EnsembleSorted['set'] = state+'-whole'\n",
    "    lstDf.append(EnsembleSorted)\n",
    "    \n",
    "    # real plan\n",
    "    Real['set'] = state+'-real'\n",
    "    lstDf.append(Real)\n",
    "    \n",
    "agg = pd.concat(lstDf, ignore_index = True)\n",
    "\n",
    "# Plot a general box plot\n",
    "plotBox(data = agg, measure = 'B%', ylim = (0.2,1), xlabel = '', ylabel = 'Blue seat share %', filename='seat.png')\n",
    "plotBox(data = agg, measure = 'avg partisanship', ylim = (0.55,0.72), xlabel = '', ylabel = 'Average partisanship', filename='partisanship.png')\n",
    "plotBox(data = agg, measure = '% competitive districts', ylim = (-0.01, 0.6), xlabel = '', ylabel = '% of competitive districts ', filename='competitiveness.png')\n",
    "plotBox(data = agg, measure = 'compactness_min', ylim = (0, 0.2), xlabel = '', ylabel = 'Min compactness ', usePercentages = False, filename='compactness_min.png')\n",
    "plotBox(data = agg, measure = 'compactness_avg', ylim = (0, 0.3), xlabel = '', ylabel = 'Avg compactness ', usePercentages = False, filename='compactness_avg.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24cac95c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# An abstract function for plotting a bar plot\n",
    "def plotHistogram(data, xlabel = '', ylabel = '', filename=None):\n",
    "    sns.set()\n",
    "    sns.set_style(\"white\")\n",
    "    sns.set_context(\"paper\", font_scale=2.5)\n",
    "\n",
    "    fig = sns.displot(data=data, kind=\"hist\", x='% voters unhappy in DGs',\n",
    "            binrange = (0, 0.3), binwidth = 0.05, hue = 'Set of plans', col= 'State',\n",
    "            stat = 'probability', common_norm = False, element=\"step\", #multiple = 'dodge',# #shrink=.8,\n",
    "            height=4, aspect=1.2, legend=True\n",
    "            )\n",
    "\n",
    "    for ax in fig.axes.flat:\n",
    "        ax.yaxis.set_major_formatter(mtick.PercentFormatter(xmax=1))\n",
    "    fig.set(xlabel=xlabel, ylabel=ylabel)\n",
    "    fig.set(ylim=(0,1))\n",
    "    fig.set_titles(\"{col_name}\")\n",
    "\n",
    "    if filename:\n",
    "        plt.savefig(filename, dpi=300, bbox_inches='tight')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e9d0a12",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate the bar plots in the 2012 vs 16 experiments\n",
    "summary_data_12 = {}\n",
    "for state in state_list_2012:\n",
    "    # Read 2012 Ensemble Summary Dataframes\n",
    "    summary_filename = '../../Redistricting_via_Local_Fairness/audit_by_ensemble/2012/' + state + '_2012_auditing_summary.csv'\n",
    "    DF = pd.read_csv(summary_filename)\n",
    "    summary_data_12[state] = DF\n",
    "    \n",
    "Scores16 = []\n",
    "Scores12 = []\n",
    "for state in state_list_2012:\n",
    "    DF16 = summary_data[state][summary_data[state]['Type'] == 'Ensemble'].copy()\n",
    "    DF16['State'] = state\n",
    "    DF12 = summary_data_12[state]\n",
    "    DF12['State'] = state\n",
    "    ListFairPlans16 = (DF16['% voters unhappy in DGs'] == 0)\n",
    "    ListFairPlans12 = (DF12['% voters unhappy in DGs'] == 0)\n",
    "\n",
    "    # extract 2016 fair plans' 2012 score\n",
    "    Fair16_score12 = DF12[ListFairPlans16].copy()\n",
    "    Fair16_score12['Set of plans'] = 'Fair 2016 Plans'\n",
    "    Scores12.append(Fair16_score12)\n",
    "    \n",
    "    All_score12 = DF12.copy()\n",
    "    All_score12['Set of plans'] = 'Whole Ensemble'\n",
    "    Scores12.append(All_score12)\n",
    "    \n",
    "    # extract 2012 fair plans' 2016 score\n",
    "    Fair12_score16 = DF16[ListFairPlans12].copy()\n",
    "    Fair12_score16['Set of plans'] = 'Fair 2012 Plans'\n",
    "    Scores16.append(Fair12_score16)\n",
    "    \n",
    "    All_score16 = DF16.copy()\n",
    "    All_score16['Set of plans'] = 'Whole Ensemble'\n",
    "    Scores16.append(All_score16)\n",
    "\n",
    "agg12 = pd.concat(Scores12, ignore_index = True)\n",
    "agg16 = pd.concat(Scores16, ignore_index = True)\n",
    "\n",
    "# Plot the histograms\n",
    "plotHistogram(data = agg12, xlabel = 'unf score (2012)', ylabel = '% plans', filename='2012.png')\n",
    "plotHistogram(data = agg16, xlabel = 'unf score (2016)', ylabel = '% plans', filename='2016.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ab3287a",
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_districting(state, map_no):\n",
    "    districts = geopandas.read_file(\"../../Redistricting_via_Local_Fairness/\"+dictStateToShapefile[state])\n",
    "\n",
    "    col_list = [\"Precinct\", \"District\", \"Partisanship\"]\n",
    "    actual_districting = pd.read_csv(\"../../Redistricting_via_Local_Fairness/District_files/\" + state + \"_districts/plan_\" + str(map_no) + \".csv\", usecols=col_list)\n",
    "\n",
    "    districts[\"partition\"] = actual_districting[\"District\"]\n",
    "    districts[\"partisanship\"] = actual_districting[\"Partisanship\"]\n",
    "\n",
    "    #only keep district boundaries\n",
    "    districting_df = districts.dissolve(by=\"partition\")\n",
    "\n",
    "    plt.box(False)\n",
    "    ax = districting_df.plot(column=\"partisanship\", cmap=\"coolwarm\", edgecolor='k', linewidth=.35)\n",
    "    ax.axis('off')\n",
    "    plt.savefig(\"figures/map_\" + str(map_no), transparent=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f50ea5a3-6cd7-48e3-a36f-a08f3a76c1b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "DEVIATING_GROUP_PATH = \"../../Redistricting_via_Local_Fairness/audit_by_ensemble/2016/\"\n",
    "\n",
    "def plot_all_dgs(state, map_no):\n",
    "        DISTRICTING_PATH = \"../../Redistricting_via_Local_Fairness/District_files/\" + state + \"_districts/plan_\" \n",
    "        \n",
    "        geographic_info = geopandas.read_file(\"../../Redistricting_via_Local_Fairness/\"+dictStateToShapefile[state])\n",
    "        col_list = [\"Map\", \"District\", \"Total_Pop\", \"Type\", \"Unhappy_pct\"]\n",
    "        info = pd.read_csv(DEVIATING_GROUP_PATH + state + \"_2016_default_DGs.csv\")\n",
    "\n",
    "        audited_map = info.iloc[map_no-1]\n",
    "        if audited_map[\"Map\"] == \"Int64[]\":\n",
    "                return\n",
    "        stripped = audited_map[\"Map\"][1:len(audited_map[\"Map\"])-1]\n",
    "\n",
    "        maps = stripped.split(\",\")\n",
    "        maps = [int(x) for x in maps]\n",
    "\n",
    "        stripped = audited_map[\"District\"][1:len(audited_map[\"District\"])-1]\n",
    "        districts = stripped.split(\",\")\n",
    "        districts = [int(x) for x in districts]\n",
    "\n",
    "\n",
    "        ### plotting info\n",
    "        col_list = [\"Precinct\", \"District\", \"Partisanship\"]\n",
    "        actual_districting = pd.read_csv(DISTRICTING_PATH + str(map_no) + \".csv\", usecols=col_list)\n",
    "\n",
    "        geographic_info[\"partition\"] = actual_districting[\"District\"]\n",
    "        geographic_info[\"partisanship\"] = actual_districting[\"Partisanship\"]\n",
    "\n",
    "        #only keep district boundaries\n",
    "        districting_df = geographic_info.dissolve(by=\"partition\")\n",
    "\n",
    "\n",
    "        for i in range(len(maps)):\n",
    "            col_list = [\"Precinct\", \"District\", \"Partisanship\"]\n",
    "            deviating_districting = pd.read_csv(DISTRICTING_PATH + str(maps[i]) + \".csv\", usecols=col_list)\n",
    "\n",
    "            district_num = districts[i]\n",
    "            list = []\n",
    "\n",
    "            for j in deviating_districting.index:\n",
    "                if deviating_districting[\"District\"][j] == district_num:\n",
    "                    list.append(deviating_districting[\"Precinct\"][j])\n",
    "\n",
    "            deviating_group = pd.DataFrame(list)\n",
    "            deviating_group.columns = [\"precincts\"]\n",
    "\n",
    "            drop = []\n",
    "            dev_group = [0]*len(deviating_districting)\n",
    "\n",
    "            for prec in deviating_group[\"precincts\"]:\n",
    "                dev_group[prec-1] = 1\n",
    "\n",
    "            for j in range(len(dev_group)):\n",
    "                if dev_group[j] == 0:\n",
    "                    drop.append(j)\n",
    "\n",
    "\n",
    "            geographic_info[\"dev_group\"] = dev_group\n",
    "            only_group =  geographic_info[geographic_info[\"dev_group\"] == 1]\n",
    "            only_group = only_group.dissolve(by=\"dev_group\")\n",
    "\n",
    "            #plotting\n",
    "            plt.box(False)\n",
    "            ax = districting_df.plot(column=\"partisanship\", cmap=\"coolwarm\", edgecolor='k', linewidth=.0, alpha=.5)\n",
    "            ax.axis('off')\n",
    "\n",
    "            ####dev. group\n",
    "            # ax.set_title(\"Map \" +  str(map_no) + \" \"  + \"Deviating Group\")\n",
    "            only_group.boundary.plot(ax=ax, facecolor=None, edgecolor='k', linewidth = 1.25,hatch='/')\n",
    "            plt.savefig(\"figures/map_\"+ str(map_no) +\"_dev_group_\" + str(maps[i]) +\"_\" +str(district_num)+ \".png\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7e122ee-93b4-4489-aad3-f4cf52fcdd9e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# WILL plot ALL deviating groups of a plan, one per figure file... DO NOT RUN IF NOT NECESSARY\n",
    "'''  \n",
    "import random\n",
    "to_plot = random.sample(range(1000), 1)\n",
    "\n",
    "for i in to_plot:\n",
    "    plot_districting('MI', i)\n",
    "    plot_all_dgs('MI', i)'''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09f107cf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the descriptive statistics about North Carolina districts' compactness scores\n",
    "pp_lb_nc = agg[agg['set']=='NC-whole']['compactness_min'].min()\n",
    "print(pp_lb_nc)\n",
    "\n",
    "pp_mean_nc = agg[agg['set']=='NC-whole']['compactness_avg'].mean()\n",
    "print(pp_mean_nc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2fcebea8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot heat map for fair plans in NC\n",
    "dg_info = pd.read_csv('nc_dp_compactness_good_plans_trees_1_5.csv', header = 0)\n",
    "\n",
    "compactness = []\n",
    "strength = []\n",
    "\n",
    "for i in range(len(dg_info)):\n",
    "    current_dg = dg_info.iloc[i]\n",
    "    current_strength = current_dg[\"Strength\"]\n",
    "    current_pp_score = current_dg[\"Polsby Score\"]\n",
    "    if (current_pp_score >= 0):\n",
    "        compactness.append(current_pp_score)\n",
    "        strength.append(current_strength)\n",
    "\n",
    "print(\"There are \" + str(len(compactness)) + \" total DGs.\")\n",
    "\n",
    "heatmap, xedges, yedges = np.histogram2d(compactness, strength, bins = 50, range = [[0.0, 0.25], [0.5, 0.75]])\n",
    "heatmap = heatmap / 10\n",
    "extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]\n",
    "\n",
    "plt.clf()\n",
    "cmap = copy.copy(plt.get_cmap('cool'))\n",
    "cmap.set_under('white')\n",
    "plt.imshow(heatmap.T, extent=extent, origin='lower', cmap=cmap, vmin = 0.0000001, vmax = 5, interpolation='nearest')\n",
    "\n",
    "#for good maps\n",
    "plt.annotate('D1(.152, .512)', xycoords='data', xy=(.152, .512), xytext=(.16, .55), arrowprops=dict(arrowstyle='->'))\n",
    "plt.annotate('D2(.020, .572)', xycoords='data', xy=(.020, .572), xytext=(.04, .6), arrowprops=dict(arrowstyle='->'))\n",
    "plt.vlines(pp_lb_nc, 0.5, 0.75, linestyles='dashed', color='black')\n",
    "plt.vlines(pp_mean_nc, 0.5, 0.75, linestyles='dotted', color='black')\n",
    "\n",
    "plt.title(\"\")\n",
    "plt.xlabel('Polsby-Popper score')\n",
    "plt.ylabel('Strength')\n",
    "plt.savefig('dg_dp_good.png', dpi=300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "177f59e6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot heat map for unfair plans in NC\n",
    "dg_info = pd.read_csv('nc_dp_compactness_bad_plans_trees_1_5.csv', header = 0)\n",
    "\n",
    "compactness = []\n",
    "strength = []\n",
    "\n",
    "for i in range(len(dg_info)):\n",
    "    current_dg = dg_info.iloc[i]\n",
    "    current_strength = current_dg[\"Strength\"]\n",
    "    current_pp_score = current_dg[\"Polsby Score\"]\n",
    "    if (current_pp_score >= 0):\n",
    "        compactness.append(current_pp_score)\n",
    "        strength.append(current_strength)\n",
    "\n",
    "print(\"There are \" + str(len(compactness)) + \" total DGs.\")\n",
    "\n",
    "heatmap, xedges, yedges = np.histogram2d(compactness, strength, bins = 50, range = [[0.0, 0.25], [0.5, 0.75]])\n",
    "heatmap = heatmap / 10\n",
    "extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]\n",
    "\n",
    "plt.clf()\n",
    "cmap = copy.copy(plt.get_cmap('cool'))\n",
    "cmap.set_under('white')\n",
    "plt.imshow(heatmap.T, extent=extent, origin='lower', cmap=cmap, vmin = 0.0000001, vmax = 5, interpolation='nearest')\n",
    "\n",
    "#for good maps\n",
    "plt.annotate('D3(.234, .593)', xycoords='data', xy=(.234, .593), xytext=(.165, .63), arrowprops=dict(arrowstyle='->'))\n",
    "plt.annotate('D4(.161, .730)', xycoords='data', xy=(.161, .730), xytext=(.165, .7), arrowprops=dict(arrowstyle='->'))\n",
    "plt.vlines(pp_lb_nc, 0.5, 0.75, linestyles='dashed', color='black')\n",
    "plt.vlines(pp_mean_nc, 0.5, 0.75, linestyles='dotted', color='black')\n",
    "\n",
    "plt.title(\"\")\n",
    "plt.xlabel('Polsby-Popper score')\n",
    "plt.ylabel('Strength')\n",
    "plt.savefig('dg_dp_bad.png', dpi=300)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d52e32e8",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
