{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": [
        "q2HzaxcaA8U3"
      ]
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Db-2rolTQD5x"
      },
      "outputs": [],
      "source": [
        "# !pip install pulp\n",
        "# !pip install gurobipy\n",
        "import numpy as np\n",
        "import itertools\n",
        "from typing import Callable\n",
        "from functools import partial\n",
        "import json\n",
        "from operator import add, mul\n",
        "import matplotlib.pyplot as plt\n",
        "import pulp\n",
        "import math\n",
        "import random\n",
        "from math import exp\n",
        "import time\n",
        "from IPython import display\n",
        "import json\n",
        "from os import walk\n",
        "from matplotlib.ticker import FormatStrFormatter\n"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Plot from files"
      ],
      "metadata": {
        "id": "HjFs23zvOKBM"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Distortion with differen $\\phi$ and $m$"
      ],
      "metadata": {
        "id": "ecSPvmZdEtso"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Default title text\n",
        "\n",
        "def export_legend(legend, filename=\"legend.pdf\"):\n",
        "    fig  = legend.figure\n",
        "    fig.canvas.draw()\n",
        "    bbox  = legend.get_window_extent().transformed(fig.dpi_scale_trans.inverted())\n",
        "    fig.savefig(filename, bbox_inches=bbox)             \n",
        "\n",
        "def add_data(old_data, data, mode=0):\n",
        "    all_data = old_data\n",
        "    for rule in data.keys():\n",
        "        if rule not in all_data.keys():\n",
        "            all_data[rule] = data[rule]\n",
        "        else:\n",
        "            rule_data = all_data[rule]\n",
        "            m_num = len(rule_data)\n",
        "            phi_num = len(rule_data[0])\n",
        "            if mode == 1:\n",
        "                for i in range(m_num):\n",
        "                    for j in range(phi_num):\n",
        "                        for k in range(len(rule_data[i][j])):\n",
        "                            rule_data[i][j][k].extend(data[rule][i][j][k])\n",
        "            else:\n",
        "                for i in range(m_num):\n",
        "                    for j in range(phi_num):\n",
        "                        rule_data[i][j].extend(data[rule][i][j])\n",
        "            all_data[rule] = rule_data\n",
        "    return all_data\n",
        "\n",
        "\n",
        "def combine_files(base_path, mode=0):\n",
        "    file_names = [\"{}/\".format(base_path) + x for x in next(walk(base_path), (None, None, []))[2]]\n",
        "    # print(file_names)\n",
        "    all_data = None\n",
        "    for name in file_names:\n",
        "        if name[-4:] == \"json\":\n",
        "            with open(name) as f:\n",
        "                data = 0\n",
        "                data = json.load(f)\n",
        "                if all_data == None:\n",
        "                    all_data = data\n",
        "                else:\n",
        "                    all_data = add_data(all_data, data, mode)\n",
        "    return all_data\n",
        "\n",
        "\n",
        "\n",
        "# fig.legend(loc=7)\n",
        "\n",
        "\n",
        "def plot_from_files(base_path, out_name, max_m = 50, n=100, phi=0.5):\n",
        "    dataset = combine_files(base_path)\n",
        "    all_m = list(range(5, max_m+1, 5))\n",
        "    all_phi = [x/10 for x in range(10)]\n",
        "    all_phi.append(1)\n",
        "\n",
        "    phi_index = all_phi.index(phi)\n",
        "\n",
        "    fig, ax = plt.subplots(figsize=(5, 3))\n",
        "    fig.tight_layout()\n",
        "    plt.rcParams.update({'font.size': 15})\n",
        "    plt.rcParams[\"font.family\"] = \"times\"\n",
        "    plt.rcParams.update({'font.style': 'normal'})\n",
        "    plt.rcParams.update({\n",
        "        \"text.usetex\": True\n",
        "    })\n",
        "    r_num = 0\n",
        "\n",
        "    for r in rules_colors.keys():\n",
        "        if r in dataset.keys():\n",
        "            cl = rules_colors[r][0]\n",
        "            start = time.time()\n",
        "            results = [x[phi_index] for x in dataset[r]]\n",
        "            if len(results[0]) > r_num:\n",
        "                r_num = len(results[0])\n",
        "            # print(r[\"name\"])\n",
        "            avg_results = [np.mean(x) for x in results]\n",
        "            sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in results]\n",
        "\n",
        "\n",
        "            # ax.plot(all_m, avg_results, label=r)\n",
        "            r_name = r if len(rules_colors[r]) == 1 else rules_colors[r][1]\n",
        "            ax.errorbar(all_m, avg_results, label=r_name, yerr=sem_results, color=cl[0], fmt=cl[1:])\n",
        "            \n",
        "    # plt.title(\"Distortion based on $m$\\n$\\phi = {}, n = {}, \\# runs = {}$\".format(phi,n, r_num))\n",
        "    plt.xlabel('$m$') \n",
        "    plt.ylabel('Average distortion') \n",
        "    # ax.set_ylim([1, 10])\n",
        "    plt.tight_layout()\n",
        "    # leg = fig.legend(labelspacing=1.5, loc='center right', bbox_to_anchor=(-.1, .5))\n",
        "    # export_legend(leg)\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "\n",
        "\n",
        "def plot_from_files2(base_path, out_name, max_m = 50, n=100, m=25):\n",
        "    dataset = combine_files(base_path)\n",
        "    all_m = list(range(5, max_m+1, 5))\n",
        "    all_phi = [x/10 for x in range(10)]\n",
        "    all_phi.append(1)\n",
        "\n",
        "    m_index = all_m.index(m)\n",
        "\n",
        "    fig, ax = plt.subplots(figsize=(5, 3))\n",
        "    fig.tight_layout()\n",
        "    plt.rcParams.update({'font.size': 15})\n",
        "    plt.rcParams[\"font.family\"] = \"times\"\n",
        "    plt.rcParams.update({'font.style': 'normal'})\n",
        "    plt.rcParams.update({\n",
        "        \"text.usetex\": True\n",
        "    })\n",
        "    r_num = 0\n",
        "\n",
        "    for r in rules_colors.keys():\n",
        "        if r in dataset.keys():\n",
        "            cl = rules_colors[r][0]\n",
        "            start = time.time()\n",
        "\n",
        "            results = dataset[r][m_index]\n",
        "            if len(results[0]) > r_num:\n",
        "                r_num = len(results[0])\n",
        "            # print(r[\"name\"])\n",
        "            avg_results = [np.mean(x) for x in results]\n",
        "            sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in results]\n",
        "\n",
        "\n",
        "            # ax.plot(all_m, avg_results, label=r)\n",
        "            r_name = r if len(rules_colors[r]) == 1 else rules_colors[r][1]\n",
        "            ax.errorbar(all_phi, avg_results, label=r_name, yerr=sem_results, color=cl[0], fmt=cl[1:])\n",
        "            \n",
        "    # plt.title(\"Distortion based on $\\phi$\\n$m = {}, n = {}, \\# runs = {}$\".format(m,n, r_num))\n",
        "    plt.xlabel('$\\phi$') \n",
        "    plt.ylabel('Average distortion') \n",
        "    ax.set_ylim([1, 25])\n",
        "    plt.tight_layout()\n",
        "    # leg = fig.legend(labelspacing=1.5, loc='center right', bbox_to_anchor=(-.1, .5))\n",
        "    # export_legend(leg)\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "\n",
        "mypath = \"data\"\n",
        "\n",
        "rules_colors =  {\n",
        "    \"~ Borda\":              [\"r--\", \"R Borda\"],\n",
        "    \"~ Plurality\":          [\"r-\", \"R Plurality\"],\n",
        "    \"~ Harmonic\":           [\"r:\", \"R Harmonic\"],\n",
        "    \"~ 3-app\":              [\"r-.\", \"R 3-Approval\"],\n",
        "    \"Det. Borda\":           [\"g--\", \"D Borda\"],\n",
        "    \"Det. Plurality\":       [\"g-\", \"D Plurality\"],\n",
        "    \"Det. Harmonic\":        [\"g:\", \"D Harmonic\"],\n",
        "    \"Det. 3-app\":           [\"g-.\", \"D 3-Approval\"],\n",
        "    \"3 Borda\":              [\"b--\", \"UR$_3$ Borda\"],\n",
        "    \"3 Plurality\":          [\"b-\", \"UR$_3$ Plurality\"],\n",
        "    \"3 Harmonic\":           [\"b:\", \"UR$_3$ Harmonic\"],\n",
        "    \"3 3-app\":              [\"b-.\", \"UR$_3$ 3-Approval\"],\n",
        "    \"Uniform\":              [\"y-\"],\n",
        "    \"Opt\":                  [\"m\", \"Optimal\"]\n",
        "    }\n",
        "\n",
        "# plot_from_files( \"data\", \"all_rules_phi_5\", 50, 100, .5)\n",
        "plot_from_files2(\"data\", \"all_rules_m_25\", 50, 100, 25)"
      ],
      "metadata": {
        "id": "kBDtiJ7uONGE"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Best $k$ and best $k$ distortion"
      ],
      "metadata": {
        "id": "eyq7DOnXE1mI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "# fig, ax = plt.subplots(figsize=(15, 8))\n",
        "def set_of_rules_all_k(path, out_name, rules_colors, m, n=50, phi=0.5):\n",
        "    data = combine_files(path, 1)\n",
        "    all_phi = [x/10 for x in range(10)]\n",
        "    all_phi.append(1)\n",
        "    phi_index = all_phi.index(phi)\n",
        "    # fig, ax = plt.subplots(figsize=(15, 8))\n",
        "    run_num = 0\n",
        "    for r in rules_colors.keys():\n",
        "\n",
        "        cl = rules_colors[r]\n",
        "        results = [x[0][phi_index] for x in data[r]]\n",
        "        run_num = max(run_num, len(results[0]))\n",
        "        # print(run_num)\n",
        "        avg_results = [np.mean(x) for x in results]\n",
        "        sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in results]\n",
        "\n",
        "        ax.errorbar(list(range(1, m+1)), avg_results, label=\"{}, $\\phi={}$\".format(r, phi), yerr=sem_results, color=cl[0], fmt=cl[1:])\n",
        "\n",
        "    plt.title(\"Distortion based on k\\nm = {}, n = {}, #runs = {}, phi = {}\".format(m,n,run_num, phi))\n",
        "    plt.xlabel('k') \n",
        "    plt.ylabel('Avg. distortion') \n",
        "    ax.legend()\n",
        "\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "\n",
        "\n",
        "\n",
        "def best_k(path, out_name, rules_colors, m, n=100):\n",
        "    data = combine_files(path, 1)\n",
        "    all_phi = [x/10 for x in range(10)]\n",
        "    all_phi.append(1)\n",
        "\n",
        "    fig, ax = plt.subplots(figsize=(5, 3))\n",
        "    fig.tight_layout()\n",
        "    plt.rcParams.update({'font.size': 15})\n",
        "    plt.rcParams[\"font.family\"] = \"times\"\n",
        "    plt.rcParams.update({'font.style': 'normal'})\n",
        "    plt.rcParams.update({\n",
        "        \"text.usetex\": True\n",
        "    })\n",
        "\n",
        "    run_num = 0\n",
        "    for r in rules_colors.keys():\n",
        "\n",
        "        cl = rules_colors[r]\n",
        "        \n",
        "\n",
        "        \n",
        "        final_results = []\n",
        "        for i in range(len(all_phi)):\n",
        "            results = [x[0][i] for x in data[r]]\n",
        "            run_num = max(run_num, len(results[0]))\n",
        "            temp = []\n",
        "            for j in range(len(results[0])):\n",
        "                k_dist = [x[j] for x in results]\n",
        "                temp.append(k_dist.index(min(k_dist))+1)\n",
        "            final_results.append(temp)\n",
        "\n",
        "        avg_results = [np.mean(x) for x in final_results]\n",
        "        sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in final_results]\n",
        "        if r == \"k 3-app\":\n",
        "            r = \"k 3-Approval\"\n",
        "        ax.errorbar(all_phi, avg_results, label=\"UR$_k${}\".format(r[1:]), yerr=sem_results, color=cl[0], fmt=cl[1:])\n",
        "        \n",
        "        # ax.plot(all_phi, final_results, cl, label=r)\n",
        "        # ax.legend(loc='center left', bbox_to_anchor=(1.15, 0.5))\n",
        "        ax.legend(prop={'family':'times', 'size':12})\n",
        "\n",
        "    plt.title(\"Best $k$ for each $\\phi$\\n$m = {}, n = {}, \\# runs = {}$\".format(m,n,run_num))\n",
        "    plt.xlabel('$\\phi$') \n",
        "    plt.ylabel('Best $k$') \n",
        "\n",
        "\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "\n",
        "def dist_of_best_k(path, out_name, k_values, rules_colors, m, n=100):\n",
        "    data = combine_files(path, 1)\n",
        "    all_phi = [x/10 for x in range(10)]\n",
        "    all_phi.append(1)\n",
        "\n",
        "    fig, ax = plt.subplots(figsize=(5, 3))\n",
        "    fig.tight_layout()\n",
        "    plt.rcParams.update({'font.size': 15})\n",
        "    plt.rcParams[\"font.family\"] = \"times\"\n",
        "    plt.rcParams.update({'font.style': 'normal'})\n",
        "    plt.rcParams.update({\n",
        "        \"text.usetex\": True\n",
        "    })\n",
        "\n",
        "    run_num = 0\n",
        "    for r in rules_colors.keys():\n",
        "\n",
        "        cl = rules_colors[r]\n",
        "        \n",
        "\n",
        "        \n",
        "        final_results = []\n",
        "        final_errors = []\n",
        "        for i in range(len(all_phi)):\n",
        "            results = [x[0][i] for x in data[r]]\n",
        "            run_num = max(run_num, len(results[0]))\n",
        "            # print(run_num)\n",
        "            avg_results = [np.mean(x) for x in results]\n",
        "\n",
        "            sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in results]\n",
        "            final_results.append(avg_results[k_values[i]-1])\n",
        "            final_errors.append(sem_results[k_values[i]-1])\n",
        "        if r == \"k 3-app\":\n",
        "            r = \"k 3-Approval\"\n",
        "        ax.errorbar(all_phi, final_results, label=\"UR$_k${}\".format(r[1:]), yerr=final_errors, color=cl[0], fmt=cl[1:])\n",
        "        # ax.plot(all_phi, final_results, cl, label=r)\n",
        "    plt.xlabel('$\\phi$') \n",
        "    plt.ylabel('Average distortion') \n",
        "    ax.legend(prop={'family':'times', 'size':12})\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "\n",
        "def m_best_k(path, out_name, rules_colors, phi, n=100):\n",
        "    data = combine_files(path, 1)\n",
        "    all_m = list(range(5, 51, 5))\n",
        "\n",
        "    all_phi = [0.1, 0.5, 1]\n",
        "    phi_index = all_phi.index(phi)\n",
        "\n",
        "    fig, ax = plt.subplots(figsize=(5, 3))\n",
        "    fig.tight_layout()\n",
        "    plt.rcParams.update({'font.size': 15})\n",
        "    plt.rcParams[\"font.family\"] = \"times\"\n",
        "    plt.rcParams.update({'font.style': 'normal'})\n",
        "    plt.rcParams.update({\n",
        "        \"text.usetex\": True\n",
        "    })\n",
        "\n",
        "    run_num = 0\n",
        "    for r in rules_colors.keys():\n",
        "\n",
        "        cl = rules_colors[r]\n",
        "        \n",
        "\n",
        "        \n",
        "        final_results = []\n",
        "        for i in range(len(all_m)):\n",
        "            results = [x[i][phi_index] for x in data[r]]\n",
        "            run_num = max(run_num, len(results[0]))\n",
        "            temp = []\n",
        "            for j in range(len(results[0])):\n",
        "                k_dist = [x[j] for x in results]\n",
        "                temp.append(k_dist.index(min(k_dist))+1)\n",
        "            final_results.append(temp)\n",
        "        avg_results = [np.mean(x) for x in final_results]\n",
        "        sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in final_results]\n",
        "        if r == \"k 3-app\":\n",
        "            r = \"k 3-Approval\"\n",
        "        ax.errorbar(all_m, avg_results, label=\"UR$_k${}\".format(r[1:]), yerr=sem_results, color=cl[0], fmt=cl[1:])\n",
        "        \n",
        "        # ax.plot(all_phi, final_results, cl, label=r)\n",
        "        # ax.legend(loc='center left', bbox_to_anchor=(1.15, 0.5))\n",
        "        ax.legend(prop={'family':'times', 'size':12})\n",
        "\n",
        "    # plt.title(\"Best $k$ for each $\\phi$\\n$m = {}, n = {}, \\# runs = {}$\".format(m,n,run_num))\n",
        "    plt.xlabel('$m$') \n",
        "    plt.ylabel('Best $k$') \n",
        "    # ax.set_ylim([2,5])\n",
        "    # ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "\n",
        "def dist_m_best_k(path, out_name, rules_colors, phi, n=100):\n",
        "    data = combine_files(path, 1)\n",
        "    all_m = list(range(5, 51, 5))\n",
        "\n",
        "    all_phi = [0.1, 0.5, 1]\n",
        "    phi_index = all_phi.index(phi)\n",
        "\n",
        "    fig, ax = plt.subplots(figsize=(5, 3))\n",
        "    fig.tight_layout()\n",
        "    plt.rcParams.update({'font.size': 15})\n",
        "    plt.rcParams[\"font.family\"] = \"times\"\n",
        "    plt.rcParams.update({'font.style': 'normal'})\n",
        "    plt.rcParams.update({\n",
        "        \"text.usetex\": True\n",
        "    })\n",
        "\n",
        "    run_num = 0\n",
        "    for r in rules_colors.keys():\n",
        "\n",
        "        cl = rules_colors[r]\n",
        "        \n",
        "\n",
        "        \n",
        "        final_results = []\n",
        "        for i in range(len(all_m)):\n",
        "            results = [x[i][phi_index] for x in data[r]]\n",
        "            run_num = max(run_num, len(results[0]))\n",
        "            temp = []\n",
        "            for j in range(len(results[0])):\n",
        "                k_dist = [x[j] for x in results]\n",
        "                temp.append(min(k_dist))\n",
        "            final_results.append(temp)\n",
        "            # print(run_num)\n",
        "            # avg_results = [np.mean(x) for x in results]\n",
        "            # final_results.append(avg_results.index(min(avg_results))+1)\n",
        "        # print(final_results)\n",
        "        avg_results = [np.mean(x) for x in final_results]\n",
        "        sem_results = [np.std(x)/np.sqrt(np.size(x)) for x in final_results]\n",
        "        if r == \"k 3-app\":\n",
        "            r = \"k 3-Approval\"\n",
        "        ax.errorbar(all_m, avg_results, label=\"UR$_k${}\".format(r[1:]), yerr=sem_results, color=cl[0], fmt=cl[1:])\n",
        "        \n",
        "        # ax.plot(all_phi, final_results, cl, label=r)\n",
        "        # ax.legend(loc='center left', bbox_to_anchor=(1.15, 0.5))\n",
        "        ax.legend(prop={'family':'times', 'size':12})\n",
        "\n",
        "    # plt.title(\"Best $k$ for each $\\phi$\\n$m = {}, n = {}, \\# runs = {}$\".format(m,n,run_num))\n",
        "    plt.xlabel('$m$') \n",
        "    plt.ylabel('Average distortion') \n",
        "    # ax.set_ylim([1, 4])\n",
        "\n",
        "    plt.savefig('outputs/{}.pdf'.format(out_name))\n",
        "all_rules = {\n",
        "             \"k Borda\": \"r-\",\n",
        "             \"k Plurality\": \"b-\",\n",
        "             \"k Harmonic\": \"c-\",  \n",
        "             \"k 3-app\": \"g-\", \n",
        "            #  \"Uniform\": \"y-\"\n",
        "            }\n",
        "\n",
        "# set_of_rules_all_k(\"all_k\", \"all_k_phi_7\", all_rules, 25, 100, .1)\n",
        "# m_best_k(\"all_km\", \"m_best_k_phi10\", all_rules, 1, 100)\n",
        "# dist_m_best_k(\"all_km\", \"dist_m_best_k_phi1\", all_rules, .1, 100)\n",
        "dist_of_best_k(\"all_k\", \"dist_best_k\", [1, 2, 2, 2, 3, 3, 4, 5, 6, 9, 25], all_rules, 25, 100)"
      ],
      "metadata": {
        "id": "yMxNEP01NCoF"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "    def v_to_ranking(v, n):\n",
        "    \"\"\"This function computes the corresponding permutation given a decomposition vector.\n",
        "        Parameters\n",
        "        ----------\n",
        "        v: ndarray\n",
        "            Decomposition vector, same length as the permutation, last item must be 0\n",
        "        n: int\n",
        "            Length of the permutation\n",
        "        Returns\n",
        "        -------\n",
        "        ndarray\n",
        "            The permutation corresponding to the decomposition vectors.\n",
        "    \"\"\"\n",
        "    rem = list(range(n))\n",
        "    rank = np.full(n, np.nan)\n",
        "    for i in range(len(v)):\n",
        "        rank[i] = rem[v[i]]\n",
        "        rem.pop(v[i])\n",
        "    return rank.astype(int)\n",
        "\n",
        "\n",
        "#m rankings of size n\n",
        "def sample_mallow(m, n, phi=0.5):\n",
        "\n",
        "    theta = -np.log(phi)\n",
        "\n",
        "    theta = np.full(n-1, theta)\n",
        "\n",
        "    s0 = np.array(range(n))\n",
        "\n",
        "    rnge = np.array(range(n-1))\n",
        "\n",
        "    psi = (1 - np.exp(( - n + rnge )*(theta[ rnge ])))/(1 - np.exp( -theta[rnge]))\n",
        "    vprobs = np.zeros((n, n))\n",
        "    for j in range(n-1):\n",
        "        vprobs[j][0] = 1.0/psi[j]\n",
        "        for r in range(1, n-j):\n",
        "            vprobs[j][r] = np.exp( -theta[j] * r ) / psi[j]\n",
        "    sample = []\n",
        "    vs = []\n",
        "    for samp in range(m):\n",
        "        v = [np.random.choice(n, p=vprobs[i, :]) for i in range(n-1)]\n",
        "        v += [0]\n",
        "        ranking = v_to_ranking(v, n)\n",
        "        sample.append(ranking)\n",
        "\n",
        "    sample = [list(s[s0]) for s in sample]\n",
        "\n",
        "    return sample\n"
      ],
      "metadata": {
        "id": "5qDvnsrDsMj4"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Some small functions that we use later"
      ],
      "metadata": {
        "id": "3ia1gh54BE6u"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#m rankings of size n\n",
        "def sample_mallow(m, n, phi=0.5):\n",
        "\n",
        "    if phi == 1.0:\n",
        "        \n",
        "        rankings = []\n",
        "        for i in range(m):\n",
        "            rankings.append(np.random.permutation(n))\n",
        "\n",
        "        # print(rankings)\n",
        "        return rankings\n",
        "\n",
        "    theta = -np.log(phi)\n",
        "\n",
        "    theta = np.full(n-1, theta)\n",
        "\n",
        "    s0 = np.array(range(n))\n",
        "\n",
        "    rnge = np.array(range(n-1))\n",
        "\n",
        "    psi = (1 - np.exp(( - n + rnge )*(theta[ rnge ])))/(1 - np.exp( -theta[rnge]))\n",
        "    vprobs = np.zeros((n, n))\n",
        "    for j in range(n-1):\n",
        "        vprobs[j][0] = 1.0/psi[j]\n",
        "        for r in range(1, n-j):\n",
        "            vprobs[j][r] = np.exp( -theta[j] * r ) / psi[j]\n",
        "    sample = []\n",
        "    vs = []\n",
        "    for samp in range(m):\n",
        "        v = [np.random.choice(n, p=vprobs[i, :]) for i in range(n-1)]\n",
        "        v += [0]\n",
        "        ranking = v_to_ranking(v, n)\n",
        "        sample.append(ranking)\n",
        "\n",
        "    sample = [list(s[s0]) for s in sample]\n",
        "\n",
        "    return sample\n",
        "\n",
        "def random_utilities(n, m):\n",
        "    ''' Return n uniformly random vector in the m-simplex '''\n",
        "    return np.random.dirichlet(np.ones(m), size=n)\n",
        "\n",
        "\n",
        "def random_preferences(n, m):\n",
        "    temp = []\n",
        "    for i in range(n):\n",
        "        temp.append(list(np.random.permutation(np.arange(m))))\n",
        "\n",
        "    return temp\n",
        "\n",
        "def k_geq_rank_c(pref, c, k):\n",
        "    index = pref.index(c) + 1\n",
        "    if index <= k:\n",
        "        return 1\n",
        "    else:\n",
        "        return 0\n",
        "\n",
        "def generate_preferences(utilities):\n",
        "    temp = []\n",
        "    for u in utilities:\n",
        "        temp.append(list((-u).argsort()))\n",
        "    return temp\n",
        "\n",
        "def distortion(m, c_distribution, utilities):\n",
        "    sw = [0]*m\n",
        "    for u in utilities:\n",
        "        sw = list(map(add, sw, u))\n",
        "    expected_sw = 0\n",
        "    for c, c_pr in enumerate(c_distribution):\n",
        "        expected_sw += c_pr*sw[c]\n",
        "    return max(sw)/expected_sw\n",
        "    \n",
        "\n",
        "def scoring_vec_distribution(scoring_vec, preferences):\n",
        "    m = len(preferences[0])\n",
        "    n = len(preferences)\n",
        "    total_score = [0]*(m)\n",
        "    for pref in preferences:\n",
        "        for i, c in enumerate(pref):\n",
        "            total_score[c] += scoring_vec[i]\n",
        "    return [sc/(n*(sum(scoring_vec))) for sc in total_score]\n",
        "\n",
        "\n",
        "def scoring_vec_k_committee(scoring_vec, preferences, k):\n",
        "    m = len(preferences[0])\n",
        "    n = len(preferences)\n",
        "    total_score = [0]*(m)\n",
        "    for pref in preferences:\n",
        "        for i, c in enumerate(pref):\n",
        "            total_score[c] += scoring_vec[i]\n",
        "    arr = np.array(total_score)\n",
        "    selected = arr.argsort()[-k:][::-1]\n",
        "    prob = [0]*m\n",
        "    for x in selected:\n",
        "        prob[x] = 1/k\n",
        "    return prob\n",
        "\n",
        "def k_approval_distribution(k, preferences):\n",
        "    m = len(preferences[0])\n",
        "    n = len(preferences)\n",
        "    score = [0]*(m)\n",
        "    for pref in preferences:\n",
        "        for c in pref[:k]:\n",
        "            score[c] += 1\n",
        "    return [sc/(n*k) for sc in score]\n",
        "\n",
        "\n",
        "def multi_approval_distribution(k_list, preferences, w=None):\n",
        "    if w == None:\n",
        "        w = [1/len(k_list)]*len(k_list)\n",
        "    temp = [0]*(len(preferences[0]))\n",
        "    for i, k in enumerate(k_list):\n",
        "        temp = list(map(add, temp, [x*w[i] for x in k_approval_distribution(k, preferences)]))\n",
        "    return temp\n"
      ],
      "metadata": {
        "id": "GZsjiId6QFgy"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Instance optimal rule, and new LP to find the worst-case utilities for a distribution."
      ],
      "metadata": {
        "id": "upGOdCTDB4H6"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def check_distortion_is_at_most(d, pref, p):\n",
        "    # print(\"####################\\n\\n\")\n",
        "    m = len(pref[0])\n",
        "\n",
        "    dist_per_candidate = [0 for _ in range(m)]\n",
        "\n",
        "    for i, pref_i in enumerate(pref):\n",
        "        # print(\"@i:\", i)\n",
        "        partial_sum_p = [None for _ in range(m)]\n",
        "        for ind, c in enumerate(pref_i):\n",
        "            partial_sum_p[ind] = d * p[c]\n",
        "            if ind > 0:\n",
        "                partial_sum_p[ind] += partial_sum_p[ind - 1]\n",
        "        # print(\"check ps\", partial_sum_p)\n",
        "\n",
        "        ps_partial_max = [None for _ in range(m)]\n",
        "        for ind in range(m):\n",
        "            ps_partial_max[ind] = -partial_sum_p[ind] / (ind + 1)\n",
        "            if ind > 0:\n",
        "                ps_partial_max[ind] = max(ps_partial_max[ind], ps_partial_max[ind - 1])\n",
        "        # print(\"max ps/ind\", ps_partial_max)\n",
        "\n",
        "        reverse_partial_max = -d * 100\n",
        "        for rev_ind, c in enumerate(reversed(pref_i)):\n",
        "            ind = m - rev_ind - 1\n",
        "            current_delta_i = (1 - partial_sum_p[ind]) / (ind + 1)\n",
        "            reverse_partial_max = max(current_delta_i, reverse_partial_max)\n",
        "            # print(\"@c:\", c , \"check:\", ps_partial_max[ind], reverse_partial_max)\n",
        "            dist_per_candidate[c] += max(ps_partial_max[ind], reverse_partial_max)\n",
        "\n",
        "        # print(\"partial distortion after agent \", i, \" is \", dist_per_candidate)\n",
        "\n",
        "    return max(dist_per_candidate) <= 0\n",
        "\n",
        "\n",
        "def find_distortion_of_distribution(pref, p):\n",
        "    m = len(pref[0])\n",
        "\n",
        "    if not check_distortion_is_at_most(m * m, pref, p):\n",
        "        return m * m\n",
        "\n",
        "    lo_d, hi_d = 0.0, 1.0 * m * m\n",
        "\n",
        "    for binary_search_iter in range(100):\n",
        "        mid_d = (lo_d + hi_d) / 2\n",
        "        if check_distortion_is_at_most(mid_d, pref, p):\n",
        "            hi_d = mid_d\n",
        "        else:\n",
        "            lo_d = mid_d\n",
        "\n",
        "        # if hi_d - lo_d < 1e-16:\n",
        "        #     break\n",
        "\n",
        "    return hi_d\n",
        "    \n",
        "def instance_optimal(pref):\n",
        "    n = len(pref)\n",
        "    m = len(pref[0])\n",
        "    # Create PuLP problem\n",
        "    prob = pulp.LpProblem(\"Distortion\", pulp.LpMinimize)\n",
        "    \n",
        "    # Define delta and P variables\n",
        "    delta_vars = pulp.LpVariable.dicts(\"delta\", [(i, c) for i in range(n) for c in range(m)])\n",
        "    P_vars = pulp.LpVariable.dicts(\"P\", [c for c in range(m)], lowBound=0)\n",
        "    \n",
        "    # Add objective function to problem\n",
        "    prob += pulp.lpSum([P_vars[c] for c in range(m)])\n",
        "    \n",
        "\n",
        "    # Add constraints to problem\n",
        "    for i in range(n):\n",
        "        for c in range(m):\n",
        "            for k in range(1, m+1):\n",
        "                prob += delta_vars[(i, c)] >= k_geq_rank_c(pref[i], c, k)/k - pulp.lpSum([P_vars[pref[i][j]] for j in range(k)])/k\n",
        "                \n",
        "    for c in range(m):\n",
        "        prob += pulp.lpSum([delta_vars[(i, c)] for i in range(n)]) <= 0\n",
        "\n",
        "                \n",
        "    for c in range(m):\n",
        "        prob += P_vars[c] >= 0\n",
        "    \n",
        "    # Solve problem\n",
        "    prob.solve(pulp.GUROBI_CMD( options = [('LogToConsole', 0)]))\n",
        "\n",
        "    # Return optimal values for delta and P\n",
        "    delta_opt = {(i, c): pulp.value(delta_vars[(i, c)]) for i in range(n) for c in range(m)}\n",
        "    P_opt = [pulp.value(P_vars[c]) for c in range(m)]\n",
        "\n",
        "\n",
        "    return [x / sum(P_opt) for x in P_opt]\n",
        "\n",
        "\n",
        "\n",
        "def new_worst_utilities(pref, distribution):\n",
        "    n = len(pref)\n",
        "    m = len(pref[0])\n",
        "    \n",
        "    # Create PuLP problem\n",
        "    prob = pulp.LpProblem(\"Worst_Utilities\", pulp.LpMinimize)\n",
        "    \n",
        "    # Define delta and D variables\n",
        "    D = pulp.LpVariable('D', lowBound=0)\n",
        "    delta_vars = pulp.LpVariable.dicts(\"delta\", [(i, c) for i in range(n) for c in range(m)])\n",
        "    \n",
        "    # Add objective function to problem\n",
        "    prob += D\n",
        "    \n",
        "\n",
        "    # Add constraints to problem\n",
        "    for i in range(n):\n",
        "        for c in range(m):\n",
        "            for k in range(1, m+1):\n",
        "                prob += delta_vars[(i, c)] >= k_geq_rank_c(pref[i], c, k)/k - D*sum([distribution[pref[i][j]] for j in range(k)])/k\n",
        "                \n",
        "    for c in range(m):\n",
        "        prob += pulp.lpSum([delta_vars[(i, c)] for i in range(n)]) <= 0\n",
        "\n",
        "    \n",
        "    # Solve problem\n",
        "    prob.solve(pulp.GUROBI_CMD( options = [('LogToConsole', 0)]))\n",
        "\n",
        "    # Return optimal values for delta and P\n",
        "    delta_opt = {(i, c): pulp.value(delta_vars[(i, c)]) for i in range(n) for c in range(m)}\n",
        "    # P_opt = [pulp.value(P_vars[c]) for c in range(m)]\n",
        "\n",
        "    return pulp.value(D)"
      ],
      "metadata": {
        "id": "e7qYyYZEaYoJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Rules"
      ],
      "metadata": {
        "id": "hSnyffnABZ0F"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def borda(preferences):\n",
        "    m = len(preferences[0])\n",
        "    return scoring_vec_distribution([m-1-i for i in range(m)], preferences)\n",
        "\n",
        "def borda_k(preferences, k):\n",
        "    m = len(preferences[0])\n",
        "    return scoring_vec_k_committee([m-1-i for i in range(m)], preferences, k)\n",
        "\n",
        "def borda_det(preferences):\n",
        "    return borda_k(preferences, 1)\n",
        "   \n",
        "def plurality(preferences):\n",
        "    return k_approval_distribution(1, preferences)\n",
        "\n",
        "\n",
        "def plurality_k(preferences, k):\n",
        "    m = len(preferences[0])\n",
        "    plu_score = [0]*m\n",
        "    plu_score[0] = 1\n",
        "    ret = scoring_vec_k_committee(plu_score, preferences, k)\n",
        "    # print(ret)\n",
        "    return ret\n",
        "\n",
        "def plurality_det(preferences):\n",
        "    return plurality_k(preferences, 1)\n",
        "\n",
        "\n",
        "def harmonic(preferences):\n",
        "    m = len(preferences[0])\n",
        "    harmonioc_score = [1/(i+1) for i in range(m)]\n",
        "    return scoring_vec_distribution(harmonioc_score, preferences)\n",
        "\n",
        "\n",
        "def harmonic_k(preferences, k):\n",
        "    m = len(preferences[0])\n",
        "    harmonioc_score = [1/(i+1) for i in range(m)]\n",
        "    return scoring_vec_k_committee(harmonioc_score, preferences, k)\n",
        "\n",
        "def harmonic_det(preferences):\n",
        "    return harmonic_k(preferences, 1)\n",
        "\n",
        "def sqrt_m_approval(preferences):\n",
        "    m = len(preferences[0])\n",
        "    return k_approval_distribution(math.isqrt(m), preferences)\n",
        "\n",
        "def unif(preferences, k=0):\n",
        "    m = len(preferences[0])\n",
        "    return [1.0/m]*m\n",
        "\n",
        "def three_app_k(preferences, k):\n",
        "    m = len(preferences[0])\n",
        "    plu_score = [0]*m\n",
        "    plu_score[0] = 1/3\n",
        "    plu_score[1] = 1/3\n",
        "    plu_score[2] = 1/3\n",
        "    return scoring_vec_k_committee(plu_score, preferences, k)\n",
        "\n",
        "def three_app_det(preferences):\n",
        "    ret = three_app_k(preferences, 1)\n",
        "    # print(ret)\n",
        "    return ret"
      ],
      "metadata": {
        "id": "8gPEMK3klY7E"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Experiments"
      ],
      "metadata": {
        "id": "Gaqy9Sr6Bn1K"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "def rule_experiment(rule, inputs):\n",
        "    results = []\n",
        "    for m_inputs in inputs:\n",
        "        m_result = []\n",
        "        for phi_inputs in m_inputs:\n",
        "            r = len(phi_inputs)\n",
        "            phi_result = []\n",
        "            for prefs in phi_inputs:\n",
        "                p = rule(prefs)\n",
        "                # print(\"prob: {}\".format(p))\n",
        "\n",
        "                d = find_distortion_of_distribution(prefs, p)\n",
        "                phi_result.append(d)\n",
        "            m_result.append(phi_result)\n",
        "        results.append(m_result)\n",
        "    return results\n",
        "\n",
        "\n",
        "def set_of_rules_experiment(rules, max_m, n=100, run_num=2):\n",
        "    all_m = list(range(5, max_m+1, 5))\n",
        "    all_phi = [x/10 for x in range(10)]\n",
        "    all_phi.append(1)\n",
        "    inputs = []\n",
        "    for m in all_m:\n",
        "        input_m = []\n",
        "        for phi in all_phi:\n",
        "            input_m_and_phi = []\n",
        "            \n",
        "            for i in range(run_num):\n",
        "                input_m_and_phi.append(sample_mallow(n, m, phi))\n",
        "            input_m.append(input_m_and_phi)\n",
        "        inputs.append(input_m)\n",
        "    print(\"Input Generated!\")\n",
        "    print(\"max_m: {}, n: {}, run_num: {}\".format(m, n, run_num))\n",
        "    \n",
        "    final_results = {}\n",
        "    time0 = time.time()\n",
        "    times = []\n",
        "    for r in rules:\n",
        "        start = time.time()\n",
        "        rule = r[\"func\"]\n",
        "        results = rule_experiment(rule, inputs)\n",
        "        # print(r[\"name\"])\n",
        "        final_results[r[\"name\"]] = results\n",
        "        times.append(int(time.time()-start))\n",
        "        print(times, len(times))\n",
        "     \n",
        "    print(times)\n",
        "    print(\"Total time: {}\".format(int(time.time()-time0)))\n",
        "    return final_results\n",
        "\n",
        "def all_k_for_rule(rule, m, inputs):\n",
        "    results = []\n",
        "    for k in range(1, m+1):      \n",
        "        results.append(rule_experiment(lambda x: rule(x, k), inputs))\n",
        "        # print(m)\n",
        "    # print(\"results: {}\".format(rule_experiment(lambda x: rule(x, m), inputs)))\n",
        "    return results\n",
        "\n",
        "def set_of_rules_best_k(rules, max_m=30, n=100, run_num=2):\n",
        "    all_m = list(range(5, max_m+1, 5))\n",
        "\n",
        "    all_phi = [0.1, 0.5, 1]\n",
        "    inputs = []\n",
        "    final_results = {}\n",
        "    for m in all_m:\n",
        "        input_m = []\n",
        "        for phi in all_phi:\n",
        "            input_m_and_phi = []\n",
        "            for i in range(run_num):\n",
        "                input_m_and_phi.append(sample_mallow(n, m, phi))\n",
        "            input_m.append(input_m_and_phi)\n",
        "        inputs.append(input_m)\n",
        "    print(\"Input Generated!\")\n",
        "    print(\"max_m: {}, n: {}, run_num: {}\".format(max_m, n, run_num))\n",
        "    # print(inputs)\n",
        "    for r in rules:\n",
        "        rule = r[\"func\"]\n",
        "        print(r[\"name\"])\n",
        "        results = all_k_for_rule(rule, m, inputs)\n",
        "        final_results[r[\"name\"]] = results\n",
        "        # print(r[\"name\"])\n",
        "    return final_results\n",
        "\n",
        "all_rules = [\n",
        "{\"name\": \"k Borda\", \"func\": borda_k, \"color\": \"c-\"},\n",
        "             {\"name\": \"k Plurality\", \"func\": plurality_k, \"color\": \"b-\"},\n",
        "             {\"name\": \"k Harmonic\", \"func\": harmonic_k, \"color\": \"r-\"},  \n",
        "             {\"name\": \"k 3-app\", \"func\": three_app_k, \"color\": \"g-\"},\n",
        "             {\"name\": \"Uniform\", \"func\": unif, \"color\": \"y-\"}]\n",
        "\n",
        "ans = set_of_rules_best_k(all_rules, 50, 100, 50)\n",
        "file_name = 'all_km/{}_2.json'.format(random.randint(10000, 99999))\n",
        "with open(file_name, 'w') as f:\n",
        "    json.dump(ans, f)\n",
        "\n",
        "all_rules = [\n",
        "    {\"name\": \"~ Borda\", \"func\": borda, \"color\": \"r--\"},\n",
        "             {\"name\": \"~ Plurality\", \"func\": plurality, \"color\": \"r-\"},\n",
        "             {\"name\": \"~ Harmonic\", \"func\": harmonic, \"color\": \"r:\"},  \n",
        "             {\"name\": \"~ Sqrt(m)-app\", \"func\": sqrt_m_approval, \"color\": \"r-.\"},\n",
        "             {\"name\": \"~ 3-app\", \"func\": lambda x: k_approval_distribution(3, x), \"color\": \"r-.\"},\n",
        "             {\"name\": \"Det. Borda\", \"func\": borda_det, \"color\": \"g--\"},\n",
        "             {\"name\": \"Det. Plurality\", \"func\": plurality_det, \"color\": \"g-\"},\n",
        "             {\"name\": \"Det. Harmonic\", \"func\": harmonic_det, \"color\": \"g:\"},  \n",
        "             {\"name\": \"Det. 3-app\", \"func\": three_app_det, \"color\": \"g-.\"},\n",
        "             {\"name\": \"3 Borda\", \"func\": lambda x: borda_k(x, 3), \"color\": \"b--\"},\n",
        "             {\"name\": \"3 Plurality\", \"func\": lambda x: plurality_k(x, 3), \"color\": \"b-\"},\n",
        "             {\"name\": \"3 Harmonic\", \"func\": lambda x: harmonic_k(x, 3), \"color\": \"b:\"},  \n",
        "             {\"name\": \"3 3-app\", \"func\": lambda x: three_app_k(x, 3), \"color\": \"b-.\"},\n",
        "             {\"name\": \"Uniform\", \"func\": unif, \"color\": \"y-\"},\n",
        "             {\"name\": \"Opt\", \"func\": instance_optimal, \"color\":\"m\"}\n",
        "             ]\n",
        "\n",
        "# ans = set_of_rules_experiment(all_rules, 50, 100, 50)\n",
        "# file_name = 'data/{}_new.json'.format(random.randint(10000, 99999))\n",
        "# with open(file_name, 'w') as f:\n",
        "#     json.dump(ans, f)"
      ],
      "metadata": {
        "id": "itwNDohQsvKJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Old LP"
      ],
      "metadata": {
        "id": "q2HzaxcaA8U3"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "def find_worst_utilities_by_LP(n, m, a, prefs, probs):\n",
        "    # Create LP problem\n",
        "    prob = pulp.LpProblem('Worst_Utilities', pulp.LpMinimize)\n",
        "    # prob.setSolverSelection(pulp.GUROBI_CMD())\n",
        "    \n",
        "    # Define decision variables\n",
        "    X = pulp.LpVariable('X', lowBound=0)\n",
        "    u = pulp.LpVariable.dicts('u', ((i, j) for i in range(n) for j in range(m)), lowBound=0)\n",
        "    \n",
        "    # Define objective function\n",
        "    prob += pulp.lpSum([probs[c]*pulp.lpSum([u[i, c] for i in range(n)]) for c in range(m)])\n",
        "    \n",
        "    # Define constraints\n",
        "    for i in range(n):\n",
        "        prob += pulp.lpSum([u[i, c] for c in range(m)]) == X\n",
        "        for j in range(m-1):\n",
        "            c, b = prefs[i][j], prefs[i][j+1]\n",
        "            prob += u[i, c] >= u[i, b]\n",
        "            \n",
        "    prob += pulp.lpSum([u[i, a] for i in range(n)]) == 1\n",
        "    \n",
        "    # Solve the LP problem\n",
        "    prob.solve(pulp.GUROBI_CMD( options = [('LogToConsole', 0)]))\n",
        "    \n",
        "    u = [[u[i, j].value() for j in range(m)] for i in range(n)]\n",
        "    return [[element/X.value() for element in row] for row in u]\n",
        "\n",
        "\n",
        "\n",
        "def generate_worst_distortion(preferences, distribution):\n",
        "    m = len(preferences[0])\n",
        "    n = len(preferences)\n",
        "    max_utilities = []\n",
        "    max_dist = 0\n",
        "    for c in range(m):\n",
        "        u = find_worst_utilities_by_LP(n, m, c, preferences, distribution)\n",
        "        d = distortion(m, distribution, u)\n",
        "        if d > max_dist:\n",
        "            max_dist = d\n",
        "            max_utilities = u\n",
        "    return max_dist"
      ],
      "metadata": {
        "id": "eiUEsNIdA7V3"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}