{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "CheckList.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "toc_visible": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "5f44a54d876b46a8b163562f99196208": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_28143c68c4f343afb616ec490d979c8e",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_dff2a5f0486b47bca81c731a26694fd8",
              "IPY_MODEL_7e04d3d0e459439796f321fce45d4b59"
            ]
          }
        },
        "28143c68c4f343afb616ec490d979c8e": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "dff2a5f0486b47bca81c731a26694fd8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_49652ad678704e92927573490bd3dd1e",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 732,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 732,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_ec8aaf93f75343599ca1f76a1fa26e6f"
          }
        },
        "7e04d3d0e459439796f321fce45d4b59": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_1ae62efc439a41198064e8f3baeacda4",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 732/732 [00:00&lt;00:00, 1.13kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_fcb19d385beb4a5ea0e2c49ff25038a4"
          }
        },
        "49652ad678704e92927573490bd3dd1e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "ec8aaf93f75343599ca1f76a1fa26e6f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1ae62efc439a41198064e8f3baeacda4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "fcb19d385beb4a5ea0e2c49ff25038a4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "c8bbb81a29bf483398c803a3137df2cf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_e6a20e45c04d4b7ca4403c5b2fdfcbcd",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_039a48b64fbf4476aa2239e6a2b011b0",
              "IPY_MODEL_2ffd8bdb4c334db2b16738d2ad8c4b47"
            ]
          }
        },
        "e6a20e45c04d4b7ca4403c5b2fdfcbcd": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "039a48b64fbf4476aa2239e6a2b011b0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_d85c54a0e5cf4697ad307d765ac35632",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 46747112,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 46747112,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_50bf4fff68d8429c9500c74e17793901"
          }
        },
        "2ffd8bdb4c334db2b16738d2ad8c4b47": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_5bd4457b1f64454aac2bc4d2d7ff0d66",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 46.7M/46.7M [00:03&lt;00:00, 14.4MB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_5f6b4c1fbb404f5eb570b30435f63a99"
          }
        },
        "d85c54a0e5cf4697ad307d765ac35632": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "50bf4fff68d8429c9500c74e17793901": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "5bd4457b1f64454aac2bc4d2d7ff0d66": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "5f6b4c1fbb404f5eb570b30435f63a99": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "9a436c87814444ec8a00368cb7fa30d6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_2969c124dc9c4b4ea621431c35d90243",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_89a6918e41624f79be4f3e8526ce1da2",
              "IPY_MODEL_6d474c8bdee0468785af9b8497108075"
            ]
          }
        },
        "2969c124dc9c4b4ea621431c35d90243": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "89a6918e41624f79be4f3e8526ce1da2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_0782a22fe2ed4dfcab738484bbdc27b0",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 760289,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 760289,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_708ca62908f64aa58e65888656d82631"
          }
        },
        "6d474c8bdee0468785af9b8497108075": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_7e9918670348461481b2de18f32fd7c4",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 760k/760k [00:01&lt;00:00, 470kB/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_ea96c90a7bd441acbbd1be6aca5c53c0"
          }
        },
        "0782a22fe2ed4dfcab738484bbdc27b0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "708ca62908f64aa58e65888656d82631": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "7e9918670348461481b2de18f32fd7c4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "ea96c90a7bd441acbbd1be6aca5c53c0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "c0b1cc03c14d47918b8e395696961a14": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_c1954c05bb8f4d5eb6aa04364c860d62",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_208fbd4e59d24a718b82fb081b4c3726",
              "IPY_MODEL_5d57c423d2214a89831a3822231e4bb6"
            ]
          }
        },
        "c1954c05bb8f4d5eb6aa04364c860d62": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "208fbd4e59d24a718b82fb081b4c3726": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_306c436ffd944b74a97956017cf32a74",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 156,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 156,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_885fdeeb4bc5422a9378408f0f2bccc9"
          }
        },
        "5d57c423d2214a89831a3822231e4bb6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_1f8a35ac9c8a4f08ae992b98426d1054",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 156/156 [00:00&lt;00:00, 353B/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_f599408a797f4508a27228760d6555e7"
          }
        },
        "306c436ffd944b74a97956017cf32a74": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "885fdeeb4bc5422a9378408f0f2bccc9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "1f8a35ac9c8a4f08ae992b98426d1054": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "f599408a797f4508a27228760d6555e7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "d274695d190e4593baa492a83e8bf4a0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "state": {
            "_view_name": "HBoxView",
            "_dom_classes": [],
            "_model_name": "HBoxModel",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "box_style": "",
            "layout": "IPY_MODEL_b0ee26f638504d0eb3ef2b8e20319faf",
            "_model_module": "@jupyter-widgets/controls",
            "children": [
              "IPY_MODEL_a760693ab85e432cb0ef9ec5d86603f0",
              "IPY_MODEL_f4167f262d4b4782817e641b25cebae1"
            ]
          }
        },
        "b0ee26f638504d0eb3ef2b8e20319faf": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "a760693ab85e432cb0ef9ec5d86603f0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "state": {
            "_view_name": "ProgressView",
            "style": "IPY_MODEL_2431343e67d04980befd770a7655743c",
            "_dom_classes": [],
            "description": "Downloading: 100%",
            "_model_name": "FloatProgressModel",
            "bar_style": "success",
            "max": 25,
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": 25,
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "orientation": "horizontal",
            "min": 0,
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_95c5f3954dd145b99ec651ea7364facb"
          }
        },
        "f4167f262d4b4782817e641b25cebae1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "state": {
            "_view_name": "HTMLView",
            "style": "IPY_MODEL_f5020dd2f7834eb4961853411b665202",
            "_dom_classes": [],
            "description": "",
            "_model_name": "HTMLModel",
            "placeholder": "​",
            "_view_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "value": " 25.0/25.0 [00:00&lt;00:00, 217B/s]",
            "_view_count": null,
            "_view_module_version": "1.5.0",
            "description_tooltip": null,
            "_model_module": "@jupyter-widgets/controls",
            "layout": "IPY_MODEL_17daa2c463c947db9eabc19225014325"
          }
        },
        "2431343e67d04980befd770a7655743c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "ProgressStyleModel",
            "description_width": "initial",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "bar_color": null,
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "95c5f3954dd145b99ec651ea7364facb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        },
        "f5020dd2f7834eb4961853411b665202": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_view_name": "StyleView",
            "_model_name": "DescriptionStyleModel",
            "description_width": "",
            "_view_module": "@jupyter-widgets/base",
            "_model_module_version": "1.5.0",
            "_view_count": null,
            "_view_module_version": "1.2.0",
            "_model_module": "@jupyter-widgets/controls"
          }
        },
        "17daa2c463c947db9eabc19225014325": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "state": {
            "_view_name": "LayoutView",
            "grid_template_rows": null,
            "right": null,
            "justify_content": null,
            "_view_module": "@jupyter-widgets/base",
            "overflow": null,
            "_model_module_version": "1.2.0",
            "_view_count": null,
            "flex_flow": null,
            "width": null,
            "min_width": null,
            "border": null,
            "align_items": null,
            "bottom": null,
            "_model_module": "@jupyter-widgets/base",
            "top": null,
            "grid_column": null,
            "overflow_y": null,
            "overflow_x": null,
            "grid_auto_flow": null,
            "grid_area": null,
            "grid_template_columns": null,
            "flex": null,
            "_model_name": "LayoutModel",
            "justify_items": null,
            "grid_row": null,
            "max_height": null,
            "align_content": null,
            "visibility": null,
            "align_self": null,
            "height": null,
            "min_height": null,
            "padding": null,
            "grid_auto_rows": null,
            "grid_gap": null,
            "max_width": null,
            "order": null,
            "_view_module_version": "1.2.0",
            "grid_template_areas": null,
            "object_position": null,
            "object_fit": null,
            "grid_auto_columns": null,
            "margin": null,
            "display": null,
            "left": null
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "metadata": {
        "id": "S4ig1mY1Cgra"
      },
      "source": [
        "# Install python dependencies\n",
        "!pip install checklist torch transformers sentencepiece"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "2T5-XS5BCvp7"
      },
      "source": [
        "!python -m spacy download en_core_web_sm"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "M3THQYu1CblP"
      },
      "source": [
        "# Download and extract CheckList repository for the test suites\n",
        "!git clone https://github.com/marcotcr/checklist.git\n",
        "!tar xvzf checklist/release_data.tar.gz"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "ZubJEKTNC0uK"
      },
      "source": [
        "import checklist\n",
        "import logging\n",
        "import numpy as np\n",
        "import torch\n",
        "\n",
        "from checklist.test_suite import TestSuite\n",
        "\n",
        "logging.basicConfig(level=logging.ERROR)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "SdgHD3EUsKtM"
      },
      "source": [
        "# Sentiment Analysis CheckList"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "nFvjRcNPEBRS"
      },
      "source": [
        "def chunks(l, n):\n",
        "    \"\"\"Yield successive n-sized chunks from l.\"\"\"\n",
        "    for i in range(0, len(l), n):\n",
        "        yield l[i:i + n]\n",
        "\n",
        "def batch_predict(model, data, batch_size=128):\n",
        "    ret = []\n",
        "    for d in chunks(data, batch_size):\n",
        "        ret.extend(model(d))\n",
        "    return ret\n",
        "\n",
        "def pred_and_conf(data):\n",
        "    # change format to softmax, make everything in [0.33, 0.66] range be predicted as neutral\n",
        "    preds = batch_predict(model, data)\n",
        "    pr = np.array([x['score'] if x['label'] == 'POSITIVE' else 1 - x['score'] for x in preds])\n",
        "    pp = np.zeros((pr.shape[0], 3))\n",
        "    margin_neutral = 1/3.\n",
        "    mn = margin_neutral / 2.\n",
        "    neg = pr < 0.5 - mn\n",
        "    pp[neg, 0] = 1 - pr[neg]\n",
        "    pp[neg, 2] = pr[neg]\n",
        "    pos = pr > 0.5 + mn\n",
        "    pp[pos, 0] = 1 - pr[pos]\n",
        "    pp[pos, 2] = pr[pos]\n",
        "    neutral_pos = (pr >= 0.5) * (pr < 0.5 + mn)\n",
        "    pp[neutral_pos, 1] = 1 - (1 / margin_neutral) * np.abs(pr[neutral_pos] - 0.5)\n",
        "    pp[neutral_pos, 2] = 1 - pp[neutral_pos, 1]\n",
        "    neutral_neg = (pr < 0.5) * (pr > 0.5 - mn)\n",
        "    pp[neutral_neg, 1] = 1 - (1 / margin_neutral) * np.abs(pr[neutral_neg] - 0.5)\n",
        "    pp[neutral_neg, 0] = 1 - pp[neutral_neg, 1]\n",
        "    preds = np.argmax(pp, axis=1)\n",
        "    return preds, pp"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "llMD73-UD-pR",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 264,
          "referenced_widgets": [
            "5f44a54d876b46a8b163562f99196208",
            "28143c68c4f343afb616ec490d979c8e",
            "dff2a5f0486b47bca81c731a26694fd8",
            "7e04d3d0e459439796f321fce45d4b59",
            "49652ad678704e92927573490bd3dd1e",
            "ec8aaf93f75343599ca1f76a1fa26e6f",
            "1ae62efc439a41198064e8f3baeacda4",
            "fcb19d385beb4a5ea0e2c49ff25038a4",
            "c8bbb81a29bf483398c803a3137df2cf",
            "e6a20e45c04d4b7ca4403c5b2fdfcbcd",
            "039a48b64fbf4476aa2239e6a2b011b0",
            "2ffd8bdb4c334db2b16738d2ad8c4b47",
            "d85c54a0e5cf4697ad307d765ac35632",
            "50bf4fff68d8429c9500c74e17793901",
            "5bd4457b1f64454aac2bc4d2d7ff0d66",
            "5f6b4c1fbb404f5eb570b30435f63a99",
            "9a436c87814444ec8a00368cb7fa30d6",
            "2969c124dc9c4b4ea621431c35d90243",
            "89a6918e41624f79be4f3e8526ce1da2",
            "6d474c8bdee0468785af9b8497108075",
            "0782a22fe2ed4dfcab738484bbdc27b0",
            "708ca62908f64aa58e65888656d82631",
            "7e9918670348461481b2de18f32fd7c4",
            "ea96c90a7bd441acbbd1be6aca5c53c0",
            "c0b1cc03c14d47918b8e395696961a14",
            "c1954c05bb8f4d5eb6aa04364c860d62",
            "208fbd4e59d24a718b82fb081b4c3726",
            "5d57c423d2214a89831a3822231e4bb6",
            "306c436ffd944b74a97956017cf32a74",
            "885fdeeb4bc5422a9378408f0f2bccc9",
            "1f8a35ac9c8a4f08ae992b98426d1054",
            "f599408a797f4508a27228760d6555e7",
            "d274695d190e4593baa492a83e8bf4a0",
            "b0ee26f638504d0eb3ef2b8e20319faf",
            "a760693ab85e432cb0ef9ec5d86603f0",
            "f4167f262d4b4782817e641b25cebae1",
            "2431343e67d04980befd770a7655743c",
            "95c5f3954dd145b99ec651ea7364facb",
            "f5020dd2f7834eb4961853411b665202",
            "17daa2c463c947db9eabc19225014325"
          ]
        },
        "outputId": "80da2aa6-a4ad-4172-d0f9-e5cfff685d16"
      },
      "source": [
        "from transformers import pipeline\n",
        "\n",
        "models = [\n",
        "  'textattack/bert-base-uncased-rotten_tomatoes',\n",
        "  'textattack/albert-base-v2-rotten_tomatoes',\n",
        "  'textattack/bert-base-uncased-yelp-polarity',\n",
        "  'textattack/albert-base-v2-yelp-polarity',\n",
        "  'textattack/bert-base-uncased-SST-2',\n",
        "  'textattack/albert-base-v2-SST-2',\n",
        "]\n",
        "\n",
        "# Load model\n",
        "model = pipeline('sentiment-analysis', model=models[5], device=0)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "5f44a54d876b46a8b163562f99196208",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=732.0, style=ProgressStyle(description_…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "c8bbb81a29bf483398c803a3137df2cf",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=46747112.0, style=ProgressStyle(descrip…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "9a436c87814444ec8a00368cb7fa30d6",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=760289.0, style=ProgressStyle(descripti…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "c0b1cc03c14d47918b8e395696961a14",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=156.0, style=ProgressStyle(description_…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "display_data",
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "d274695d190e4593baa492a83e8bf4a0",
              "version_minor": 0,
              "version_major": 2
            },
            "text/plain": [
              "HBox(children=(FloatProgress(value=0.0, description='Downloading', max=25.0, style=ProgressStyle(description_w…"
            ]
          },
          "metadata": {
            "tags": []
          }
        },
        {
          "output_type": "stream",
          "text": [
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "UehCexH_yLwH"
      },
      "source": [
        "# Load provided test suite\n",
        "suite_path = 'release_data/sentiment/sentiment_suite.pkl'\n",
        "suite = TestSuite.from_file(suite_path)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Fx-VRZpsyQLG",
        "outputId": "9bcce2b3-2da4-4305-daa8-3b6038851ca7"
      },
      "source": [
        "%time suite.run(pred_and_conf, seed=1) # textattack/bert-base-uncased-rotten_tomatoes"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running single positive words\n",
            "Predicting 34 examples\n",
            "Running single negative words\n",
            "Predicting 35 examples\n",
            "Running single neutral words\n",
            "Predicting 13 examples\n",
            "Running Sentiment-laden words in context\n",
            "Predicting 8658 examples\n",
            "Running neutral words in context\n",
            "Predicting 1716 examples\n",
            "Running intensifiers\n",
            "Predicting 4000 examples\n",
            "Running reducers\n",
            "Predicting 4000 examples\n",
            "Running change neutral words with BERT\n",
            "Predicting 5046 examples\n",
            "Running add positive phrases\n",
            "Predicting 5500 examples\n",
            "Running add negative phrases\n",
            "Predicting 5500 examples\n",
            "Running add random urls and handles\n",
            "Predicting 11000 examples\n",
            "Running punctuation\n",
            "Predicting 1170 examples\n",
            "Running typos\n",
            "Predicting 1000 examples\n",
            "Running 2 typos\n",
            "Predicting 1000 examples\n",
            "Running contractions\n",
            "Predicting 2074 examples\n",
            "Running change names\n",
            "Predicting 3641 examples\n",
            "Running change locations\n",
            "Predicting 9999 examples\n",
            "Running change numbers\n",
            "Predicting 11000 examples\n",
            "Running used to, but now\n",
            "Predicting 8000 examples\n",
            "Running \"used to\" should reduce\n",
            "Predicting 8736 examples\n",
            "Running protected: race\n",
            "Predicting 2400 examples\n",
            "Running protected: sexual\n",
            "Predicting 8400 examples\n",
            "Running protected: religion\n",
            "Predicting 13200 examples\n",
            "Running protected: nationality\n",
            "Predicting 12000 examples\n",
            "Running simple negations: negative\n",
            "Predicting 6318 examples\n",
            "Running simple negations: not negative\n",
            "Predicting 6786 examples\n",
            "Running simple negations: not neutral is still neutral\n",
            "Predicting 2496 examples\n",
            "Running simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Predicting 1992 examples\n",
            "Running simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Predicting 2124 examples\n",
            "Running simple negations: but it was not (neutral) should still be neutral\n",
            "Predicting 804 examples\n",
            "Running Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Predicting 1000 examples\n",
            "Running Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Predicting 1000 examples\n",
            "Running negation of neutral with neutral in the middle, should still neutral\n",
            "Predicting 1000 examples\n",
            "Running my opinion is what matters\n",
            "Predicting 8528 examples\n",
            "Running Q & A: yes\n",
            "Predicting 7644 examples\n",
            "Running Q & A: yes (neutral)\n",
            "Predicting 1560 examples\n",
            "Running Q & A: no\n",
            "Predicting 7644 examples\n",
            "Running Q & A: no (neutral)\n",
            "Predicting 1560 examples\n",
            "CPU times: user 2min 28s, sys: 1min 44s, total: 4min 13s\n",
            "Wall time: 4min 7s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "K16zALr2vql-",
        "outputId": "74da6cac-86da-400d-cc59-63eb029a13b7"
      },
      "source": [
        "%time suite.run(pred_and_conf, seed=1) # textattack/albert-base-v2-rotten_tomatoes"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running single positive words\n",
            "Predicting 34 examples\n",
            "Running single negative words\n",
            "Predicting 35 examples\n",
            "Running single neutral words\n",
            "Predicting 13 examples\n",
            "Running Sentiment-laden words in context\n",
            "Predicting 8658 examples\n",
            "Running neutral words in context\n",
            "Predicting 1716 examples\n",
            "Running intensifiers\n",
            "Predicting 4000 examples\n",
            "Running reducers\n",
            "Predicting 4000 examples\n",
            "Running change neutral words with BERT\n",
            "Predicting 5046 examples\n",
            "Running add positive phrases\n",
            "Predicting 5500 examples\n",
            "Running add negative phrases\n",
            "Predicting 5500 examples\n",
            "Running add random urls and handles\n",
            "Predicting 11000 examples\n",
            "Running punctuation\n",
            "Predicting 1170 examples\n",
            "Running typos\n",
            "Predicting 1000 examples\n",
            "Running 2 typos\n",
            "Predicting 1000 examples\n",
            "Running contractions\n",
            "Predicting 2074 examples\n",
            "Running change names\n",
            "Predicting 3641 examples\n",
            "Running change locations\n",
            "Predicting 9999 examples\n",
            "Running change numbers\n",
            "Predicting 11000 examples\n",
            "Running used to, but now\n",
            "Predicting 8000 examples\n",
            "Running \"used to\" should reduce\n",
            "Predicting 8736 examples\n",
            "Running protected: race\n",
            "Predicting 2400 examples\n",
            "Running protected: sexual\n",
            "Predicting 8400 examples\n",
            "Running protected: religion\n",
            "Predicting 13200 examples\n",
            "Running protected: nationality\n",
            "Predicting 12000 examples\n",
            "Running simple negations: negative\n",
            "Predicting 6318 examples\n",
            "Running simple negations: not negative\n",
            "Predicting 6786 examples\n",
            "Running simple negations: not neutral is still neutral\n",
            "Predicting 2496 examples\n",
            "Running simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Predicting 1992 examples\n",
            "Running simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Predicting 2124 examples\n",
            "Running simple negations: but it was not (neutral) should still be neutral\n",
            "Predicting 804 examples\n",
            "Running Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Predicting 1000 examples\n",
            "Running Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Predicting 1000 examples\n",
            "Running negation of neutral with neutral in the middle, should still neutral\n",
            "Predicting 1000 examples\n",
            "Running my opinion is what matters\n",
            "Predicting 8528 examples\n",
            "Running Q & A: yes\n",
            "Predicting 7644 examples\n",
            "Running Q & A: yes (neutral)\n",
            "Predicting 1560 examples\n",
            "Running Q & A: no\n",
            "Predicting 7644 examples\n",
            "Running Q & A: no (neutral)\n",
            "Predicting 1560 examples\n",
            "CPU times: user 2min 49s, sys: 1min 58s, total: 4min 48s\n",
            "Wall time: 4min 41s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Pp0JTc2WxFKE",
        "outputId": "6b2d7198-fca3-4e6e-a12c-0d12a315251f"
      },
      "source": [
        "%time suite.run(pred_and_conf, seed=1) # textattack/bert-base-uncased-yelp-polarity"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running single positive words\n",
            "Predicting 34 examples\n",
            "Running single negative words\n",
            "Predicting 35 examples\n",
            "Running single neutral words\n",
            "Predicting 13 examples\n",
            "Running Sentiment-laden words in context\n",
            "Predicting 8658 examples\n",
            "Running neutral words in context\n",
            "Predicting 1716 examples\n",
            "Running intensifiers\n",
            "Predicting 4000 examples\n",
            "Running reducers\n",
            "Predicting 4000 examples\n",
            "Running change neutral words with BERT\n",
            "Predicting 5046 examples\n",
            "Running add positive phrases\n",
            "Predicting 5500 examples\n",
            "Running add negative phrases\n",
            "Predicting 5500 examples\n",
            "Running add random urls and handles\n",
            "Predicting 11000 examples\n",
            "Running punctuation\n",
            "Predicting 1170 examples\n",
            "Running typos\n",
            "Predicting 1000 examples\n",
            "Running 2 typos\n",
            "Predicting 1000 examples\n",
            "Running contractions\n",
            "Predicting 2074 examples\n",
            "Running change names\n",
            "Predicting 3641 examples\n",
            "Running change locations\n",
            "Predicting 9999 examples\n",
            "Running change numbers\n",
            "Predicting 11000 examples\n",
            "Running used to, but now\n",
            "Predicting 8000 examples\n",
            "Running \"used to\" should reduce\n",
            "Predicting 8736 examples\n",
            "Running protected: race\n",
            "Predicting 2400 examples\n",
            "Running protected: sexual\n",
            "Predicting 8400 examples\n",
            "Running protected: religion\n",
            "Predicting 13200 examples\n",
            "Running protected: nationality\n",
            "Predicting 12000 examples\n",
            "Running simple negations: negative\n",
            "Predicting 6318 examples\n",
            "Running simple negations: not negative\n",
            "Predicting 6786 examples\n",
            "Running simple negations: not neutral is still neutral\n",
            "Predicting 2496 examples\n",
            "Running simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Predicting 1992 examples\n",
            "Running simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Predicting 2124 examples\n",
            "Running simple negations: but it was not (neutral) should still be neutral\n",
            "Predicting 804 examples\n",
            "Running Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Predicting 1000 examples\n",
            "Running Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Predicting 1000 examples\n",
            "Running negation of neutral with neutral in the middle, should still neutral\n",
            "Predicting 1000 examples\n",
            "Running my opinion is what matters\n",
            "Predicting 8528 examples\n",
            "Running Q & A: yes\n",
            "Predicting 7644 examples\n",
            "Running Q & A: yes (neutral)\n",
            "Predicting 1560 examples\n",
            "Running Q & A: no\n",
            "Predicting 7644 examples\n",
            "Running Q & A: no (neutral)\n",
            "Predicting 1560 examples\n",
            "CPU times: user 2min 29s, sys: 1min 44s, total: 4min 14s\n",
            "Wall time: 4min 8s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "8SRw5jY_zMND",
        "outputId": "0724843e-0102-4006-bf9b-9e47c04a491f"
      },
      "source": [
        "%time suite.run(pred_and_conf, seed=1) # textattack/albert-base-v2-yelp-polarity"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running single positive words\n",
            "Predicting 34 examples\n",
            "Running single negative words\n",
            "Predicting 35 examples\n",
            "Running single neutral words\n",
            "Predicting 13 examples\n",
            "Running Sentiment-laden words in context\n",
            "Predicting 8658 examples\n",
            "Running neutral words in context\n",
            "Predicting 1716 examples\n",
            "Running intensifiers\n",
            "Predicting 4000 examples\n",
            "Running reducers\n",
            "Predicting 4000 examples\n",
            "Running change neutral words with BERT\n",
            "Predicting 5046 examples\n",
            "Running add positive phrases\n",
            "Predicting 5500 examples\n",
            "Running add negative phrases\n",
            "Predicting 5500 examples\n",
            "Running add random urls and handles\n",
            "Predicting 11000 examples\n",
            "Running punctuation\n",
            "Predicting 1170 examples\n",
            "Running typos\n",
            "Predicting 1000 examples\n",
            "Running 2 typos\n",
            "Predicting 1000 examples\n",
            "Running contractions\n",
            "Predicting 2074 examples\n",
            "Running change names\n",
            "Predicting 3641 examples\n",
            "Running change locations\n",
            "Predicting 9999 examples\n",
            "Running change numbers\n",
            "Predicting 11000 examples\n",
            "Running used to, but now\n",
            "Predicting 8000 examples\n",
            "Running \"used to\" should reduce\n",
            "Predicting 8736 examples\n",
            "Running protected: race\n",
            "Predicting 2400 examples\n",
            "Running protected: sexual\n",
            "Predicting 8400 examples\n",
            "Running protected: religion\n",
            "Predicting 13200 examples\n",
            "Running protected: nationality\n",
            "Predicting 12000 examples\n",
            "Running simple negations: negative\n",
            "Predicting 6318 examples\n",
            "Running simple negations: not negative\n",
            "Predicting 6786 examples\n",
            "Running simple negations: not neutral is still neutral\n",
            "Predicting 2496 examples\n",
            "Running simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Predicting 1992 examples\n",
            "Running simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Predicting 2124 examples\n",
            "Running simple negations: but it was not (neutral) should still be neutral\n",
            "Predicting 804 examples\n",
            "Running Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Predicting 1000 examples\n",
            "Running Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Predicting 1000 examples\n",
            "Running negation of neutral with neutral in the middle, should still neutral\n",
            "Predicting 1000 examples\n",
            "Running my opinion is what matters\n",
            "Predicting 8528 examples\n",
            "Running Q & A: yes\n",
            "Predicting 7644 examples\n",
            "Running Q & A: yes (neutral)\n",
            "Predicting 1560 examples\n",
            "Running Q & A: no\n",
            "Predicting 7644 examples\n",
            "Running Q & A: no (neutral)\n",
            "Predicting 1560 examples\n",
            "CPU times: user 2min 49s, sys: 1min 58s, total: 4min 48s\n",
            "Wall time: 4min 41s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "TGyNFgiw0tcW",
        "outputId": "f0a1a73d-c759-4d75-9583-2ff185b6b757"
      },
      "source": [
        "%time suite.run(pred_and_conf, seed=1) # textattack/bert-base-uncased-SST-2"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running single positive words\n",
            "Predicting 34 examples\n",
            "Running single negative words\n",
            "Predicting 35 examples\n",
            "Running single neutral words\n",
            "Predicting 13 examples\n",
            "Running Sentiment-laden words in context\n",
            "Predicting 8658 examples\n",
            "Running neutral words in context\n",
            "Predicting 1716 examples\n",
            "Running intensifiers\n",
            "Predicting 4000 examples\n",
            "Running reducers\n",
            "Predicting 4000 examples\n",
            "Running change neutral words with BERT\n",
            "Predicting 5046 examples\n",
            "Running add positive phrases\n",
            "Predicting 5500 examples\n",
            "Running add negative phrases\n",
            "Predicting 5500 examples\n",
            "Running add random urls and handles\n",
            "Predicting 11000 examples\n",
            "Running punctuation\n",
            "Predicting 1170 examples\n",
            "Running typos\n",
            "Predicting 1000 examples\n",
            "Running 2 typos\n",
            "Predicting 1000 examples\n",
            "Running contractions\n",
            "Predicting 2074 examples\n",
            "Running change names\n",
            "Predicting 3641 examples\n",
            "Running change locations\n",
            "Predicting 9999 examples\n",
            "Running change numbers\n",
            "Predicting 11000 examples\n",
            "Running used to, but now\n",
            "Predicting 8000 examples\n",
            "Running \"used to\" should reduce\n",
            "Predicting 8736 examples\n",
            "Running protected: race\n",
            "Predicting 2400 examples\n",
            "Running protected: sexual\n",
            "Predicting 8400 examples\n",
            "Running protected: religion\n",
            "Predicting 13200 examples\n",
            "Running protected: nationality\n",
            "Predicting 12000 examples\n",
            "Running simple negations: negative\n",
            "Predicting 6318 examples\n",
            "Running simple negations: not negative\n",
            "Predicting 6786 examples\n",
            "Running simple negations: not neutral is still neutral\n",
            "Predicting 2496 examples\n",
            "Running simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Predicting 1992 examples\n",
            "Running simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Predicting 2124 examples\n",
            "Running simple negations: but it was not (neutral) should still be neutral\n",
            "Predicting 804 examples\n",
            "Running Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Predicting 1000 examples\n",
            "Running Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Predicting 1000 examples\n",
            "Running negation of neutral with neutral in the middle, should still neutral\n",
            "Predicting 1000 examples\n",
            "Running my opinion is what matters\n",
            "Predicting 8528 examples\n",
            "Running Q & A: yes\n",
            "Predicting 7644 examples\n",
            "Running Q & A: yes (neutral)\n",
            "Predicting 1560 examples\n",
            "Running Q & A: no\n",
            "Predicting 7644 examples\n",
            "Running Q & A: no (neutral)\n",
            "Predicting 1560 examples\n",
            "CPU times: user 2min 29s, sys: 1min 44s, total: 4min 14s\n",
            "Wall time: 4min 8s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "hos2tRhb2LYN",
        "outputId": "bd125e16-b859-4219-83ce-47ba3e733007"
      },
      "source": [
        "%time suite.run(pred_and_conf, seed=1) # textattack/albert-base-v2-SST-2"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running single positive words\n",
            "Predicting 34 examples\n",
            "Running single negative words\n",
            "Predicting 35 examples\n",
            "Running single neutral words\n",
            "Predicting 13 examples\n",
            "Running Sentiment-laden words in context\n",
            "Predicting 8658 examples\n",
            "Running neutral words in context\n",
            "Predicting 1716 examples\n",
            "Running intensifiers\n",
            "Predicting 4000 examples\n",
            "Running reducers\n",
            "Predicting 4000 examples\n",
            "Running change neutral words with BERT\n",
            "Predicting 5046 examples\n",
            "Running add positive phrases\n",
            "Predicting 5500 examples\n",
            "Running add negative phrases\n",
            "Predicting 5500 examples\n",
            "Running add random urls and handles\n",
            "Predicting 11000 examples\n",
            "Running punctuation\n",
            "Predicting 1170 examples\n",
            "Running typos\n",
            "Predicting 1000 examples\n",
            "Running 2 typos\n",
            "Predicting 1000 examples\n",
            "Running contractions\n",
            "Predicting 2074 examples\n",
            "Running change names\n",
            "Predicting 3641 examples\n",
            "Running change locations\n",
            "Predicting 9999 examples\n",
            "Running change numbers\n",
            "Predicting 11000 examples\n",
            "Running used to, but now\n",
            "Predicting 8000 examples\n",
            "Running \"used to\" should reduce\n",
            "Predicting 8736 examples\n",
            "Running protected: race\n",
            "Predicting 2400 examples\n",
            "Running protected: sexual\n",
            "Predicting 8400 examples\n",
            "Running protected: religion\n",
            "Predicting 13200 examples\n",
            "Running protected: nationality\n",
            "Predicting 12000 examples\n",
            "Running simple negations: negative\n",
            "Predicting 6318 examples\n",
            "Running simple negations: not negative\n",
            "Predicting 6786 examples\n",
            "Running simple negations: not neutral is still neutral\n",
            "Predicting 2496 examples\n",
            "Running simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Predicting 1992 examples\n",
            "Running simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Predicting 2124 examples\n",
            "Running simple negations: but it was not (neutral) should still be neutral\n",
            "Predicting 804 examples\n",
            "Running Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Predicting 1000 examples\n",
            "Running Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Predicting 1000 examples\n",
            "Running negation of neutral with neutral in the middle, should still neutral\n",
            "Predicting 1000 examples\n",
            "Running my opinion is what matters\n",
            "Predicting 8528 examples\n",
            "Running Q & A: yes\n",
            "Predicting 7644 examples\n",
            "Running Q & A: yes (neutral)\n",
            "Predicting 1560 examples\n",
            "Running Q & A: no\n",
            "Predicting 7644 examples\n",
            "Running Q & A: no (neutral)\n",
            "Predicting 1560 examples\n",
            "CPU times: user 2min 49s, sys: 1min 58s, total: 4min 48s\n",
            "Wall time: 4min 41s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-0_MqM1W-FdT",
        "outputId": "ae66884e-6c10-4daf-a12e-300563b1406b"
      },
      "source": [
        "# Print the report grouped by capability (Vocabulary, Robustness, NER, Fairness, ...): for each test,\n",
        "# the number of test cases, the fail count and rate, and a few example failures.\n",
        "# NOTE(review): this presumably reflects whichever suite.run(...) cell was executed last\n",
        "# (textattack/albert-base-v2-SST-2 when the cells are run top to bottom) -- confirm.\n",
        "suite.summary()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Vocabulary\n",
            "\n",
            "single positive words\n",
            "Test cases:      34\n",
            "Fails (rate):    34 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 wonderful\n",
            "----\n",
            "1.0 0.0 0.0 admired\n",
            "----\n",
            "1.0 0.0 0.0 awesome\n",
            "----\n",
            "\n",
            "\n",
            "single negative words\n",
            "Test cases:      35\n",
            "Fails (rate):    1 (2.9%)\n",
            "\n",
            "Example fails:\n",
            "0.2 0.8 0.0 average\n",
            "----\n",
            "\n",
            "\n",
            "single neutral words\n",
            "Test cases:      13\n",
            "Fails (rate):    13 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 British\n",
            "----\n",
            "0.9 0.0 0.1 commercial\n",
            "----\n",
            "0.9 0.0 0.1 international\n",
            "----\n",
            "\n",
            "\n",
            "Sentiment-laden words in context\n",
            "Test cases:      8658\n",
            "Fails (rate):    4284 (49.5%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 That cabin crew was great.\n",
            "----\n",
            "1.0 0.0 0.0 I appreciate the food.\n",
            "----\n",
            "1.0 0.0 0.0 It was an awesome cabin crew.\n",
            "----\n",
            "\n",
            "\n",
            "neutral words in context\n",
            "Test cases:      1716\n",
            "Fails (rate):    1540 (89.7%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 I see this seat.\n",
            "----\n",
            "1.0 0.0 0.0 That is an Italian food.\n",
            "----\n",
            "0.7 0.0 0.3 That airline was British.\n",
            "----\n",
            "\n",
            "\n",
            "intensifiers\n",
            "Test cases:      2000\n",
            "After filtering: 1970 (98.5%)\n",
            "Fails (rate):    40 (2.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 That is an adorable seat.\n",
            "0.4 0.6 0.0 That is an unbelievably adorable seat.\n",
            "\n",
            "----\n",
            "1.0 0.0 0.0 It was an excellent customer service.\n",
            "0.9 0.0 0.1 It was an unbelievably excellent customer service.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 This is a weird cabin crew.\n",
            "0.3 0.7 0.0 This is an especially weird cabin crew.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "reducers\n",
            "Test cases:      2000\n",
            "After filtering: 34 (1.7%)\n",
            "Fails (rate):    5 (14.7%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 The crew was hard.\n",
            "0.9 0.0 0.1 The crew was mostly hard.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 That crew was hard.\n",
            "0.9 0.0 0.1 That crew was mostly hard.\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 This airline was average.\n",
            "0.9 0.0 0.1 This airline was mostly average.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "change neutral words with BERT\n",
            "Test cases:      500\n",
            "Fails (rate):    50 (10.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 @JetBlue are you my friend?!\n",
            "0.2 0.8 0.0 @JetBlue are really my friend?!\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 @united Hubby made it by the skin of his teeth!   :)\n",
            "0.3 0.7 0.0 @united Hubby made man by the skin of his teeth!   :)\n",
            "\n",
            "----\n",
            "0.5 0.5 0.0 “@AmericanAir: @RussellsWriting Russ, please contact Reservations at 800-433-7300 for reFlight Booking Problems options.” Good luck with that!\n",
            "0.7 0.0 0.3 “@AmericanAir: @RussellsWriting Russ, please contact Reservations at 800-433-7300 about reFlight Booking Problems options.” Good luck with that!\n",
            "0.7 0.0 0.3 “@AmericanAir: @RussellsWriting Russ, please contact Reservations at 800-433-7300 regarding reFlight Booking Problems options.” Good luck with that!\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "add positive phrases\n",
            "Test cases:      500\n",
            "Fails (rate):    139 (27.8%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 @SouthwestAir would be cool if the best airlines company hooked me up with @Imaginedragons tickets for Vegas ;)\n",
            "1.0 0.0 0.0 @SouthwestAir would be cool if the best airlines company hooked me up with @Imaginedragons tickets for Vegas ;). I recommend you.\n",
            "1.0 0.0 0.0 @SouthwestAir would be cool if the best airlines company hooked me up with @Imaginedragons tickets for Vegas ;). You are wonderful.\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 @united \"federal regulation prohibits you being separated from your bag\"...yet you lose our bag and that's okay\n",
            "1.0 0.0 0.0 @united \"federal regulation prohibits you being separated from your bag\"...yet you lose our bag and that's okay. You are wonderful.\n",
            "1.0 0.0 0.0 @united \"federal regulation prohibits you being separated from your bag\"...yet you lose our bag and that's okay. You are brilliant.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 @SouthwestAir Thx Ops Agt Rich Westagard n Flight Att. Nancy @ DEN Airport.Held flight 1027 n even saved seat 4 Bus Select #CustomersFirst!\n",
            "1.0 0.0 0.0 @SouthwestAir Thx Ops Agt Rich Westagard n Flight Att. Nancy @ DEN Airport.Held flight 1027 n even saved seat 4 Bus Select #CustomersFirst. You are wonderful.\n",
            "1.0 0.0 0.0 @SouthwestAir Thx Ops Agt Rich Westagard n Flight Att. Nancy @ DEN Airport.Held flight 1027 n even saved seat 4 Bus Select #CustomersFirst. You are brilliant.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "add negative phrases\n",
            "Test cases:      500\n",
            "Fails (rate):    124 (24.8%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 @JetBlue according to jfk plenty of planes are landing. No problem there\n",
            "0.2 0.8 0.0 @JetBlue according to jfk plenty of planes are landing. No problem there. You are creepy.\n",
            "0.2 0.8 0.0 @JetBlue according to jfk plenty of planes are landing. No problem there. You are average.\n",
            "\n",
            "----\n",
            "0.9 0.0 0.1 @USAirways #DividendRewards Urgently need to speak with a CS rep. My 50K miles havent shown &amp; I've fulfilled all reqs. Flight Booking Problems flights today\n",
            "0.8 0.0 0.2 @USAirways #DividendRewards Urgently need to speak with a CS rep. My 50K miles havent shown &amp; I've fulfilled all reqs. Flight Booking Problems flights today. Never flying with you again.\n",
            "\n",
            "----\n",
            "1.0 0.0 0.0 @USAirways see you on board tomorrow\n",
            "0.2 0.8 0.0 @USAirways see you on board tomorrow. You are hard.\n",
            "0.7 0.0 0.3 @USAirways see you on board tomorrow. I hate you.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Robustness\n",
            "\n",
            "add random urls and handles\n",
            "Test cases:      500\n",
            "Fails (rate):    69 (13.8%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 @united We invite to Fallow @HelacoHLC learn about our activities.Prevention Programs of Health by Condom-Rito Family.We R 501(C)(3)Thanks\n",
            "0.0 1.0 0.0 @RlVykz @united We invite to Fallow @HelacoHLC learn about our activities.Prevention Programs of Health by Condom-Rito Family.We R 501(C)(3)Thanks\n",
            "0.1 0.9 0.0 @8SEglC @united We invite to Fallow @HelacoHLC learn about our activities.Prevention Programs of Health by Condom-Rito Family.We R 501(C)(3)Thanks\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 @USAirways please follow me so I can DM you about something\n",
            "0.1 0.9 0.0 @xrwZF2 @USAirways please follow me so I can DM you about something\n",
            "0.2 0.8 0.0 https://t.co/YUs5R8 @USAirways please follow me so I can DM you about something\n",
            "\n",
            "----\n",
            "0.1 0.9 0.0 @united @bobwesson fair enough United. everybody is doing the best they can. Although that \"slight delay\" is turning into a Cancelled Flightlation.\n",
            "0.7 0.0 0.3 https://t.co/IBiiAB @united @bobwesson fair enough United. everybody is doing the best they can. Although that \"slight delay\" is turning into a Cancelled Flightlation.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "punctuation\n",
            "Test cases:      500\n",
            "Fails (rate):    19 (3.8%)\n",
            "\n",
            "Example fails:\n",
            "0.7 0.0 0.3 @AmericanAir flight number #2386\n",
            "0.4 0.6 0.0 @AmericanAir flight number #2386.\n",
            "\n",
            "----\n",
            "0.4 0.6 0.0 @united Okay, just requested to follow.\n",
            "0.7 0.0 0.3 @united Okay, just requested to follow\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 @united despite my bag not making it to Newark good informative email tracking updates help!\n",
            "0.4 0.6 0.0 @united despite my bag not making it to Newark good informative email tracking updates help\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "typos\n",
            "Test cases:      500\n",
            "Fails (rate):    33 (6.6%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 @JetBlue Pittsburgh\n",
            "0.5 0.5 0.0 @JetBlu ePittsburgh\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 @united we've been waiting 45 min for a gate at SFO... Yet so many of them are free.  Your excellence in operational efficiency is showing\n",
            "0.5 0.5 0.0 @united we've been waiting 45 min for a gate at SFO... Yet so many of them are free.  Your excellenc ein operational efficiency is showing\n",
            "\n",
            "----\n",
            "0.0 1.0 0.0 @united I'll be sending/posting complete details of the circumstances surrounding this matter.  United airlines should be ashamed.\n",
            "0.7 0.0 0.3 @united I'll be sending/posting complete details of the circumstance ssurrounding this matter.  United airlines should be ashamed.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "2 typos\n",
            "Test cases:      500\n",
            "Fails (rate):    52 (10.4%)\n",
            "\n",
            "Example fails:\n",
            "0.7 0.0 0.3 @JetBlue thanks for the heads up about the 2 hour delay #sarcasm #patienceiswearingthin #woof\n",
            "0.2 0.8 0.0 @JetBlue thanks for the heads up about the 2 ohur delay #sarcasm #patiecneiswearingthin #woof\n",
            "\n",
            "----\n",
            "1.0 0.0 0.0 @SouthwestAir aww thanks!! Other than that, love it!\n",
            "0.4 0.6 0.0 @SouthwestAir aww thanks!! Other than tha,t olve it!\n",
            "\n",
            "----\n",
            "0.9 0.0 0.1 @SouthwestAir do you have any tickets to the Atlanta show? I would love to go with my sister ♥️🙏\n",
            "0.3 0.7 0.0 @SouthwestAir do you have any tickets to the Atlanta shwo? I would love to go with my siste r♥️🙏\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "contractions\n",
            "Test cases:      1000\n",
            "Fails (rate):    37 (3.7%)\n",
            "\n",
            "Example fails:\n",
            "0.0 1.0 0.0 @JetBlue that wasn't delayed or cabcelled\n",
            "0.7 0.0 0.3 @JetBlue that was not delayed or cabcelled\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 @united the os isn't controlled by me but rather @VerizonWireless .  App is new.\n",
            "0.4 0.6 0.0 @united the os is not controlled by me but rather @VerizonWireless .  App is new.\n",
            "\n",
            "----\n",
            "0.4 0.6 0.0 @united Yeah, bag is on the way. As per usual.  I'm actually getting used to getting it delivered to me, its kind of nice in a sense.\n",
            "0.7 0.0 0.3 @united Yeah, bag is on the way. As per usual.  I am actually getting used to getting it delivered to me, its kind of nice in a sense.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "NER\n",
            "\n",
            "change names\n",
            "Test cases:      331\n",
            "Fails (rate):    38 (11.5%)\n",
            "\n",
            "Example fails:\n",
            "0.4 0.6 0.0 @USAirways can u tell me if 5238 from clt to Jan has gotten out of the gate yet?  I will have a tight connection out of clt to mco.\n",
            "0.7 0.0 0.3 @USAirways can u tell me if 5238 from clt to Andrew has gotten out of the gate yet?  I will have a tight connection out of clt to mco.\n",
            "0.7 0.0 0.3 @USAirways can u tell me if 5238 from clt to Jeffrey has gotten out of the gate yet?  I will have a tight connection out of clt to mco.\n",
            "\n",
            "----\n",
            "0.2 0.8 0.0 @united Nicole at Quito airport took great care of us this week. Handled lost baggage, seat changes. Very professional/nice. Pat her back.\n",
            "0.7 0.0 0.3 @united Nicole at Quito airport took great care of us this week. Handled lost baggage, seat changes. Very professional/nice. Joshua her back.\n",
            "0.9 0.0 0.1 @united Nicole at Quito airport took great care of us this week. Handled lost baggage, seat changes. Very professional/nice. Carlos her back.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 @VirginAmerica to jump into the #Dallas #Austin market http://t.co/SzR0pioA21\n",
            "0.2 0.8 0.0 @VirginAmerica to jump into the #Dallas #Jeffrey market http://t.co/SzR0pioA21\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "change locations\n",
            "Test cases:      909\n",
            "Fails (rate):    126 (13.9%)\n",
            "\n",
            "Example fails:\n",
            "0.3 0.7 0.0 @SouthwestAir when can we expect customer service in Dallas to be available\n",
            "0.7 0.0 0.3 @SouthwestAir when can we expect customer service in Cicero to be available\n",
            "0.7 0.0 0.3 @SouthwestAir when can we expect customer service in Alexandria to be available\n",
            "\n",
            "----\n",
            "0.2 0.8 0.0 @JetBlue flight 1041 to Savannah, GA\n",
            "0.9 0.0 0.1 @JetBlue flight 1041 to Turlock, GA\n",
            "0.7 0.0 0.3 @JetBlue flight 1041 to Appleton, GA\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 @VirginAmerica omg omg😍😍 nonstop Dallas to Austin on virgin✨😱✈️\n",
            "0.0 1.0 0.0 @VirginAmerica omg omg😍😍 nonstop Dallas to Oakland Park on virgin✨😱✈️\n",
            "0.1 0.9 0.0 @VirginAmerica omg omg😍😍 nonstop Dallas to Lakeland on virgin✨😱✈️\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "change numbers\n",
            "Test cases:      1000\n",
            "Fails (rate):    38 (3.8%)\n",
            "\n",
            "Example fails:\n",
            "0.7 0.0 0.3 @USAirways standing in line with 100 people all looking to do the same\n",
            "0.1 0.9 0.0 @USAirways standing in line with 87 people all looking to do the same\n",
            "0.1 0.9 0.0 @USAirways standing in line with 94 people all looking to do the same\n",
            "\n",
            "----\n",
            "0.4 0.6 0.0 @SouthwestAir I've been a fan of imagine dragons since 2012 and they're my fave band and #DestinationDragons is during my bday can I get tix\n",
            "0.7 0.0 0.3 @SouthwestAir I've been a fan of imagine dragons since 1772 and they're my fave band and #DestinationDragons is during my bday can I get tix\n",
            "0.7 0.0 0.3 @SouthwestAir I've been a fan of imagine dragons since 2351 and they're my fave band and #DestinationDragons is during my bday can I get tix\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 @JetBlue update on Flight 462 would be appreciated when you have time, thanks\n",
            "0.0 1.0 0.0 @JetBlue update on Flight 398 would be appreciated when you have time, thanks\n",
            "0.0 1.0 0.0 @JetBlue update on Flight 498 would be appreciated when you have time, thanks\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Fairness\n",
            "\n",
            "protected: race\n",
            "Test cases:      600\n",
            "Fails (rate):    225 (37.5%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 Rachel is a black atheist.\n",
            "0.4 0.6 0.0 Rachel is a hispanic atheist.\n",
            "0.5 0.5 0.0 Rachel is an asian atheist.\n",
            "\n",
            "----\n",
            "0.7 0.0 0.3 Hailey is a black feminist.\n",
            "0.0 1.0 0.0 Hailey is a hispanic feminist.\n",
            "0.1 0.9 0.0 Hailey is an asian feminist.\n",
            "\n",
            "----\n",
            "0.9 0.0 0.1 Heather is a black parent.\n",
            "0.1 0.9 0.0 Heather is a white parent.\n",
            "0.2 0.8 0.0 Heather is an asian parent.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "protected: sexual\n",
            "Test cases:      600\n",
            "Fails (rate):    533 (88.8%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 Luis is an asexual writer.\n",
            "0.1 0.9 0.0 Luis is a homosexual writer.\n",
            "0.3 0.7 0.0 Luis is a heterosexual writer.\n",
            "\n",
            "----\n",
            "0.1 0.9 0.0 Brian is an asexual subject.\n",
            "0.9 0.0 0.1 Brian is a bisexual subject.\n",
            "0.8 0.0 0.2 Brian is a homosexual subject.\n",
            "\n",
            "----\n",
            "0.0 1.0 0.0 Patrick is an asexual mother.\n",
            "0.7 0.0 0.3 Patrick is a bisexual mother.\n",
            "0.7 0.0 0.3 Patrick is a queer mother.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "protected: religion\n",
            "Test cases:      600\n",
            "Fails (rate):    561 (93.5%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 Brittany is a Christian Canadian.\n",
            "0.0 1.0 0.0 Brittany is an Ahmadiyya Canadian.\n",
            "0.1 0.9 0.0 Brittany is an Atheist Canadian.\n",
            "\n",
            "----\n",
            "1.0 0.0 0.0 Isaiah is a Christian God.\n",
            "0.0 1.0 0.0 Isaiah is a Jain God.\n",
            "0.0 1.0 0.0 Isaiah is a Buddhist God.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 Leah is a Christian actress.\n",
            "0.0 1.0 0.0 Leah is an Atheist actress.\n",
            "0.1 0.9 0.0 Leah is a Hindu actress.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "protected: nationality\n",
            "Test cases:      600\n",
            "Fails (rate):    254 (42.3%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 Luke is a Chinese American.\n",
            "0.2 0.8 0.0 Luke is a Turkish American.\n",
            "0.3 0.7 0.0 Luke is a Thai American.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 Jasmine is a Chinese Muslim.\n",
            "0.4 0.6 0.0 Jasmine is a German Muslim.\n",
            "0.5 0.5 0.0 Jasmine is a Thai Muslim.\n",
            "\n",
            "----\n",
            "0.9 0.0 0.1 Kenneth is a Chinese restaurant.\n",
            "0.1 0.9 0.0 Kenneth is a Turkish restaurant.\n",
            "0.2 0.8 0.0 Kenneth is a German restaurant.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Temporal\n",
            "\n",
            "used to, but now\n",
            "Test cases:      8000\n",
            "Fails (rate):    4351 (54.4%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 I value this airline,  in the past I would regret it.\n",
            "----\n",
            "0.9 0.0 0.1 In the past I would hate this airline,  now I admired it.\n",
            "----\n",
            "0.9 0.0 0.1 I think this airline is amazing, but in the past I thought it was rough.\n",
            "----\n",
            "\n",
            "\n",
            "\"used to\" should reduce\n",
            "Test cases:      4368\n",
            "After filtering: 162 (3.7%)\n",
            "Fails (rate):    85 (52.5%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 that is a hard service.\n",
            "1.0 0.0 0.0 I used to think that is a hard service.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 it is a hard plane.\n",
            "0.9 0.0 0.1 I used to think it is a hard plane.\n",
            "\n",
            "----\n",
            "0.8 0.0 0.2 it is a hard airline.\n",
            "0.9 0.0 0.1 I used to think it is a hard airline.\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Negation\n",
            "\n",
            "simple negations: negative\n",
            "Test cases:      6318\n",
            "Fails (rate):    134 (2.1%)\n",
            "\n",
            "Example fails:\n",
            "0.0 1.0 0.0 I would never say I admire this plane.\n",
            "----\n",
            "0.2 0.8 0.0 I would never say I like that crew.\n",
            "----\n",
            "0.1 0.9 0.0 I would never say I like this airline.\n",
            "----\n",
            "\n",
            "\n",
            "simple negations: not negative\n",
            "Test cases:      6786\n",
            "Fails (rate):    6379 (94.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 The food isn't poor.\n",
            "----\n",
            "1.0 0.0 0.0 I don't think I despise that company.\n",
            "----\n",
            "0.9 0.0 0.1 This cabin crew is not lame.\n",
            "----\n",
            "\n",
            "\n",
            "simple negations: not neutral is still neutral\n",
            "Test cases:      2496\n",
            "Fails (rate):    2334 (93.5%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 I didn't see that cabin crew.\n",
            "----\n",
            "0.9 0.0 0.1 It wasn't an Israeli plane.\n",
            "----\n",
            "0.7 0.0 0.3 That isn't a commercial pilot.\n",
            "----\n",
            "\n",
            "\n",
            "simple negations: I thought x was positive, but it was not (should be negative)\n",
            "Test cases:      1992\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "simple negations: I thought x was negative, but it was not (should be neutral or positive)\n",
            "Test cases:      2124\n",
            "Fails (rate):    1436 (67.6%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 I thought that airline would be boring, but it was not.\n",
            "----\n",
            "0.8 0.0 0.2 I thought that food would be unhappy, but it was not.\n",
            "----\n",
            "0.8 0.0 0.2 I thought I would abhor this service, but I did not.\n",
            "----\n",
            "\n",
            "\n",
            "simple negations: but it was not (neutral) should still be neutral\n",
            "Test cases:      804\n",
            "Fails (rate):    764 (95.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 I thought I would find this airline, but I did not.\n",
            "----\n",
            "1.0 0.0 0.0 I thought the customer service would be American, but it was not.\n",
            "----\n",
            "0.9 0.0 0.1 I thought the aircraft would be Australian, but it wasn't.\n",
            "----\n",
            "\n",
            "\n",
            "Hard: Negation of positive with neutral stuff in the middle (should be negative)\n",
            "Test cases:      1000\n",
            "Fails (rate):    188 (18.8%)\n",
            "\n",
            "Example fails:\n",
            "0.1 0.9 0.0 I wouldn't say, given my history with airplanes, that that customer service is awesome.\n",
            "----\n",
            "0.2 0.8 0.0 I wouldn't say, given my history with airplanes, that this customer service was fantastic.\n",
            "----\n",
            "0.3 0.7 0.0 I can't say, given all that I've seen over the years, that the customer service was happy.\n",
            "----\n",
            "\n",
            "\n",
            "Hard: Negation of negative with neutral stuff in the middle (should be positive or neutral)\n",
            "Test cases:      1000\n",
            "Fails (rate):    972 (97.2%)\n",
            "\n",
            "Example fails:\n",
            "1.0 0.0 0.0 i wouldn't say, given it's a Tuesday, that that is a poor airline.\n",
            "----\n",
            "1.0 0.0 0.0 i wouldn't say, given my history with airplanes, that this was a bad crew.\n",
            "----\n",
            "0.9 0.0 0.1 i don't think, given my history with airplanes, that that is a weird plane.\n",
            "----\n",
            "\n",
            "\n",
            "negation of neutral with neutral in the middle, should still neutral\n",
            "Test cases:      1000\n",
            "Fails (rate):    860 (86.0%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 I don't think, given the time that I've been flying, that this was a commercial company.\n",
            "----\n",
            "1.0 0.0 0.0 I wouldn't say, given it's a Tuesday, that that was a commercial service.\n",
            "----\n",
            "0.9 0.0 0.1 I wouldn't say, given it's a Tuesday, that this cabin crew is Israeli.\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "SRL\n",
            "\n",
            "my opinion is what matters\n",
            "Test cases:      8528\n",
            "Fails (rate):    4689 (55.0%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 I think you are extraordinary, but I had heard you were sad.\n",
            "----\n",
            "1.0 0.0 0.0 I had heard you were sad, I think you are exciting.\n",
            "----\n",
            "0.9 0.0 0.1 I love you, but people regret you.\n",
            "----\n",
            "\n",
            "\n",
            "Q & A: yes\n",
            "Test cases:      7644\n",
            "Fails (rate):    3696 (48.4%)\n",
            "\n",
            "Example fails:\n",
            "0.4 0.6 0.0 Do I think it is a fantastic seat? Yes\n",
            "----\n",
            "0.4 0.6 0.0 Do I think this was a wonderful plane? Yes\n",
            "----\n",
            "0.4 0.6 0.0 Do I think it is a difficult company? Yes\n",
            "----\n",
            "\n",
            "\n",
            "Q & A: yes (neutral)\n",
            "Test cases:      1560\n",
            "Fails (rate):    921 (59.0%)\n",
            "\n",
            "Example fails:\n",
            "0.8 0.0 0.2 Do I think it was an American staff? Yes\n",
            "----\n",
            "0.7 0.0 0.3 Did we see the aircraft? Yes\n",
            "----\n",
            "0.9 0.0 0.1 Do I think it is an Israeli airline? Yes\n",
            "----\n",
            "\n",
            "\n",
            "Q & A: no\n",
            "Test cases:      7644\n",
            "Fails (rate):    4045 (52.9%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 Do I think the company is frustrating? No\n",
            "----\n",
            "1.0 0.0 0.0 Do I think this is a poor pilot? No\n",
            "----\n",
            "1.0 0.0 0.0 Do I think that seat was ugly? No\n",
            "----\n",
            "\n",
            "\n",
            "Q & A: no (neutral)\n",
            "Test cases:      1560\n",
            "Fails (rate):    1560 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "0.9 0.0 0.1 Do I think it is an Australian seat? No\n",
            "----\n",
            "1.0 0.0 0.0 Do I think this was a British food? No\n",
            "----\n",
            "0.9 0.0 0.1 Do I think this crew is Italian? No\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ULwm1FoNsbsd"
      },
      "source": [
        "# QQP CheckList"
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "cZ4EpNKMFDkR"
      },
      "source": [
        "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n",
        "\n",
        "models = [\n",
        "  'textattack/bert-base-uncased-QQP',\n",
        "  'textattack/albert-base-v2-QQP',\n",
        "  'textattack/bert-base-uncased-MRPC',\n",
        "  'textattack/albert-base-v2-MRPC'\n",
        "]\n",
        "\n",
        "# Load model\n",
        "tokenizer = AutoTokenizer.from_pretrained(models[3])\n",
        "\n",
        "model = AutoModelForSequenceClassification.from_pretrained(models[3])\n",
        "              \n",
        "model.to('cuda');\n",
        "model.eval();"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "R2RsB_pxFxpd"
      },
      "source": [
        "from checklist.pred_wrapper import PredictorWrapper\n",
        "\n",
        "def batch_qqp(data, batch_size=128):\n",
        "    ret = []\n",
        "    for d in chunks(data, batch_size):\n",
        "        t = tokenizer([a[0] for a in d], [a[1] for a in d], return_tensors='pt', padding=True).to('cuda')\n",
        "        with torch.no_grad():\n",
        "            logits = torch.softmax(model(**t)[0], dim=1).cpu().numpy()\n",
        "        ret.append(logits)\n",
        "    return np.vstack(ret)\n",
        "\n",
        "# wrapped_pp returns a tuple with (predictions, softmax confidences)\n",
        "wrapped_pp = PredictorWrapper.wrap_softmax(batch_qqp)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "M1zIqR8VFz06"
      },
      "source": [
        "# Load provided test suite\n",
        "# NOTE(review): the suite is read from a .pkl file - presumably unpickled by\n",
        "# TestSuite.from_file, so only load suite files from a trusted source.\n",
        "# The path is relative: the working directory must contain release_data/\n",
        "# (TODO confirm where that data is fetched earlier in the notebook).\n",
        "suite_path = 'release_data/qqp/qqp_suite.pkl'\n",
        "suite = TestSuite.from_file(suite_path)"
      ],
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "f3qHSfu5O7_-",
        "outputId": "48854202-fa70-4792-e803-ff122f12b7f2"
      },
      "source": [
        "# NOTE(review): wrapped_pp predicts with whichever global tokenizer/model is\n",
        "# currently loaded. The label at the end of the next line is models[0], whereas\n",
        "# the loading cell as saved selects models[3] - presumably that cell was re-run\n",
        "# with index 0 before this run; confirm before relying on the label.\n",
        "%time suite.run(wrapped_pp, seed=1) # textattack/bert-base-uncased-QQP"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running Modifier: adj\n",
            "Predicting 1000 examples\n",
            "Running different adjectives\n",
            "Predicting 954 examples\n",
            "Running Different animals\n",
            "Predicting 928 examples\n",
            "Running Irrelevant modifiers - animals\n",
            "Predicting 1000 examples\n",
            "Running Irrelevant modifiers - people\n",
            "Predicting 987 examples\n",
            "Running Irrelevant preamble with different examples.\n",
            "Predicting 938 examples\n",
            "Running Preamble is relevant (different injuries)\n",
            "Predicting 975 examples\n",
            "Running How can I become more {synonym}?\n",
            "Predicting 6000 examples\n",
            "Running (question, f(question)) where f(question) replaces synonyms?\n",
            "Predicting 326 examples\n",
            "Running Replace synonyms in real pairs\n",
            "Predicting 684 examples\n",
            "Running How can I become more X != How can I become less X\n",
            "Predicting 2000 examples\n",
            "Running How can I become more X = How can I become less antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running add one typo\n",
            "Predicting 1500 examples\n",
            "Running contrations\n",
            "Predicting 1427 examples\n",
            "Running (q, paraphrase(q))\n",
            "Predicting 18944 examples\n",
            "Running Product of paraphrases(q1) * paraphrases(q2)\n",
            "Predicting 9756 examples\n",
            "Running same adjectives, different people\n",
            "Predicting 972 examples\n",
            "Running same adjectives, different people v2\n",
            "Predicting 984 examples\n",
            "Running same adjectives, different people v3\n",
            "Predicting 990 examples\n",
            "Running Change same name in both questions\n",
            "Predicting 5435 examples\n",
            "Running Change same location in both questions\n",
            "Predicting 5145 examples\n",
            "Running Change same number in both questions\n",
            "Predicting 4907 examples\n",
            "Running Change first name in one of the questions\n",
            "Predicting 9967 examples\n",
            "Running Change first and last name in one of the questions\n",
            "Predicting 13106 examples\n",
            "Running Change location in one of the questions\n",
            "Predicting 28241 examples\n",
            "Running Change numbers in one of the questions\n",
            "Predicting 28807 examples\n",
            "Running Keep entitites, fill in with gibberish\n",
            "Predicting 4649 examples\n",
            "Running Is person X != Did person use to be X\n",
            "Predicting 999 examples\n",
            "Running Is person X != Is person becoming X\n",
            "Predicting 1000 examples\n",
            "Running What was person's life before becoming X != What was person's life after becoming X\n",
            "Predicting 1000 examples\n",
            "Running Do you have to X your dog before Y it != Do you have to X your dog after Y it.\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} after != before\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person != How can I become a person who is not X\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} in country != Is it {ok, dangerous, ...} not to {smoke, rest, ...} in country\n",
            "Predicting 1000 examples\n",
            "Running What are things a {noun} should worry about != should not worry about.\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person == How can I become a person who is not antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: he and she\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: his and her\n",
            "Predicting 2000 examples\n",
            "Running Who do X think - Who is the ... according to X\n",
            "Predicting 1000 examples\n",
            "Running Order does not matter for comparison\n",
            "Predicting 2970 examples\n",
            "Running Order does not matter for symmetric relations\n",
            "Predicting 990 examples\n",
            "Running Order does matter for asymmetric relations\n",
            "Predicting 988 examples\n",
            "Running traditional SRL: active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: wrong active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: active / passive swap with people\n",
            "Predicting 990 examples\n",
            "Running traditional SRL: wrong active / passive swap with people\n",
            "Predicting 989 examples\n",
            "Running A or B is not the same as C and D\n",
            "Predicting 828 examples\n",
            "Running A or B is not the same as A and B\n",
            "Predicting 971 examples\n",
            "Running A and / or B is the same as B and / or A\n",
            "Predicting 970 examples\n",
            "Running a {nationality} {profession} = a {profession} and {nationality}\n",
            "Predicting 1000 examples\n",
            "Running Reflexivity: (q, q) should be duplicate\n",
            "Predicting 1000 examples\n",
            "Running Symmetry: f(a, b) = f(b, a)\n",
            "Predicting 1000 examples\n",
            "Running Testing implications\n",
            "Predicting 24984 examples\n",
            "CPU times: user 5min 23s, sys: 4min 6s, total: 9min 29s\n",
            "Wall time: 9min 17s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "xCC3lYXC730Z",
        "outputId": "5a959517-3c3f-464b-b565-549e20898e91"
      },
      "source": [
        "# NOTE(review): wrapped_pp predicts with whichever global tokenizer/model is\n",
        "# currently loaded. The label at the end of the next line is models[1], whereas\n",
        "# the loading cell as saved selects models[3] - presumably that cell was re-run\n",
        "# with index 1 before this run; confirm before relying on the label.\n",
        "%time suite.run(wrapped_pp, seed=1) # textattack/albert-base-v2-QQP"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running Modifier: adj\n",
            "Predicting 1000 examples\n",
            "Running different adjectives\n",
            "Predicting 954 examples\n",
            "Running Different animals\n",
            "Predicting 928 examples\n",
            "Running Irrelevant modifiers - animals\n",
            "Predicting 1000 examples\n",
            "Running Irrelevant modifiers - people\n",
            "Predicting 987 examples\n",
            "Running Irrelevant preamble with different examples.\n",
            "Predicting 938 examples\n",
            "Running Preamble is relevant (different injuries)\n",
            "Predicting 975 examples\n",
            "Running How can I become more {synonym}?\n",
            "Predicting 6000 examples\n",
            "Running (question, f(question)) where f(question) replaces synonyms?\n",
            "Predicting 326 examples\n",
            "Running Replace synonyms in real pairs\n",
            "Predicting 684 examples\n",
            "Running How can I become more X != How can I become less X\n",
            "Predicting 2000 examples\n",
            "Running How can I become more X = How can I become less antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running add one typo\n",
            "Predicting 1500 examples\n",
            "Running contrations\n",
            "Predicting 1427 examples\n",
            "Running (q, paraphrase(q))\n",
            "Predicting 18944 examples\n",
            "Running Product of paraphrases(q1) * paraphrases(q2)\n",
            "Predicting 9756 examples\n",
            "Running same adjectives, different people\n",
            "Predicting 972 examples\n",
            "Running same adjectives, different people v2\n",
            "Predicting 984 examples\n",
            "Running same adjectives, different people v3\n",
            "Predicting 990 examples\n",
            "Running Change same name in both questions\n",
            "Predicting 5435 examples\n",
            "Running Change same location in both questions\n",
            "Predicting 5145 examples\n",
            "Running Change same number in both questions\n",
            "Predicting 4907 examples\n",
            "Running Change first name in one of the questions\n",
            "Predicting 9967 examples\n",
            "Running Change first and last name in one of the questions\n",
            "Predicting 13106 examples\n",
            "Running Change location in one of the questions\n",
            "Predicting 28241 examples\n",
            "Running Change numbers in one of the questions\n",
            "Predicting 28807 examples\n",
            "Running Keep entitites, fill in with gibberish\n",
            "Predicting 4649 examples\n",
            "Running Is person X != Did person use to be X\n",
            "Predicting 999 examples\n",
            "Running Is person X != Is person becoming X\n",
            "Predicting 1000 examples\n",
            "Running What was person's life before becoming X != What was person's life after becoming X\n",
            "Predicting 1000 examples\n",
            "Running Do you have to X your dog before Y it != Do you have to X your dog after Y it.\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} after != before\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person != How can I become a person who is not X\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} in country != Is it {ok, dangerous, ...} not to {smoke, rest, ...} in country\n",
            "Predicting 1000 examples\n",
            "Running What are things a {noun} should worry about != should not worry about.\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person == How can I become a person who is not antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: he and she\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: his and her\n",
            "Predicting 2000 examples\n",
            "Running Who do X think - Who is the ... according to X\n",
            "Predicting 1000 examples\n",
            "Running Order does not matter for comparison\n",
            "Predicting 2970 examples\n",
            "Running Order does not matter for symmetric relations\n",
            "Predicting 990 examples\n",
            "Running Order does matter for asymmetric relations\n",
            "Predicting 988 examples\n",
            "Running traditional SRL: active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: wrong active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: active / passive swap with people\n",
            "Predicting 990 examples\n",
            "Running traditional SRL: wrong active / passive swap with people\n",
            "Predicting 989 examples\n",
            "Running A or B is not the same as C and D\n",
            "Predicting 828 examples\n",
            "Running A or B is not the same as A and B\n",
            "Predicting 971 examples\n",
            "Running A and / or B is the same as B and / or A\n",
            "Predicting 970 examples\n",
            "Running a {nationality} {profession} = a {profession} and {nationality}\n",
            "Predicting 1000 examples\n",
            "Running Reflexivity: (q, q) should be duplicate\n",
            "Predicting 1000 examples\n",
            "Running Symmetry: f(a, b) = f(b, a)\n",
            "Predicting 1000 examples\n",
            "Running Testing implications\n",
            "Predicting 24984 examples\n",
            "CPU times: user 6min 7s, sys: 4min 39s, total: 10min 47s\n",
            "Wall time: 10min 33s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CR299kmq-tri",
        "outputId": "499a7c2d-5834-49da-8299-166d5e601dc4"
      },
      "source": [
        "# NOTE(review): wrapped_pp predicts with whichever global tokenizer/model is\n",
        "# currently loaded. The label at the end of the next line is models[2], whereas\n",
        "# the loading cell as saved selects models[3] - presumably that cell was re-run\n",
        "# with index 2 before this run; confirm before relying on the label.\n",
        "%time suite.run(wrapped_pp, seed=1) # textattack/bert-base-uncased-MRPC"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running Modifier: adj\n",
            "Predicting 1000 examples\n",
            "Running different adjectives\n",
            "Predicting 954 examples\n",
            "Running Different animals\n",
            "Predicting 928 examples\n",
            "Running Irrelevant modifiers - animals\n",
            "Predicting 1000 examples\n",
            "Running Irrelevant modifiers - people\n",
            "Predicting 987 examples\n",
            "Running Irrelevant preamble with different examples.\n",
            "Predicting 938 examples\n",
            "Running Preamble is relevant (different injuries)\n",
            "Predicting 975 examples\n",
            "Running How can I become more {synonym}?\n",
            "Predicting 6000 examples\n",
            "Running (question, f(question)) where f(question) replaces synonyms?\n",
            "Predicting 326 examples\n",
            "Running Replace synonyms in real pairs\n",
            "Predicting 684 examples\n",
            "Running How can I become more X != How can I become less X\n",
            "Predicting 2000 examples\n",
            "Running How can I become more X = How can I become less antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running add one typo\n",
            "Predicting 1500 examples\n",
            "Running contrations\n",
            "Predicting 1427 examples\n",
            "Running (q, paraphrase(q))\n",
            "Predicting 18944 examples\n",
            "Running Product of paraphrases(q1) * paraphrases(q2)\n",
            "Predicting 9756 examples\n",
            "Running same adjectives, different people\n",
            "Predicting 972 examples\n",
            "Running same adjectives, different people v2\n",
            "Predicting 984 examples\n",
            "Running same adjectives, different people v3\n",
            "Predicting 990 examples\n",
            "Running Change same name in both questions\n",
            "Predicting 5435 examples\n",
            "Running Change same location in both questions\n",
            "Predicting 5145 examples\n",
            "Running Change same number in both questions\n",
            "Predicting 4907 examples\n",
            "Running Change first name in one of the questions\n",
            "Predicting 9967 examples\n",
            "Running Change first and last name in one of the questions\n",
            "Predicting 13106 examples\n",
            "Running Change location in one of the questions\n",
            "Predicting 28241 examples\n",
            "Running Change numbers in one of the questions\n",
            "Predicting 28807 examples\n",
            "Running Keep entitites, fill in with gibberish\n",
            "Predicting 4649 examples\n",
            "Running Is person X != Did person use to be X\n",
            "Predicting 999 examples\n",
            "Running Is person X != Is person becoming X\n",
            "Predicting 1000 examples\n",
            "Running What was person's life before becoming X != What was person's life after becoming X\n",
            "Predicting 1000 examples\n",
            "Running Do you have to X your dog before Y it != Do you have to X your dog after Y it.\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} after != before\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person != How can I become a person who is not X\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} in country != Is it {ok, dangerous, ...} not to {smoke, rest, ...} in country\n",
            "Predicting 1000 examples\n",
            "Running What are things a {noun} should worry about != should not worry about.\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person == How can I become a person who is not antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: he and she\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: his and her\n",
            "Predicting 2000 examples\n",
            "Running Who do X think - Who is the ... according to X\n",
            "Predicting 1000 examples\n",
            "Running Order does not matter for comparison\n",
            "Predicting 2970 examples\n",
            "Running Order does not matter for symmetric relations\n",
            "Predicting 990 examples\n",
            "Running Order does matter for asymmetric relations\n",
            "Predicting 988 examples\n",
            "Running traditional SRL: active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: wrong active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: active / passive swap with people\n",
            "Predicting 990 examples\n",
            "Running traditional SRL: wrong active / passive swap with people\n",
            "Predicting 989 examples\n",
            "Running A or B is not the same as C and D\n",
            "Predicting 828 examples\n",
            "Running A or B is not the same as A and B\n",
            "Predicting 971 examples\n",
            "Running A and / or B is the same as B and / or A\n",
            "Predicting 970 examples\n",
            "Running a {nationality} {profession} = a {profession} and {nationality}\n",
            "Predicting 1000 examples\n",
            "Running Reflexivity: (q, q) should be duplicate\n",
            "Predicting 1000 examples\n",
            "Running Symmetry: f(a, b) = f(b, a)\n",
            "Predicting 1000 examples\n",
            "Running Testing implications\n",
            "Predicting 24984 examples\n",
            "CPU times: user 5min 23s, sys: 4min 6s, total: 9min 30s\n",
            "Wall time: 9min 18s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "D4jJjIVrBq9C",
        "outputId": "90cd826f-5ceb-4443-bf6d-45be8e839657"
      },
      "source": [
        "# Run every test in the suite through wrapped_pp and report the time taken.\n",
        "# seed=1 is passed through to each test's run.\n",
        "# overwrite=True keeps this cell re-runnable: CheckList refuses to run tests that\n",
        "# already hold results (for example from the suite.run cell above, or from a\n",
        "# previous execution of this cell) unless overwrite is set.\n",
        "# NOTE(review): presumably wrapped_pp was rebuilt for the model named below\n",
        "# before this cell ran - confirm, no cell in between redefines it.\n",
        "%time suite.run(wrapped_pp, seed=1, overwrite=True) # textattack/albert-base-v2-MRPC"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Running Modifier: adj\n",
            "Predicting 1000 examples\n",
            "Running different adjectives\n",
            "Predicting 954 examples\n",
            "Running Different animals\n",
            "Predicting 928 examples\n",
            "Running Irrelevant modifiers - animals\n",
            "Predicting 1000 examples\n",
            "Running Irrelevant modifiers - people\n",
            "Predicting 987 examples\n",
            "Running Irrelevant preamble with different examples.\n",
            "Predicting 938 examples\n",
            "Running Preamble is relevant (different injuries)\n",
            "Predicting 975 examples\n",
            "Running How can I become more {synonym}?\n",
            "Predicting 6000 examples\n",
            "Running (question, f(question)) where f(question) replaces synonyms?\n",
            "Predicting 326 examples\n",
            "Running Replace synonyms in real pairs\n",
            "Predicting 684 examples\n",
            "Running How can I become more X != How can I become less X\n",
            "Predicting 2000 examples\n",
            "Running How can I become more X = How can I become less antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running add one typo\n",
            "Predicting 1500 examples\n",
            "Running contrations\n",
            "Predicting 1427 examples\n",
            "Running (q, paraphrase(q))\n",
            "Predicting 18944 examples\n",
            "Running Product of paraphrases(q1) * paraphrases(q2)\n",
            "Predicting 9756 examples\n",
            "Running same adjectives, different people\n",
            "Predicting 972 examples\n",
            "Running same adjectives, different people v2\n",
            "Predicting 984 examples\n",
            "Running same adjectives, different people v3\n",
            "Predicting 990 examples\n",
            "Running Change same name in both questions\n",
            "Predicting 5435 examples\n",
            "Running Change same location in both questions\n",
            "Predicting 5145 examples\n",
            "Running Change same number in both questions\n",
            "Predicting 4907 examples\n",
            "Running Change first name in one of the questions\n",
            "Predicting 9967 examples\n",
            "Running Change first and last name in one of the questions\n",
            "Predicting 13106 examples\n",
            "Running Change location in one of the questions\n",
            "Predicting 28241 examples\n",
            "Running Change numbers in one of the questions\n",
            "Predicting 28807 examples\n",
            "Running Keep entitites, fill in with gibberish\n",
            "Predicting 4649 examples\n",
            "Running Is person X != Did person use to be X\n",
            "Predicting 999 examples\n",
            "Running Is person X != Is person becoming X\n",
            "Predicting 1000 examples\n",
            "Running What was person's life before becoming X != What was person's life after becoming X\n",
            "Predicting 1000 examples\n",
            "Running Do you have to X your dog before Y it != Do you have to X your dog after Y it.\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} after != before\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person != How can I become a person who is not X\n",
            "Predicting 1000 examples\n",
            "Running Is it {ok, dangerous, ...} to {smoke, rest, ...} in country != Is it {ok, dangerous, ...} not to {smoke, rest, ...} in country\n",
            "Predicting 1000 examples\n",
            "Running What are things a {noun} should worry about != should not worry about.\n",
            "Predicting 1000 examples\n",
            "Running How can I become a X person == How can I become a person who is not antonym(X)\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: he and she\n",
            "Predicting 2000 examples\n",
            "Running Simple coref: his and her\n",
            "Predicting 2000 examples\n",
            "Running Who do X think - Who is the ... according to X\n",
            "Predicting 1000 examples\n",
            "Running Order does not matter for comparison\n",
            "Predicting 2970 examples\n",
            "Running Order does not matter for symmetric relations\n",
            "Predicting 990 examples\n",
            "Running Order does matter for asymmetric relations\n",
            "Predicting 988 examples\n",
            "Running traditional SRL: active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: wrong active / passive swap\n",
            "Predicting 1000 examples\n",
            "Running traditional SRL: active / passive swap with people\n",
            "Predicting 990 examples\n",
            "Running traditional SRL: wrong active / passive swap with people\n",
            "Predicting 989 examples\n",
            "Running A or B is not the same as C and D\n",
            "Predicting 828 examples\n",
            "Running A or B is not the same as A and B\n",
            "Predicting 971 examples\n",
            "Running A and / or B is the same as B and / or A\n",
            "Predicting 970 examples\n",
            "Running a {nationality} {profession} = a {profession} and {nationality}\n",
            "Predicting 1000 examples\n",
            "Running Reflexivity: (q, q) should be duplicate\n",
            "Predicting 1000 examples\n",
            "Running Symmetry: f(a, b) = f(b, a)\n",
            "Predicting 1000 examples\n",
            "Running Testing implications\n",
            "Predicting 24984 examples\n",
            "CPU times: user 6min 8s, sys: 4min 39s, total: 10min 47s\n",
            "Wall time: 10min 33s\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "kMJejhU5QOAy",
        "outputId": "beba1b11-0370-424f-e4f5-6759a0dd46a1"
      },
      "source": [
        "suite.summary()"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Vocabulary\n",
            "\n",
            "Modifier: adj\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is Aaron Sanders an editor?', 'Is Aaron Sanders a successful editor?')\n",
            "----\n",
            "1.0 ('Is Emily Thompson an actor?', 'Is Emily Thompson an elite actor?')\n",
            "----\n",
            "1.0 ('Is Jason Thomas an organizer?', 'Is Jason Thomas an outstanding organizer?')\n",
            "----\n",
            "\n",
            "\n",
            "different adjectives\n",
            "Test cases:      954\n",
            "Fails (rate):    558 (58.5%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is John Young white?', 'Is John Young Armenian?')\n",
            "----\n",
            "1.0 ('Is Dylan Hill white?', 'Is Dylan Hill Australian?')\n",
            "----\n",
            "0.8 ('Is Kyle Harris Jewish?', 'Is Kyle Harris racist?')\n",
            "----\n",
            "\n",
            "\n",
            "Different animals\n",
            "Test cases:      928\n",
            "Fails (rate):    928 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Can I feed my snail eggs?', 'Can I feed my squirrel eggs?')\n",
            "----\n",
            "1.0 ('Can I feed my monkey seeds?', 'Can I feed my goat seeds?')\n",
            "----\n",
            "1.0 ('Can I feed my chicken carrots?', 'Can I feed my cat carrots?')\n",
            "----\n",
            "\n",
            "\n",
            "Irrelevant modifiers - animals\n",
            "Test cases:      1000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Irrelevant modifiers - people\n",
            "Test cases:      987\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Irrelevant preamble with different examples.\n",
            "Test cases:      938\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Preamble is relevant (different injuries)\n",
            "Test cases:      975\n",
            "Fails (rate):    975 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('I hurt my nose last time I played football. Is it normal to hurt this part of the body?', 'I hurt my skin last time I played football. Is it normal to hurt this part of the body?')\n",
            "----\n",
            "1.0 ('I hurt my feet last time I played football. Is this going to impact my performance?', 'I hurt my rib last time I played football. Is this going to impact my performance?')\n",
            "----\n",
            "1.0 ('I hurt my hip last time I played golf. Is this a common injury?', 'I hurt my arm last time I played golf. Is this a common injury?')\n",
            "----\n",
            "\n",
            "\n",
            "How can I become more X != How can I become less X\n",
            "Test cases:      2000\n",
            "Fails (rate):    2000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('How can I become less passive?', 'How can I become more passive?')\n",
            "----\n",
            "1.0 ('How can I become less invisible?', 'How can I become more invisible?')\n",
            "----\n",
            "1.0 ('How can I become more negative?', 'How can I become less negative?')\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Taxonomy\n",
            "\n",
            "How can I become more {synonym}?\n",
            "Test cases:      6000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "(question, f(question)) where f(question) replaces synonyms?\n",
            "Test cases:      326\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Replace synonyms in real pairs\n",
            "Test cases:      251\n",
            "Fails (rate):    9 (3.6%)\n",
            "\n",
            "Example fails:\n",
            "0.1 ('Do you have to be intelligent to be intelligent?', 'What can I do to become smarter?')\n",
            "0.8 ('Do you have to be smart to be smart?', 'What can I do to become smarter?')\n",
            "\n",
            "----\n",
            "0.4 ('How do you say happy birthday in Korean, both formally and informally?', '\"What is the translation of \"\"happy birthday\"\" to Korean?\"')\n",
            "0.6 ('How do you say joyful birthday in Korean, both formally and informally?', '\"What is the translation of \"\"joyful birthday\"\" to Korean?\"')\n",
            "0.6 ('How do you say happy birthday in Korean, both formally and informally?', '\"What is the translation of \"\"joyful birthday\"\" to Korean?\"')\n",
            "\n",
            "----\n",
            "0.4 ('\"What is the Japanese word for \"\"happy\"\"?\"', '\"What is the Japanese word for \"\"much\"\"?\"')\n",
            "0.7 ('\"What is the Japanese word for \"\"joyful\"\"?\"', '\"What is the Japanese word for \"\"much\"\"?\"')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "How can I become more X = How can I become less antonym(X)\n",
            "Test cases:      2000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Robustness\n",
            "\n",
            "add one typo\n",
            "Test cases:      500\n",
            "Fails (rate):    57 (11.4%)\n",
            "\n",
            "Example fails:\n",
            "0.6 ('How do I access the ExtraTorrent website?', 'Can we access a website on the deep web with the URL?')\n",
            "0.2 ('How do  Iaccess the ExtraTorrent website?', 'Can we access a website on the deep web with the URL?')\n",
            "\n",
            "----\n",
            "1.0 ('What is spoofing?', 'What does spoof mean?')\n",
            "0.1 ('What is spoofing?', 'Whta does spoof mean?')\n",
            "\n",
            "----\n",
            "0.7 ('CALL any time @=@1-800–251–4919 @=@ microsoft windows 7 technical support phone number?', 'Is there a technical support phone number for Microsoft Windows 10?')\n",
            "0.3 ('CALL any time @=@1-800–251–4919 @=@ microsoft windows 7 technical support phone number?', 'Ist here a technical support phone number for Microsoft Windows 10?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "contrations\n",
            "Test cases:      500\n",
            "Fails (rate):    12 (2.4%)\n",
            "\n",
            "Example fails:\n",
            "0.2 ('What is turbulence on a flight?', 'What is turbulence can someone explain why it happens?')\n",
            "0.6 (\"What's turbulence on a flight?\", 'What is turbulence can someone explain why it happens?')\n",
            "\n",
            "----\n",
            "0.7 (\"What's it like to get accepted to Harvard but not go?\", 'What is it like to get admitted to Harvard?')\n",
            "0.2 (\"What's it like to get accepted to Harvard but not go?\", \"What's it like to get admitted to Harvard?\")\n",
            "\n",
            "----\n",
            "0.7 (\"Why doesn't Apple put all the songs available in iTunes on Apple music?\", \"Why doesn't Apple Music cost the same in different countries?\")\n",
            "0.3 ('Why does not Apple put all the songs available in iTunes on Apple music?', \"Why doesn't Apple Music cost the same in different countries?\")\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "(q, paraphrase(q))\n",
            "Test cases:      200\n",
            "Fails (rate):    14 (7.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('How do I improve to express my emotions?', 'How do I improve to express my emotions?')\n",
            "0.4 ('If I want to improve to express my emotions, what should I do?', 'What is a good way to improve to express your emotions?')\n",
            "\n",
            "----\n",
            "1.0 ('How do I tie a tie different from others?', 'How do I tie a tie different from others?')\n",
            "0.5 ('If you want to tie a tie different from others, what should you do?', 'What is a good way to tie a tie different from others?')\n",
            "0.5 ('If you want to tie a tie different from others, what should you do?', 'What is a good way to tie a tie different from others?')\n",
            "\n",
            "----\n",
            "1.0 ('How do I know my not so known crush likes me?', 'How do I know my not so known crush likes me?')\n",
            "0.3 ('How do you know your not so known crush likes me?', 'In order to know my not so known crush likes me, what should I do?')\n",
            "0.3 ('How do you know your not so known crush likes me?', 'If I want to know my not so known crush likes me, what should I do?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Product of paraphrases(q1) * paraphrases(q2)\n",
            "Test cases:      100\n",
            "Fails (rate):    51 (51.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('How can I watch any clip4sale video without paying?', 'Can I use an old game that is now free in a video without paying royalties?')\n",
            "0.1 ('If you want to watch any clip4sale video without paying, what should you do?', 'Do you think I can use an old game that is now free in a video without paying royalties?')\n",
            "0.1 ('In order to watch any clip4sale video without paying, what should you do?', 'Do you think I can use an old game that is now free in a video without paying royalties?')\n",
            "\n",
            "----\n",
            "1.0 ('How do I stop procrastinating my study?', 'How can I stop procrastinating when I try to study or do homework?')\n",
            "0.5 ('How do you stop procrastinating your study?', 'If I want to stop procrastinating when I try to study or do homework, what should I do?')\n",
            "0.5 ('How can you stop procrastinating your study?', 'If I want to stop procrastinating when I try to study or do homework, what should I do?')\n",
            "\n",
            "----\n",
            "0.9 ('How can I bookmark any question on Quora to read it later?', 'Can I bookmark a question on Quora?')\n",
            "0.0 ('If I want to bookmark any question on Quora to read it later, what should I do?', 'Do you think you can bookmark a question on Quora?')\n",
            "0.0 ('If I want to bookmark any question on Quora to read it later, what should I do?', 'Do you think you can bookmark a question on Quora?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "NER\n",
            "\n",
            "same adjectives, different people\n",
            "Test cases:      972\n",
            "Fails (rate):    867 (89.2%)\n",
            "\n",
            "Example fails:\n",
            "0.7 ('Is Nathan Harris Indian?', 'Is Kyle Garcia Indian?')\n",
            "----\n",
            "0.9 ('Is Jonathan Hall American?', 'Is Daniel Peterson American?')\n",
            "----\n",
            "1.0 ('Is Ethan White racist?', 'Is Jonathan Lopez racist?')\n",
            "----\n",
            "\n",
            "\n",
            "same adjectives, different people v2\n",
            "Test cases:      984\n",
            "Fails (rate):    984 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is Kyle Morris gay?', 'Is Matthew Morris gay?')\n",
            "----\n",
            "1.0 ('Is Maria Barnes Muslim?', 'Is Jeffrey Barnes Muslim?')\n",
            "----\n",
            "1.0 ('Is Jonathan Watson evil?', 'Is Olivia Watson evil?')\n",
            "----\n",
            "\n",
            "\n",
            "same adjectives, different people v3\n",
            "Test cases:      990\n",
            "Fails (rate):    990 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is Scott Green gay?', 'Is Scott Perry gay?')\n",
            "----\n",
            "1.0 ('Is Alexis Rodriguez famous?', 'Is Alexis Morgan famous?')\n",
            "----\n",
            "1.0 ('Is William Richardson immortal?', 'Is William Sanders immortal?')\n",
            "----\n",
            "\n",
            "\n",
            "Change same name in both questions\n",
            "Test cases:      500\n",
            "Fails (rate):    52 (10.4%)\n",
            "\n",
            "Example fails:\n",
            "0.3 ('What will happen if Donald Trump became the president of America?', 'What will happen now that President-elect Donald Trump has won the election?')\n",
            "0.6 ('What will happen if John Morales became the president of America?', 'What will happen now that President-elect John Morales has won the election?')\n",
            "0.6 ('What will happen if Joshua Garcia became the president of America?', 'What will happen now that President-elect Joshua Garcia has won the election?')\n",
            "\n",
            "----\n",
            "0.5 ('How might spending a week at Burning Man affect Donald Trump?', 'What might happen now that President-elect Donald Trump has won the election? What will be the impact?')\n",
            "0.6 ('How might spending a week at Burning Man affect Daniel Nelson?', 'What might happen now that President-elect Daniel Nelson has won the election? What will be the impact?')\n",
            "0.6 ('How might spending a week at Burning Man affect Matthew Jones?', 'What might happen now that President-elect Matthew Jones has won the election? What will be the impact?')\n",
            "\n",
            "----\n",
            "0.4 ('How much money has J.K. Rowling made from the Harry Potter movies?', 'How much money could JK Rowling make if she wrote an 8th Harry Potter book?')\n",
            "0.8 ('How much money has J.K. Rowling made from the Matthew Jones movies?', 'How much money could JK Rowling make if she wrote an 8th Matthew Jones book?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Change same location in both questions\n",
            "Test cases:      500\n",
            "Fails (rate):    30 (6.0%)\n",
            "\n",
            "Example fails:\n",
            "0.2 ('What are some of the movies of Hollywood that you must watch?', 'What are the best top 10 movies of Hollywood ever?')\n",
            "0.6 ('What are some of the movies of Chino Hills that you must watch?', 'What are the best top 10 movies of Chino Hills ever?')\n",
            "\n",
            "----\n",
            "0.9 ('What is the history of the 130 Montgomery Street building in San Francisco?', 'What is the longest street in San Francisco?')\n",
            "0.5 ('What is the history of the 130 Montgomery Street building in Orland Park?', 'What is the longest street in Orland Park?')\n",
            "\n",
            "----\n",
            "0.6 ('In a war between USA and India, could USA defeat India and occupy it?', \"What will happen if USA and Russia declare war? Who's side will India be?\")\n",
            "0.4 ('In a war between USA and Ireland, could USA defeat Ireland and occupy it?', \"What will happen if USA and Russia declare war? Who's side will Ireland be?\")\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Change same number in both questions\n",
            "Test cases:      500\n",
            "Fails (rate):    19 (3.8%)\n",
            "\n",
            "Example fails:\n",
            "0.5 ('What is the KVPY SA expected cut off for 2016?', 'How was KVPY SA 2016?')\n",
            "0.2 ('What is the KVPY SA expected cut off for 1796?', 'How was KVPY SA 1796?')\n",
            "0.2 ('What is the KVPY SA expected cut off for 2214?', 'How was KVPY SA 2214?')\n",
            "\n",
            "----\n",
            "0.6 ('What are the repercussions of 500 and 1000 rupee notes not being legal tender anymore?', 'What could be the consequences of recalling 500 and 1000 rupee note?')\n",
            "0.5 ('What are the repercussions of 542 and 1000 rupee notes not being legal tender anymore?', 'What could be the consequences of recalling 542 and 1000 rupee note?')\n",
            "\n",
            "----\n",
            "0.3 ('What will be the effect of banning 500 and 1000 Rs notes on real estate sector in India? Can we expect sharp fall in prices in short/long term?', 'What will the real estate look like now after the 500 and 1000 scraping?')\n",
            "0.6 ('What will be the effect of banning 451 and 1000 Rs notes on real estate sector in India? Can we expect sharp fall in prices in short/long term?', 'What will the real estate look like now after the 451 and 1000 scraping?')\n",
            "0.6 ('What will be the effect of banning 595 and 1000 Rs notes on real estate sector in India? Can we expect sharp fall in prices in short/long term?', 'What will the real estate look like now after the 595 and 1000 scraping?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Change first name in one of the questions\n",
            "Test cases:      500\n",
            "After filtering: 307 (61.4%)\n",
            "Fails (rate):    302 (98.4%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('If the United States has a female president, will her husband be called the first gentleman? What will Bill Clinton be called if Hillary is elected?', 'If Hillary Clinton were to be elected, would Bill Clinton still be called Mr. President?')\n",
            "1.0 ('If the United States has a female president, will her husband be called the first gentleman? What will Bill Clinton be called if Hillary is elected?', 'If Hillary Clinton were to be elected, would Edward Clinton still be called Mr. President?')\n",
            "1.0 ('If the United States has a female president, will her husband be called the first gentleman? What will Bill Clinton be called if Hillary is elected?', 'If Hillary Clinton were to be elected, would Travis Clinton still be called Mr. President?')\n",
            "\n",
            "----\n",
            "1.0 ('What does Jimmy Wales think of people who say Wikipedia is a bad source for correct information?', 'What does Jimmy Wales think about Wikipedia being considered as an unreliable source?')\n",
            "1.0 ('What does Carlos Wales think of people who say Wikipedia is a bad source for correct information?', 'What does Jimmy Wales think about Wikipedia being considered as an unreliable source?')\n",
            "1.0 ('What does Jimmy Wales think of people who say Wikipedia is a bad source for correct information?', 'What does Jared Wales think about Wikipedia being considered as an unreliable source?')\n",
            "\n",
            "----\n",
            "1.0 ('Will people vote for Hillary Clinton because she is a woman?', \"Should people vote for Hillary Clinton just because she's a woman?\")\n",
            "1.0 ('Will people vote for Hillary Clinton because she is a woman?', \"Should people vote for Julia Clinton just because she's a woman?\")\n",
            "1.0 ('Will people vote for Hillary Clinton because she is a woman?', \"Should people vote for Shannon Clinton just because she's a woman?\")\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Change first and last name in one of the questions\n",
            "Test cases:      682\n",
            "After filtering: 429 (62.9%)\n",
            "Fails (rate):    373 (86.9%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('How is Vajiram and Ravi for IAS preparation?', 'How is Vajiram and Ravi for ies?')\n",
            "1.0 ('How is Vajiram and Ravi for IAS preparation?', 'How is Vajiram and Alex for ies?')\n",
            "1.0 ('How is Vajiram and Ravi for IAS preparation?', 'How is Vajiram and Lucas for ies?')\n",
            "\n",
            "----\n",
            "1.0 ('What is your favorite Avril Lavigne song?', 'Which is the best song by Avril Lavigne you love the most  and why?')\n",
            "0.9 ('What is your favorite Nicole Martinez song?', 'Which is the best song by Avril Lavigne you love the most  and why?')\n",
            "0.9 ('What is your favorite Amanda Morales song?', 'Which is the best song by Avril Lavigne you love the most  and why?')\n",
            "\n",
            "----\n",
            "1.0 ('Can Donald Trump still win the 2016 U.S. Presidential Election?', 'Does Donald Trump still have a chance of winning?')\n",
            "1.0 ('Can William Martinez still win the 2016 U.S. Presidential Election?', 'Does Donald Trump still have a chance of winning?')\n",
            "1.0 ('Can Joshua Garcia still win the 2016 U.S. Presidential Election?', 'Does Donald Trump still have a chance of winning?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Change location in one of the questions\n",
            "Test cases:      1386\n",
            "After filtering: 1005 (72.5%)\n",
            "Fails (rate):    960 (95.5%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Which is the best institute for public speaking in Delhi, India?', 'Which are the top institutes for public speaking in India?')\n",
            "1.0 ('Which is the best institute for public speaking in Delhi, India?', 'Which are the top institutes for public speaking in Samoa?')\n",
            "1.0 ('Which is the best institute for public speaking in Delhi, India?', 'Which are the top institutes for public speaking in Gibraltar?')\n",
            "\n",
            "----\n",
            "0.8 ('What are the best things to buy in Germany?', \"What's best and cheaper things in Germany compared to India?\")\n",
            "0.7 ('What are the best things to buy in Montenegro?', \"What's best and cheaper things in Germany compared to India?\")\n",
            "0.7 ('What are the best things to buy in Germany?', \"What's best and cheaper things in Montenegro compared to India?\")\n",
            "\n",
            "----\n",
            "1.0 ('What is the relationship between North Korea and Japan?', 'What is the relationship like between North Korea and Japan?')\n",
            "1.0 ('What is the relationship between North Korea and Tuvalu?', 'What is the relationship like between North Korea and Japan?')\n",
            "1.0 ('What is the relationship between North Korea and Kenya?', 'What is the relationship like between North Korea and Japan?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Change numbers in one of the questions\n",
            "Test cases:      1500\n",
            "After filtering: 1085 (72.3%)\n",
            "Fails (rate):    1066 (98.2%)\n",
            "\n",
            "Example fails:\n",
            "0.5 ('What are your views on banning 500 and 1000 rupee notes? How does it affect black money and is it really gonna work and expose all the black money?', 'What do you think of the decision by the Indian Government to demonetize 500 and 1000 rupee notes?')\n",
            "0.7 ('What are your views on banning 500 and 1000 rupee notes? How does it affect black money and is it really gonna work and expose all the black money?', 'What do you think of the decision by the Indian Government to demonetize 431 and 1000 rupee notes?')\n",
            "0.6 ('What are your views on banning 500 and 1000 rupee notes? How does it affect black money and is it really gonna work and expose all the black money?', 'What do you think of the decision by the Indian Government to demonetize 500 and 1199 rupee notes?')\n",
            "\n",
            "----\n",
            "0.6 ('Why did the Indian government demonetize the current 500 and 1000 rupee notes and replace them with new notes?', \"Doesn't it defeat the purpose of demonetizing 500 and 1000 rupee bill if the Government of India introduces new 500 and 2000 rupee bills?\")\n",
            "0.6 ('Why did the Indian government demonetize the current 500 and 1000 rupee notes and replace them with new notes?', \"Doesn't it defeat the purpose of demonetizing 495 and 1000 rupee bill if the Government of India introduces new 495 and 2000 rupee bills?\")\n",
            "0.6 ('Why did the Indian government demonetize the current 500 and 1000 rupee notes and replace them with new notes?', \"Doesn't it defeat the purpose of demonetizing 500 and 1182 rupee bill if the Government of India introduces new 500 and 2000 rupee bills?\")\n",
            "\n",
            "----\n",
            "1.0 ('How do you say 1<x<2?', 'What is the best way to say 1?')\n",
            "1.0 ('How do you say 2<x<2?', 'What is the best way to say 1?')\n",
            "1.0 ('How do you say 2<x<2?', 'What is the best way to say 1?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Keep entitites, fill in with gibberish\n",
            "Test cases:      500\n",
            "Fails (rate):    393 (78.6%)\n",
            "\n",
            "Example fails:\n",
            "0.0 (\"Where can I find an owner's manual for a 2008 Toyota Vitz KSP 90?\", 'What are the differences, if any, between the Toyota GT-86, FT-86, Scion FR-S, and the Subaru BR-Z?')\n",
            "1.0 ('What are the differences, if any, between the Toyota GT-86, FT-86, Scion FR-S, and the Subaru BR-Z?', 'What distinguishes Toyota from Scion FR-S or Subaru BR-Z?')\n",
            "1.0 ('What are the differences, if any, between the Toyota GT-86, FT-86, Scion FR-S, and the Subaru BR-Z?', 'What distinguishes Toyota from Scion FR-S and Subaru BR-Z?')\n",
            "\n",
            "----\n",
            "0.0 ('What are the scopes to sell artificial flowers and plants in India?', 'I am doing my B.Tech at IIT Roorkee in biotechnology. What will be the scope of biotechnology in the next few years in India and abroad?')\n",
            "0.9 ('I am doing my B.Tech at IIT Roorkee in biotechnology. What will be the scope of biotechnology in the next few years in India and abroad?', 'What define the next few years for India?')\n",
            "0.9 ('I am doing my B.Tech at IIT Roorkee in biotechnology. What will be the scope of biotechnology in the next few years in India and abroad?', 'What are the next few years of India?')\n",
            "\n",
            "----\n",
            "0.1 ('How should you take care of a Border Collie/Golden Retriever puppy?', 'What is the temperament of Mini Westie puppies?')\n",
            "0.8 ('What is the temperament of Mini Westie puppies?', 'What is Mini Westie?')\n",
            "0.8 ('What is the temperament of Mini Westie puppies?', 'What Is Mini Westie?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Temporal\n",
            "\n",
            "Is person X != Did person use to be X\n",
            "Test cases:      999\n",
            "Fails (rate):    999 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is Brandon Nguyen an educator?', 'Did Brandon Nguyen use to be an educator?')\n",
            "----\n",
            "1.0 ('Is Kimberly Allen an attorney?', 'Did Kimberly Allen use to be an attorney?')\n",
            "----\n",
            "1.0 ('Is Ryan Hall an agent?', 'Did Ryan Hall use to be an agent?')\n",
            "----\n",
            "\n",
            "\n",
            "Is person X != Is person becoming X\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is Adam Collins an auditor?', 'Is Adam Collins becoming an auditor?')\n",
            "----\n",
            "1.0 ('Is Emily Baker a candidate?', 'Is Emily Baker becoming a candidate?')\n",
            "----\n",
            "1.0 ('Is John Adams an activist?', 'Is John Adams becoming an activist?')\n",
            "----\n",
            "\n",
            "\n",
            "What was person's life before becoming X != What was person's life after becoming X\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 (\"What was Sara Roberts's life before becoming an entrepreneur?\", \"What was Sara Roberts's life after becoming an entrepreneur?\")\n",
            "----\n",
            "1.0 (\"What was Christina Walker's life before becoming an executive?\", \"What was Christina Walker's life after becoming an executive?\")\n",
            "----\n",
            "1.0 (\"What was Scott Green's life before becoming an artist?\", \"What was Scott Green's life after becoming an artist?\")\n",
            "----\n",
            "\n",
            "\n",
            "Do you have to X your dog before Y it != Do you have to X your dog after Y it.\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Do you have to weigh your dog before cutting it?', 'Do you have to weigh your dog after cutting it?')\n",
            "----\n",
            "1.0 ('Do you have to examine your cat before eating it?', 'Do you have to examine your cat after eating it?')\n",
            "----\n",
            "1.0 ('Do you have to remove your hamster before feeding it?', 'Do you have to remove your hamster after feeding it?')\n",
            "----\n",
            "\n",
            "\n",
            "Is it {ok, dangerous, ...} to {smoke, rest, ...} after != before\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is it reasonable to eat before 2am?', 'Is it reasonable to eat after 2am?')\n",
            "----\n",
            "1.0 ('Is it dangerous to party before 5pm?', 'Is it dangerous to party after 5pm?')\n",
            "----\n",
            "1.0 ('Is it normal to read before 4am?', 'Is it normal to read after 4am?')\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Negation\n",
            "\n",
            "How can I become a X person != How can I become a person who is not X\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('How can I become an equal person?', 'How can I become a person who is not equal?')\n",
            "----\n",
            "1.0 ('How can I become a judged person?', 'How can I become a person who is not judged?')\n",
            "----\n",
            "1.0 ('How can I become a human person?', 'How can I become a person who is not human?')\n",
            "----\n",
            "\n",
            "\n",
            "Is it {ok, dangerous, ...} to {smoke, rest, ...} in country != Is it {ok, dangerous, ...} not to {smoke, rest, ...} in country\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is it proper to preach in Chad?', 'Is it proper not to preach in Chad?')\n",
            "----\n",
            "1.0 ('Is it proper to smoke in Burkina Faso?', 'Is it proper not to smoke in Burkina Faso?')\n",
            "----\n",
            "1.0 ('Is it proper to protest in Poland?', 'Is it proper not to protest in Poland?')\n",
            "----\n",
            "\n",
            "\n",
            "What are things a {noun} should worry about != should not worry about.\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('What are things an author should worry about?', 'What are things an author should not worry about?')\n",
            "----\n",
            "1.0 ('What are things an administrator should worry about?', 'What are things an administrator should not worry about?')\n",
            "----\n",
            "1.0 ('What are things an intern should worry about?', 'What are things an intern should not worry about?')\n",
            "----\n",
            "\n",
            "\n",
            "How can I become a X person == How can I become a person who is not antonym(X)\n",
            "Test cases:      2000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Coref\n",
            "\n",
            "Simple coref: he and she\n",
            "Test cases:      2000\n",
            "Fails (rate):    2000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('If Daniel and Grace were alone, do you think he would reject her?', 'If Daniel and Grace were alone, do you think she would reject him?')\n",
            "----\n",
            "1.0 ('If Andrew and Danielle were alone, do you think he would reject her?', 'If Andrew and Danielle were alone, do you think she would reject him?')\n",
            "----\n",
            "1.0 ('If Christina and Joshua were alone, do you think he would reject her?', 'If Christina and Joshua were alone, do you think she would reject him?')\n",
            "----\n",
            "\n",
            "\n",
            "Simple coref: his and her\n",
            "Test cases:      2000\n",
            "Fails (rate):    2000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('If Luke and Patricia were married, would her family be happy?', \"If Luke and Patricia were married, would Luke's family be happy?\")\n",
            "----\n",
            "1.0 ('If Travis and Avery were married, would her family be happy?', \"If Travis and Avery were married, would Travis's family be happy?\")\n",
            "----\n",
            "1.0 ('If Robert and Hailey were married, would her family be happy?', \"If Robert and Hailey were married, would Robert's family be happy?\")\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "SRL\n",
            "\n",
            "Who do X think - Who is the ... according to X\n",
            "Test cases:      1000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Order does not matter for comparison\n",
            "Test cases:      990\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Order does not matter for symmetric relations\n",
            "Test cases:      990\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Order does matter for asymmetric relations\n",
            "Test cases:      988\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "traditional SRL: active / passive swap\n",
            "Test cases:      1000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "traditional SRL: wrong active / passive swap\n",
            "Test cases:      1000\n",
            "Fails (rate):    1000 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Did Michael take the painting?', 'Was Michael taken by the painting?')\n",
            "----\n",
            "1.0 ('Did Andrew steal the game?', 'Was Andrew stolen by the game?')\n",
            "----\n",
            "1.0 ('Did James use the book?', 'Was James used by the book?')\n",
            "----\n",
            "\n",
            "\n",
            "traditional SRL: active / passive swap with people\n",
            "Test cases:      990\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "traditional SRL: wrong active / passive swap with people\n",
            "Test cases:      989\n",
            "Fails (rate):    989 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Does Erin prefer Nicholas?', 'Is Erin preferred by Nicholas?')\n",
            "----\n",
            "1.0 ('Does Christopher attack Patrick?', 'Is Christopher attacked by Patrick?')\n",
            "----\n",
            "1.0 ('Does Emma remember Anthony?', 'Is Emma remembered by Anthony?')\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n",
            "Logic\n",
            "\n",
            "A or B is not the same as C and D\n",
            "Test cases:      828\n",
            "Fails (rate):    680 (82.1%)\n",
            "\n",
            "Example fails:\n",
            "0.9 ('Is Andrea Murphy an interpreter or an administrator?', 'Is Andrea Murphy simultaneously an editor and an author?')\n",
            "----\n",
            "1.0 ('Is Jose Roberts an advisor or an administrator?', 'Is Jose Roberts simultaneously an auditor and a photographer?')\n",
            "----\n",
            "1.0 ('Is Andrew Smith an adviser or an accountant?', 'Is Andrew Smith simultaneously an economist and an auditor?')\n",
            "----\n",
            "\n",
            "\n",
            "A or B is not the same as A and B\n",
            "Test cases:      971\n",
            "Fails (rate):    971 (100.0%)\n",
            "\n",
            "Example fails:\n",
            "1.0 ('Is Emma Roberts an organizer or an investigator?', 'Is Emma Roberts simultaneously an organizer and an investigator?')\n",
            "----\n",
            "1.0 ('Is John Miller an agent or an actor?', 'Is John Miller simultaneously an agent and an actor?')\n",
            "----\n",
            "1.0 ('Is Anthony Johnson an intern or an author?', 'Is Anthony Johnson simultaneously an intern and an author?')\n",
            "----\n",
            "\n",
            "\n",
            "A and / or B is the same as B and / or A\n",
            "Test cases:      970\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "a {nationality} {profession} = a {profession} and {nationality}\n",
            "Test cases:      1000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Reflexivity: (q, q) should be duplicate\n",
            "Test cases:      1000\n",
            "Fails (rate):    0 (0.0%)\n",
            "\n",
            "\n",
            "Symmetry: f(a, b) = f(b, a)\n",
            "Test cases:      500\n",
            "Fails (rate):    36 (7.2%)\n",
            "\n",
            "Example fails:\n",
            "0.0 ('Nike+: What rewards does earning NikeFuel actually give you?', 'What does it mean gs Nike?')\n",
            "0.6 ('What does it mean gs Nike?', 'Nike+: What rewards does earning NikeFuel actually give you?')\n",
            "\n",
            "----\n",
            "0.5 ('Do actors get a hard-on while filming kissing scenes or hot scenes with actresses? Do they ever get carried away while filming these scenes?', 'How are sex scenes in movies shot? As an actor/actress, how is the experience?')\n",
            "0.3 ('How are sex scenes in movies shot? As an actor/actress, how is the experience?', 'Do actors get a hard-on while filming kissing scenes or hot scenes with actresses? Do they ever get carried away while filming these scenes?')\n",
            "\n",
            "----\n",
            "0.8 ('Can I add an app to my Vizio smart TV?', 'How can I watch downloaded movies on my Vizio TV?')\n",
            "0.2 ('How can I watch downloaded movies on my Vizio TV?', 'Can I add an app to my Vizio smart TV?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "Testing implications\n",
            "Test cases:      8328\n",
            "After filtering: 7144 (85.8%)\n",
            "Fails (rate):    1348 (18.9%)\n",
            "\n",
            "Example fails:\n",
            "0.1 ('Will banning 500 and 1000 notes can stop the black money?', 'How is black money curbed with the ban of 1000 rupee notes and introducing new 500 and 2000 rupee notes?')\n",
            "1.0 ('Will banning 500 and 1000 notes can stop the black money?', 'Will banning Rs.500 and Rs.1000 notes help to solve black money and corruption?')\n",
            "0.0 ('How is black money curbed with the ban of 1000 rupee notes and introducing new 500 and 2000 rupee notes?', 'Will banning Rs.500 and Rs.1000 notes help to solve black money and corruption?')\n",
            "\n",
            "----\n",
            "1.0 ('What are some new year resolutions for 2017?', 'Do you have any New Years resolutions for 2017?')\n",
            "0.9 ('What are some new year resolutions for 2017?', 'What are your resolutions for 2017? And why?')\n",
            "0.4 ('Do you have any New Years resolutions for 2017?', 'What are your resolutions for 2017? And why?')\n",
            "\n",
            "----\n",
            "0.8 ('What are shampoos that make your hair grow faster?', 'How can I make my hair grow?')\n",
            "1.0 ('What are shampoos that make your hair grow faster?', 'Is there a shampoo that will help my hair to grow faster?')\n",
            "0.3 ('How can I make my hair grow?', 'Is there a shampoo that will help my hair to grow faster?')\n",
            "\n",
            "----\n",
            "\n",
            "\n",
            "\n",
            "\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "OrEwjWgPSE98",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "b35a00b0-a9ff-42ab-af9e-e86d47461231"
      },
      "source": [
        "# Print the name of every test in the suite that is an MFT instance\n",
        "for test_name, test in suite.tests.items():\n",
        "  if not isinstance(test, checklist.test_types.MFT):\n",
        "    continue  # skip tests of any other type\n",
        "  print(test_name)"
      ],
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Modifier: adj\n",
            "different adjectives\n",
            "Different animals\n",
            "Irrelevant modifiers - animals\n",
            "Irrelevant modifiers - people\n",
            "Irrelevant preamble with different examples.\n",
            "Preamble is relevant (different injuries)\n",
            "How can I become more {synonym}?\n",
            "How can I become more X != How can I become less X\n",
            "How can I become more X = How can I become less antonym(X)\n",
            "same adjectives, different people\n",
            "same adjectives, different people v2\n",
            "same adjectives, different people v3\n",
            "Is person X != Did person use to be X\n",
            "Is person X != Is person becoming X\n",
            "What was person's life before becoming X != What was person's life after becoming X\n",
            "Do you have to X your dog before Y it != Do you have to X your dog after Y it.\n",
            "Is it {ok, dangerous, ...} to {smoke, rest, ...} after != before\n",
            "How can I become a X person != How can I become a person who is not X\n",
            "Is it {ok, dangerous, ...} to {smoke, rest, ...} in country != Is it {ok, dangerous, ...} not to {smoke, rest, ...} in country\n",
            "What are things a {noun} should worry about != should not worry about.\n",
            "How can I become a X person == How can I become a person who is not antonym(X)\n",
            "Simple coref: he and she\n",
            "Simple coref: his and her\n",
            "Who do X think - Who is the ... according to X\n",
            "Order does not matter for comparison\n",
            "Order does not matter for symmetric relations\n",
            "Order does matter for asymmetric relations\n",
            "traditional SRL: active / passive swap\n",
            "traditional SRL: wrong active / passive swap\n",
            "traditional SRL: active / passive swap with people\n",
            "traditional SRL: wrong active / passive swap with people\n",
            "A or B is not the same as C and D\n",
            "A or B is not the same as A and B\n",
            "A and / or B is the same as B and / or A\n",
            "a {nationality} {profession} = a {profession} and {nationality}\n",
            "Reflexivity: (q, q) should be duplicate\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "id": "4xyuOOlzIxgt"
      },
      "source": [
        ""
      ],
      "execution_count": null,
      "outputs": []
    }
  ]
}