{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Vbro-FVxyV8S"
      },
      "outputs": [],
      "source": [
        "import json\n",
        "\n",
        "input_path = \"\" # The .jsonlist file containing triplets with distractors\n",
        "\n",
        "with open(input_path, \"r\") as f:\n",
        "    data = f.readlines()\n",
        "\n",
        "for i in range(0, len(data), 3):\n",
        "    triplet0 = json.loads(data[i])\n",
        "    triplet1 = json.loads(data[i+1])\n",
        "    triplet2 = json.loads(data[i+2])\n",
        "\n",
        "    assert triplet0[\"fileID\"] == triplet1[\"fileID\"]\n",
        "    assert triplet0[\"fileID\"] == triplet2[\"fileID\"]\n",
        "\n",
        "    assert triplet0[\"instanceID\"] == triplet1[\"instanceID\"]\n",
        "    assert triplet0[\"instanceID\"] == triplet2[\"instanceID\"]\n",
        "\n",
        "    assert triplet0[\"structure\"] == triplet1[\"structure\"]\n",
        "    assert triplet0[\"structure\"] == triplet2[\"structure\"]\n",
        "\n",
        "    for prop in [\"prop1\", \"prop2\", \"prop3\"]:\n",
        "        assert triplet0[prop] == triplet1[prop]\n",
        "        assert triplet0[prop] == triplet2[prop]\n",
        "\n",
        "    assert triplet0[\"q_type\"] != triplet1[\"q_type\"]\n",
        "    assert triplet0[\"q_type\"] != triplet2[\"q_type\"]\n",
        "    assert triplet1[\"q_type\"] != triplet2[\"q_type\"]"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import random\n",
        "\n",
        "target_qtypes = {\"2.1\", \"2.2\", \"3.2\", \"3.3\"}\n",
        "\n",
        "output_path = \"\" # Final-formatting dataset\n",
        "\n",
        "def determine_context_and_choices(entry):\n",
        "    q_type = entry[\"q_type\"]\n",
        "    if q_type.endswith(\"1\"):\n",
        "        contexts = [entry[\"prop2\"], entry[\"prop3\"]]\n",
        "        answer = entry[\"prop1\"]\n",
        "    elif q_type.endswith(\"2\"):\n",
        "        contexts = [entry[\"prop1\"], entry[\"prop3\"]]\n",
        "        answer = entry[\"prop2\"]\n",
        "    else:  # x.3\n",
        "        contexts = [entry[\"prop1\"], entry[\"prop2\"]]\n",
        "        answer = entry[\"prop3\"]\n",
        "\n",
        "    if q_type in target_qtypes and random.random() < 0.5:\n",
        "        choices = [\n",
        "            {\"text\": answer, \"type\": \"_\"},\n",
        "            {\"text\": entry[\"d1\"], \"type\": \"i\"},\n",
        "            {\"text\": entry[\"d3\"], \"type\": \"ii\"},\n",
        "            {\"text\": entry[\"d2\"], \"type\": \"iii\"},\n",
        "        ]\n",
        "\n",
        "    else:\n",
        "        choices = [\n",
        "            {\"text\": answer, \"type\": \"_\"},\n",
        "            {\"text\": entry[\"d1\"], \"type\": \"i\"},\n",
        "            {\"text\": entry[\"d2\"], \"type\": \"ii\"},\n",
        "            {\"text\": entry[\"d3\"], \"type\": \"iii\"},\n",
        "        ]\n",
        "\n",
        "    return contexts, choices\n",
        "\n",
        "with open(input_path, \"r\", encoding=\"utf-8\") as infile, open(output_path, \"w\", encoding=\"utf-8\") as outfile:\n",
        "    for line in infile:\n",
        "        entry = json.loads(line)\n",
        "        contexts, choices = determine_context_and_choices(entry)\n",
        "\n",
        "        mcq_entry = {\n",
        "            \"fileID\": entry[\"fileID\"],\n",
        "            \"instanceID\": entry[\"instanceID\"],\n",
        "            \"structure\": entry[\"structure\"],\n",
        "            \"q_type\": entry[\"q_type\"],\n",
        "            \"contexts\": contexts,\n",
        "            \"choices\": choices\n",
        "        }\n",
        "\n",
        "        outfile.write(json.dumps(mcq_entry) + \"\\n\")"
      ],
      "metadata": {
        "id": "xsZZ1GIw1FrC"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}