{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1e7c733-a201-42ef-b057-2d331555b1c6",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "from pathlib import Path\n",
    "from collections import defaultdict\n",
    "import random"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8eeae3af-3ec0-4228-8897-1950ca745d8c",
   "metadata": {},
   "outputs": [],
   "source": [
    "with Path(\"/data/vist/sis/val.story-in-sequence.json\").open() as f:\n",
    "    data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14e74eb2-981e-4555-823e-fbbd4674de54",
   "metadata": {},
   "outputs": [],
   "source": [
    "data.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8754401-9032-4b08-8401-22238c40de7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "stories = defaultdict(lambda: defaultdict(list))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09cd41b4-871c-48a4-b70f-1de024197e81",
   "metadata": {},
   "outputs": [],
   "source": [
    "stories = defaultdict(lambda: defaultdict(list))\n",
    "for x in data[\"annotations\"]:\n",
    "    x = x[0]\n",
    "    stories[x[\"album_id\"]][x[\"worker_id\"]].append(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2dccaef9-2e6c-415b-8085-20620cdb2a8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "data[\"images\"][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1e03adc1-14a3-4bfe-8122-d952c5cbc594",
   "metadata": {},
   "outputs": [],
   "source": [
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "61a7aa2a-0dde-4db3-be70-200c8dbe22f4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def random_story(stories):\n",
    "    key = random.choice(list(stories.keys()))\n",
    "    story = stories[key]\n",
    "    print(\"Story:\")\n",
    "    for version, labels in story.items():\n",
    "        print(f\"\\nstory from worker #{version}:\")\n",
    "        for i, label in enumerate(labels):\n",
    "            print(f\"{i+1} {label['original_text']}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7b22833f-94b4-4512-9d72-02341eb89e49",
   "metadata": {},
   "outputs": [],
   "source": [
    "random_story(stories)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "43af4b2c-4fe3-4085-a159-44bbebb344f6",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
