{
  "cells": [
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "view-in-github"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/open-mmlab/mmaction2/blob/main/demo/mmaction2_tutorial.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "VcjSRFELVbNk"
      },
      "source": [
        "# MMAction2 Tutorial\n",
        "\n",
        "Welcome to MMAction2! This is the official colab tutorial for using MMAction2. In this tutorial, you will learn\n",
        "- Perform inference with a MMAction2 recognizer.\n",
        "- Train a new recognizer with a new dataset.\n",
        "\n",
        "\n",
        "Let's start!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "7LqHGkGEVqpm"
      },
      "source": [
        "## Install MMAction2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Bf8PpPXtVvmg",
        "outputId": "f15c9899-626c-451b-ac87-8f22993e10cb"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "nvcc: NVIDIA (R) Cuda compiler driver\n",
            "Copyright (c) 2005-2020 NVIDIA Corporation\n",
            "Built on Mon_Oct_12_20:09:46_PDT_2020\n",
            "Cuda compilation tools, release 11.1, V11.1.105\n",
            "Build cuda_11.1.TC455_06.29190527_0\n",
            "gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0\n",
            "Copyright (C) 2017 Free Software Foundation, Inc.\n",
            "This is free software; see the source for copying conditions.  There is NO\n",
            "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# Check nvcc version\n",
        "!nvcc -V\n",
        "# Check GCC version\n",
        "!gcc --version"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "5PAJ4ArzV5Ry",
        "outputId": "cab86e22-f2dc-48fb-c493-c8dba01d17f8"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Looking in links: https://download.pytorch.org/whl/torch_stable.html\n",
            "Collecting torch==1.9.0+cu111\n",
            "  Downloading https://download.pytorch.org/whl/cu111/torch-1.9.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (2041.3 MB)\n",
            "\u001b[K     |█████████████                   | 834.1 MB 1.2 MB/s eta 0:16:17tcmalloc: large alloc 1147494400 bytes == 0x398f4000 @  0x7fda6376b615 0x58e046 0x4f2e5e 0x4d19df 0x51b31c 0x5b41c5 0x58f49e 0x51b221 0x5b41c5 0x58f49e 0x51837f 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4ba70a 0x538136 0x590055 0x51b180 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x58f49e 0x51740e 0x58f2a7 0x517947 0x5b41c5 0x58f49e\n",
            "\u001b[K     |████████████████▌               | 1055.7 MB 1.3 MB/s eta 0:12:53tcmalloc: large alloc 1434370048 bytes == 0x7df4a000 @  0x7fda6376b615 0x58e046 0x4f2e5e 0x4d19df 0x51b31c 0x5b41c5 0x58f49e 0x51b221 0x5b41c5 0x58f49e 0x51837f 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4ba70a 0x538136 0x590055 0x51b180 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x58f49e 0x51740e 0x58f2a7 0x517947 0x5b41c5 0x58f49e\n",
            "\u001b[K     |█████████████████████           | 1336.2 MB 1.3 MB/s eta 0:09:03tcmalloc: large alloc 1792966656 bytes == 0x2d7c000 @  0x7fda6376b615 0x58e046 0x4f2e5e 0x4d19df 0x51b31c 0x5b41c5 0x58f49e 0x51b221 0x5b41c5 0x58f49e 0x51837f 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4ba70a 0x538136 0x590055 0x51b180 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x58f49e 0x51740e 0x58f2a7 0x517947 0x5b41c5 0x58f49e\n",
            "\u001b[K     |██████████████████████████▌     | 1691.1 MB 1.2 MB/s eta 0:04:54tcmalloc: large alloc 2241208320 bytes == 0x6db64000 @  0x7fda6376b615 0x58e046 0x4f2e5e 0x4d19df 0x51b31c 0x5b41c5 0x58f49e 0x51b221 0x5b41c5 0x58f49e 0x51837f 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4cfabb 0x517aa0 0x4ba70a 0x538136 0x590055 0x51b180 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x58f49e 0x51740e 0x58f2a7 0x517947 0x5b41c5 0x58f49e\n",
            "\u001b[K     |████████████████████████████████| 2041.3 MB 1.2 MB/s eta 0:00:01tcmalloc: large alloc 2041348096 bytes == 0xf34c6000 @  0x7fda6376a1e7 0x4b2150 0x4b21dc 0x58e046 0x4f2e5e 0x4d19df 0x51b31c 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x58f2a7 0x517947 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x58f49e 0x51837f 0x5b41c5\n",
            "tcmalloc: large alloc 2551685120 bytes == 0x1e1444000 @  0x7fda6376b615 0x58e046 0x4f2e5e 0x4d19df 0x51b31c 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x5b41c5 0x58f49e 0x517947 0x58f2a7 0x517947 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x58f49e 0x51837f 0x5b41c5 0x4ba899 0x4d29f9\n",
            "\u001b[K     |████████████████████████████████| 2041.3 MB 7.3 kB/s \n",
            "\u001b[?25hCollecting torchvision==0.10.0+cu111\n",
            "  Downloading https://download.pytorch.org/whl/cu111/torchvision-0.10.0%2Bcu111-cp37-cp37m-linux_x86_64.whl (23.2 MB)\n",
            "\u001b[K     |████████████████████████████████| 23.2 MB 1.1 MB/s \n",
            "\u001b[?25hRequirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch==1.9.0+cu111) (4.1.1)\n",
            "Requirement already satisfied: pillow>=5.3.0 in /usr/local/lib/python3.7/dist-packages (from torchvision==0.10.0+cu111) (7.1.2)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torchvision==0.10.0+cu111) (1.21.6)\n",
            "Installing collected packages: torch, torchvision\n",
            "  Attempting uninstall: torch\n",
            "    Found existing installation: torch 1.12.1+cu113\n",
            "    Uninstalling torch-1.12.1+cu113:\n",
            "      Successfully uninstalled torch-1.12.1+cu113\n",
            "  Attempting uninstall: torchvision\n",
            "    Found existing installation: torchvision 0.13.1+cu113\n",
            "    Uninstalling torchvision-0.13.1+cu113:\n",
            "      Successfully uninstalled torchvision-0.13.1+cu113\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "torchtext 0.13.1 requires torch==1.12.1, but you have torch 1.9.0+cu111 which is incompatible.\n",
            "torchaudio 0.12.1+cu113 requires torch==1.12.1, but you have torch 1.9.0+cu111 which is incompatible.\u001b[0m\n",
            "Successfully installed torch-1.9.0+cu111 torchvision-0.10.0+cu111\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Looking in links: https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html\n",
            "Collecting mmcv>=2.0.0rc1\n",
            "  Downloading https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/mmcv-2.0.0rc1-cp37-cp37m-manylinux1_x86_64.whl (46.7 MB)\n",
            "\u001b[K     |████████████████████████████████| 46.7 MB 329 kB/s \n",
            "\u001b[?25hRequirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.7/dist-packages (from mmcv>=2.0.0rc1) (4.6.0.66)\n",
            "Collecting addict\n",
            "  Downloading addict-2.4.0-py3-none-any.whl (3.8 kB)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from mmcv>=2.0.0rc1) (21.3)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mmcv>=2.0.0rc1) (1.21.6)\n",
            "Collecting yapf\n",
            "  Downloading yapf-0.32.0-py2.py3-none-any.whl (190 kB)\n",
            "\u001b[K     |████████████████████████████████| 190 kB 39.5 MB/s \n",
            "\u001b[?25hRequirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from mmcv>=2.0.0rc1) (7.1.2)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from mmcv>=2.0.0rc1) (6.0)\n",
            "Collecting mmengine\n",
            "  Downloading mmengine-0.1.0-py3-none-any.whl (280 kB)\n",
            "\u001b[K     |████████████████████████████████| 280 kB 69.0 MB/s \n",
            "\u001b[?25hRequirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mmengine->mmcv>=2.0.0rc1) (3.2.2)\n",
            "Requirement already satisfied: termcolor in /usr/local/lib/python3.7/dist-packages (from mmengine->mmcv>=2.0.0rc1) (1.1.0)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine->mmcv>=2.0.0rc1) (1.4.4)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine->mmcv>=2.0.0rc1) (3.0.9)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine->mmcv>=2.0.0rc1) (0.11.0)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine->mmcv>=2.0.0rc1) (2.8.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->mmengine->mmcv>=2.0.0rc1) (4.1.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mmengine->mmcv>=2.0.0rc1) (1.15.0)\n",
            "Installing collected packages: yapf, addict, mmengine, mmcv\n",
            "Successfully installed addict-2.4.0 mmcv-2.0.0rc1 mmengine-0.1.0 yapf-0.32.0\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Requirement already satisfied: mmengine in /usr/local/lib/python3.7/dist-packages (0.1.0)\n",
            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.7/dist-packages (from mmengine) (6.0)\n",
            "Requirement already satisfied: yapf in /usr/local/lib/python3.7/dist-packages (from mmengine) (0.32.0)\n",
            "Requirement already satisfied: addict in /usr/local/lib/python3.7/dist-packages (from mmengine) (2.4.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mmengine) (1.21.6)\n",
            "Requirement already satisfied: termcolor in /usr/local/lib/python3.7/dist-packages (from mmengine) (1.1.0)\n",
            "Requirement already satisfied: opencv-python>=3 in /usr/local/lib/python3.7/dist-packages (from mmengine) (4.6.0.66)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mmengine) (3.2.2)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine) (3.0.9)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine) (1.4.4)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine) (0.11.0)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmengine) (2.8.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->mmengine) (4.1.1)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mmengine) (1.15.0)\n",
            "Cloning into 'mmaction2'...\n",
            "remote: Enumerating objects: 18817, done.\u001b[K\n",
            "remote: Counting objects: 100% (143/143), done.\u001b[K\n",
            "remote: Compressing objects: 100% (118/118), done.\u001b[K\n",
            "remote: Total 18817 (delta 48), reused 71 (delta 20), pack-reused 18674\u001b[K\n",
            "Receiving objects: 100% (18817/18817), 67.06 MiB | 17.18 MiB/s, done.\n",
            "Resolving deltas: 100% (13336/13336), done.\n",
            "/content/mmaction2\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Obtaining file:///content/mmaction2\n",
            "Collecting decord>=0.4.1\n",
            "  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n",
            "\u001b[K     |████████████████████████████████| 13.6 MB 3.7 MB/s \n",
            "\u001b[?25hCollecting einops\n",
            "  Downloading einops-0.4.1-py3-none-any.whl (28 kB)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from mmaction2==1.0.0rc0) (3.2.2)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from mmaction2==1.0.0rc0) (1.21.6)\n",
            "Requirement already satisfied: opencv-contrib-python in /usr/local/lib/python3.7/dist-packages (from mmaction2==1.0.0rc0) (4.6.0.66)\n",
            "Requirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from mmaction2==1.0.0rc0) (7.1.2)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from mmaction2==1.0.0rc0) (1.7.3)\n",
            "Requirement already satisfied: torch>=1.3 in /usr/local/lib/python3.7/dist-packages (from mmaction2==1.0.0rc0) (1.9.0+cu111)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch>=1.3->mmaction2==1.0.0rc0) (4.1.1)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmaction2==1.0.0rc0) (0.11.0)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmaction2==1.0.0rc0) (2.8.2)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmaction2==1.0.0rc0) (1.4.4)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->mmaction2==1.0.0rc0) (3.0.9)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->mmaction2==1.0.0rc0) (1.15.0)\n",
            "Installing collected packages: einops, decord, mmaction2\n",
            "  Running setup.py develop for mmaction2\n",
            "Successfully installed decord-0.6.0 einops-0.4.1 mmaction2-1.0.0rc0\n",
            "Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/\n",
            "Collecting av>=9.0\n",
            "  Downloading av-9.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (28.2 MB)\n",
            "\u001b[K     |████████████████████████████████| 28.2 MB 82.7 MB/s \n",
            "\u001b[?25hRequirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 2)) (0.16.0)\n",
            "Requirement already satisfied: imgaug in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 3)) (0.4.0)\n",
            "Requirement already satisfied: librosa in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 4)) (0.8.1)\n",
            "Requirement already satisfied: lmdb in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 5)) (0.99)\n",
            "Requirement already satisfied: moviepy in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 6)) (0.2.3.5)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 7)) (21.3)\n",
            "Collecting pims\n",
            "  Downloading PIMS-0.6.1.tar.gz (86 kB)\n",
            "\u001b[K     |████████████████████████████████| 86 kB 6.0 MB/s \n",
            "\u001b[?25hCollecting PyTurboJPEG\n",
            "  Downloading PyTurboJPEG-1.6.7.tar.gz (11 kB)\n",
            "Requirement already satisfied: soundfile in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 10)) (0.10.3.post1)\n",
            "Requirement already satisfied: tensorboard in /usr/local/lib/python3.7/dist-packages (from -r requirements/optional.txt (line 11)) (2.8.0)\n",
            "Collecting timm\n",
            "  Downloading timm-0.6.7-py3-none-any.whl (509 kB)\n",
            "\u001b[K     |████████████████████████████████| 509 kB 65.2 MB/s \n",
            "\u001b[?25hCollecting wandb\n",
            "  Downloading wandb-0.13.3-py2.py3-none-any.whl (1.8 MB)\n",
            "\u001b[K     |████████████████████████████████| 1.8 MB 55.8 MB/s \n",
            "\u001b[?25hRequirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (1.21.6)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (1.7.3)\n",
            "Requirement already satisfied: imageio in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (2.9.0)\n",
            "Requirement already satisfied: Shapely in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (1.8.4)\n",
            "Requirement already satisfied: opencv-python in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (4.6.0.66)\n",
            "Requirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (7.1.2)\n",
            "Requirement already satisfied: scikit-image>=0.14.2 in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (0.18.3)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (1.15.0)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from imgaug->-r requirements/optional.txt (line 3)) (3.2.2)\n",
            "Requirement already satisfied: tifffile>=2019.7.26 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 3)) (2021.11.2)\n",
            "Requirement already satisfied: PyWavelets>=1.1.1 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 3)) (1.3.0)\n",
            "Requirement already satisfied: networkx>=2.0 in /usr/local/lib/python3.7/dist-packages (from scikit-image>=0.14.2->imgaug->-r requirements/optional.txt (line 3)) (2.6.3)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 3)) (0.11.0)\n",
            "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 3)) (1.4.4)\n",
            "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 3)) (3.0.9)\n",
            "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->imgaug->-r requirements/optional.txt (line 3)) (2.8.2)\n",
            "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from kiwisolver>=1.0.1->matplotlib->imgaug->-r requirements/optional.txt (line 3)) (4.1.1)\n",
            "Requirement already satisfied: pooch>=1.0 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (1.6.0)\n",
            "Requirement already satisfied: joblib>=0.14 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (1.1.0)\n",
            "Requirement already satisfied: audioread>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (3.0.0)\n",
            "Requirement already satisfied: scikit-learn!=0.19.0,>=0.14.0 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (1.0.2)\n",
            "Requirement already satisfied: numba>=0.43.0 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (0.56.2)\n",
            "Requirement already satisfied: decorator>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (4.4.2)\n",
            "Requirement already satisfied: resampy>=0.2.2 in /usr/local/lib/python3.7/dist-packages (from librosa->-r requirements/optional.txt (line 4)) (0.4.0)\n",
            "Requirement already satisfied: cffi>=1.0 in /usr/local/lib/python3.7/dist-packages (from soundfile->-r requirements/optional.txt (line 10)) (1.15.1)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.7/dist-packages (from cffi>=1.0->soundfile->-r requirements/optional.txt (line 10)) (2.21)\n",
            "Requirement already satisfied: llvmlite<0.40,>=0.39.0dev0 in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa->-r requirements/optional.txt (line 4)) (0.39.1)\n",
            "Requirement already satisfied: setuptools<60 in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa->-r requirements/optional.txt (line 4)) (57.4.0)\n",
            "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from numba>=0.43.0->librosa->-r requirements/optional.txt (line 4)) (4.12.0)\n",
            "Requirement already satisfied: appdirs>=1.3.0 in /usr/local/lib/python3.7/dist-packages (from pooch>=1.0->librosa->-r requirements/optional.txt (line 4)) (1.4.4)\n",
            "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from pooch>=1.0->librosa->-r requirements/optional.txt (line 4)) (2.23.0)\n",
            "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->-r requirements/optional.txt (line 4)) (3.0.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->-r requirements/optional.txt (line 4)) (2022.6.15)\n",
            "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->-r requirements/optional.txt (line 4)) (1.24.3)\n",
            "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->pooch>=1.0->librosa->-r requirements/optional.txt (line 4)) (2.10)\n",
            "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn!=0.19.0,>=0.14.0->librosa->-r requirements/optional.txt (line 4)) (3.1.0)\n",
            "Requirement already satisfied: tqdm<5.0,>=4.11.2 in /usr/local/lib/python3.7/dist-packages (from moviepy->-r requirements/optional.txt (line 6)) (4.64.1)\n",
            "Collecting slicerator>=0.9.8\n",
            "  Downloading slicerator-1.1.0-py3-none-any.whl (10 kB)\n",
            "Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (1.35.0)\n",
            "Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (1.48.1)\n",
            "Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (0.6.1)\n",
            "Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (0.37.1)\n",
            "Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (1.2.0)\n",
            "Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (3.17.3)\n",
            "Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (0.4.6)\n",
            "Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (1.0.1)\n",
            "Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (1.8.1)\n",
            "Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard->-r requirements/optional.txt (line 11)) (3.4.1)\n",
            "Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 11)) (4.9)\n",
            "Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 11)) (0.2.8)\n",
            "Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 11)) (4.2.4)\n",
            "Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard->-r requirements/optional.txt (line 11)) (1.3.1)\n",
            "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->numba>=0.43.0->librosa->-r requirements/optional.txt (line 4)) (3.8.1)\n",
            "Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->-r requirements/optional.txt (line 11)) (0.4.8)\n",
            "Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard->-r requirements/optional.txt (line 11)) (3.2.0)\n",
            "Requirement already satisfied: torch>=1.4 in /usr/local/lib/python3.7/dist-packages (from timm->-r requirements/optional.txt (line 12)) (1.9.0+cu111)\n",
            "Requirement already satisfied: torchvision in /usr/local/lib/python3.7/dist-packages (from timm->-r requirements/optional.txt (line 12)) (0.10.0+cu111)\n",
            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.7/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (6.0)\n",
            "Collecting sentry-sdk>=1.0.0\n",
            "  Downloading sentry_sdk-1.9.9-py2.py3-none-any.whl (162 kB)\n",
            "\u001b[K     |████████████████████████████████| 162 kB 61.4 MB/s \n",
            "\u001b[?25hCollecting shortuuid>=0.5.0\n",
            "  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)\n",
            "Requirement already satisfied: psutil>=5.0.0 in /usr/local/lib/python3.7/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (5.4.8)\n",
            "Collecting pathtools\n",
            "  Downloading pathtools-0.1.2.tar.gz (11 kB)\n",
            "Collecting GitPython>=1.0.0\n",
            "  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)\n",
            "\u001b[K     |████████████████████████████████| 181 kB 50.2 MB/s \n",
            "\u001b[?25hRequirement already satisfied: Click!=8.0.0,>=7.0 in /usr/local/lib/python3.7/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (7.1.2)\n",
            "Collecting docker-pycreds>=0.4.0\n",
            "  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)\n",
            "Collecting setproctitle\n",
            "  Downloading setproctitle-1.3.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)\n",
            "Requirement already satisfied: promise<3,>=2.0 in /usr/local/lib/python3.7/dist-packages (from wandb->-r requirements/optional.txt (line 13)) (2.3)\n",
            "Collecting gitdb<5,>=4.0.1\n",
            "  Downloading gitdb-4.0.9-py3-none-any.whl (63 kB)\n",
            "\u001b[K     |████████████████████████████████| 63 kB 1.9 MB/s \n",
            "\u001b[?25hCollecting smmap<6,>=3.0.1\n",
            "  Downloading smmap-5.0.0-py3-none-any.whl (24 kB)\n",
            "Collecting sentry-sdk>=1.0.0\n",
            "  Downloading sentry_sdk-1.9.8-py2.py3-none-any.whl (158 kB)\n",
            "\u001b[K     |████████████████████████████████| 158 kB 71.6 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.7-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 73.7 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.6-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 61.7 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.5-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 72.2 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.4-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 58.3 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.3-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 77.5 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.2-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 74.8 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.1-py2.py3-none-any.whl (157 kB)\n",
            "\u001b[K     |████████████████████████████████| 157 kB 78.3 MB/s \n",
            "\u001b[?25h  Downloading sentry_sdk-1.9.0-py2.py3-none-any.whl (156 kB)\n",
            "\u001b[K     |████████████████████████████████| 156 kB 78.3 MB/s \n",
            "\u001b[?25hBuilding wheels for collected packages: pims, PyTurboJPEG, pathtools\n",
            "  Building wheel for pims (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pims: filename=PIMS-0.6.1-py3-none-any.whl size=82636 sha256=6f1abb69aebd4a185010ae4f642ea85a1024e57a16018d049078388a9570902d\n",
            "  Stored in directory: /root/.cache/pip/wheels/8e/d5/a9/f702433436655b7a2bc7ff93efd742587dd5abd44f7e406917\n",
            "  Building wheel for PyTurboJPEG (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for PyTurboJPEG: filename=PyTurboJPEG-1.6.7-py3-none-any.whl size=12168 sha256=afaaf82724c60ef43eed834fd1ca21ba7d8fce3dad033125360c68c247a425f4\n",
            "  Stored in directory: /root/.cache/pip/wheels/71/d6/9a/cce1808661c049dc4b55b3f41001198dd0efd979cb43833f89\n",
            "  Building wheel for pathtools (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pathtools: filename=pathtools-0.1.2-py3-none-any.whl size=8806 sha256=d258ef45da44f5e0ca9f3d7c1145ab282b85834eafad399ff551f7c6d2ce1936\n",
            "  Stored in directory: /root/.cache/pip/wheels/3e/31/09/fa59cef12cdcfecc627b3d24273699f390e71828921b2cbba2\n",
            "Successfully built pims PyTurboJPEG pathtools\n",
            "Installing collected packages: smmap, gitdb, slicerator, shortuuid, setproctitle, sentry-sdk, pathtools, GitPython, docker-pycreds, wandb, timm, PyTurboJPEG, pims, av\n",
            "Successfully installed GitPython-3.1.27 PyTurboJPEG-1.6.7 av-9.2.0 docker-pycreds-0.4.0 gitdb-4.0.9 pathtools-0.1.2 pims-0.6.1 sentry-sdk-1.9.0 setproctitle-1.3.2 shortuuid-1.0.9 slicerator-1.1.0 smmap-5.0.0 timm-0.6.7 wandb-0.13.3\n"
          ]
        }
      ],
      "source": [
        "# install dependencies: (use cu111 because colab has CUDA 11.1)\n",
        "!pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html\n",
        "\n",
        "# Install mmcv\n",
        "!pip install 'mmcv>=2.0.0' -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.9.0/index.html\n",
        "\n",
        "# Install mmengine\n",
        "!pip install mmengine\n",
        "\n",
        "# Install mmaction2\n",
        "!rm -rf mmaction2\n",
        "!git clone https://github.com/open-mmlab/mmaction2.git -b main\n",
        "%cd mmaction2\n",
        "\n",
        "!pip install -e .\n",
        "\n",
        "# Install some optional requirements\n",
        "!pip install -r requirements/optional.txt"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "No_zZAFpWC-a",
        "outputId": "74ed33f2-4b5b-45eb-991f-38061a5f26c2"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "1.9.0+cu111 True\n",
            "1.0.0rc0\n",
            "11.1\n",
            "GCC 7.3\n",
            "OrderedDict([('sys.platform', 'linux'), ('Python', '3.7.14 (default, Sep  8 2022, 00:06:44) [GCC 7.5.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'Tesla T4'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 11.1, V11.1.105'), ('GCC', 'x86_64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0'), ('PyTorch', '1.9.0+cu111'), ('PyTorch compiling details', 'PyTorch built with:\\n  - GCC 7.3\\n  - C++ Version: 201402\\n  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications\\n  - Intel(R) MKL-DNN v2.1.2 (Git Hash 98be7e8afa711dc9b66c8ff3504129cb82013cdb)\\n  - OpenMP 201511 (a.k.a. OpenMP 4.5)\\n  - NNPACK is enabled\\n  - CPU capability usage: AVX2\\n  - CUDA Runtime 11.1\\n  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86\\n  - CuDNN 8.0.5\\n  - Magma 2.5.2\\n  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \\n'), ('TorchVision', '0.10.0+cu111'), ('OpenCV', '4.6.0'), ('MMEngine', '0.1.0')])\n"
          ]
        }
      ],
      "source": [
        "# Check Pytorch installation\n",
        "import torch, torchvision\n",
        "print(torch.__version__, torch.cuda.is_available())\n",
        "\n",
        "# Check MMAction2 installation\n",
        "import mmaction\n",
        "print(mmaction.__version__)\n",
        "\n",
        "# Check MMCV installation\n",
        "from mmcv.ops import get_compiling_cuda_version, get_compiler_version\n",
        "print(get_compiling_cuda_version())\n",
        "print(get_compiler_version())\n",
        "\n",
        "# Check MMEngine installation\n",
        "from mmengine.utils.dl_utils import collect_env\n",
        "print(collect_env())"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "pXf7oV5DWdab"
      },
      "source": [
        "## Perform inference with a MMAction2 recognizer\n",
        "MMAction2 already provides high level APIs to do inference and training."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "64CW6d_AaT-Q",
        "outputId": "65f64306-5947-4507-d5e7-39cda4d4bbcb"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "--2022-09-27 02:52:15--  https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
            "Resolving download.openmmlab.com (download.openmmlab.com)... 161.117.242.67\n",
            "Connecting to download.openmmlab.com (download.openmmlab.com)|161.117.242.67|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 97579339 (93M) [application/octet-stream]\n",
            "Saving to: ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’\n",
            "\n",
            "checkpoints/tsn_r50 100%[===================>]  93.06M  26.1MB/s    in 3.7s    \n",
            "\n",
            "2022-09-27 02:52:19 (25.0 MB/s) - ‘checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth’ saved [97579339/97579339]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "!mkdir checkpoints\n",
        "!wget -c https://download.openmmlab.com/mmaction/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth \\\n",
        "      -O checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "HNZB7NoSabzj",
        "outputId": "d77f88c3-516f-4345-d1a2-a17fa28603c4"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "local loads checkpoint from path: checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n"
          ]
        }
      ],
      "source": [
        "from mmaction.apis import inference_recognizer, init_recognizer\n",
        "from mmengine import Config\n",
        "\n",
        "\n",
        "# Choose to use a config and initialize the recognizer\n",
        "config = 'configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py'\n",
        "config = Config.fromfile(config)\n",
        "# Setup a checkpoint file to load\n",
        "checkpoint = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
        "# Initialize the recognizer\n",
        "model = init_recognizer(config, checkpoint, device='cuda:0')"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "id": "rEMsBnpHapAn"
      },
      "outputs": [],
      "source": [
        "# Use the recognizer to do inference\n",
        "from operator import itemgetter\n",
        "video = 'demo/demo.mp4'\n",
        "label = 'tools/data/kinetics/label_map_k400.txt'\n",
        "results = inference_recognizer(model, video)\n",
        "\n",
        "pred_scores = results.pred_scores.item.tolist()\n",
        "score_tuples = tuple(zip(range(len(pred_scores)), pred_scores))\n",
        "score_sorted = sorted(score_tuples, key=itemgetter(1), reverse=True)\n",
        "top5_label = score_sorted[:5]\n",
        "\n",
        "labels = open(label).readlines()\n",
        "labels = [x.strip() for x in labels]\n",
        "results = [(labels[k[0]], k[1]) for k in top5_label]\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NIyJXqfWathq",
        "outputId": "16296299-ac08-4319-bdce-b0293c05ad7a"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "The top-5 labels with corresponding scores are:\n",
            "arm wrestling:  29.61644172668457\n",
            "rock scissors paper:  10.754841804504395\n",
            "shaking hands:  9.908400535583496\n",
            "clapping:  9.189912796020508\n",
            "massaging feet:  8.305307388305664\n"
          ]
        }
      ],
      "source": [
        "print('The top-5 labels with corresponding scores are:')\n",
        "for result in results:\n",
        "    print(f'{result[0]}: ', result[1])"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "QuZG8kZ2fJ5d"
      },
      "source": [
        "## Train a recognizer on customized dataset\n",
        "\n",
        "To train a new recognizer, there are usually three things to do:\n",
        "1. Support a new dataset\n",
        "2. Modify the config\n",
        "3. Train a new recognizer"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "neEFyxChfgiJ"
      },
      "source": [
        "### Support a new dataset\n",
        "\n",
        "In this tutorial, we gives an example to convert the data into the format of existing datasets. Other methods and more advanced usages can be found in the [doc](/docs/tutorials/new_dataset.md)\n",
        "\n",
        "Firstly, let's download a tiny dataset obtained from [Kinetics-400](https://deepmind.com/research/open-source/open-source-datasets/kinetics/). We select 30 videos with their labels as train dataset and 10 videos with their labels as test dataset."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gjsUj9JzgUlJ",
        "outputId": "db715958-3b04-4a71-9b90-5074e0550c3d"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "rm: cannot remove 'kinetics400_tiny.zip*': No such file or directory\n",
            "--2022-09-27 02:57:21--  https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n",
            "Resolving download.openmmlab.com (download.openmmlab.com)... 161.117.242.67\n",
            "Connecting to download.openmmlab.com (download.openmmlab.com)|161.117.242.67|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 18308682 (17M) [application/zip]\n",
            "Saving to: ‘kinetics400_tiny.zip’\n",
            "\n",
            "kinetics400_tiny.zi 100%[===================>]  17.46M  15.7MB/s    in 1.1s    \n",
            "\n",
            "2022-09-27 02:57:23 (15.7 MB/s) - ‘kinetics400_tiny.zip’ saved [18308682/18308682]\n",
            "\n"
          ]
        }
      ],
      "source": [
        "# download, decompress the data\n",
        "!rm kinetics400_tiny.zip*\n",
        "!rm -rf kinetics400_tiny\n",
        "!wget https://download.openmmlab.com/mmaction/kinetics400_tiny.zip\n",
        "!unzip kinetics400_tiny.zip > /dev/null"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 11,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "AbZ-o7V6hNw4",
        "outputId": "e577d4de-c121-43be-c11c-efd4082a01ff"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Reading package lists...\n",
            "Building dependency tree...\n",
            "Reading state information...\n",
            "The following package was automatically installed and is no longer required:\n",
            "  libnvidia-common-460\n",
            "Use 'apt autoremove' to remove it.\n",
            "The following NEW packages will be installed:\n",
            "  tree\n",
            "0 upgraded, 1 newly installed, 0 to remove and 20 not upgraded.\n",
            "Need to get 40.7 kB of archives.\n",
            "After this operation, 105 kB of additional disk space will be used.\n",
            "Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 tree amd64 1.7.0-5 [40.7 kB]\n",
            "Fetched 40.7 kB in 1s (47.1 kB/s)\n",
            "Selecting previously unselected package tree.\n",
            "(Reading database ... 157604 files and directories currently installed.)\n",
            "Preparing to unpack .../tree_1.7.0-5_amd64.deb ...\n",
            "Unpacking tree (1.7.0-5) ...\n",
            "Setting up tree (1.7.0-5) ...\n",
            "Processing triggers for man-db (2.8.3-2ubuntu0.1) ...\n",
            "kinetics400_tiny\n",
            "├── kinetics_tiny_train_video.txt\n",
            "├── kinetics_tiny_val_video.txt\n",
            "├── train\n",
            "│   ├── 27_CSXByd3s.mp4\n",
            "│   ├── 34XczvTaRiI.mp4\n",
            "│   ├── A-wiliK50Zw.mp4\n",
            "│   ├── D32_1gwq35E.mp4\n",
            "│   ├── D92m0HsHjcQ.mp4\n",
            "│   ├── DbX8mPslRXg.mp4\n",
            "│   ├── FMlSTTpN3VY.mp4\n",
            "│   ├── h10B9SVE-nk.mp4\n",
            "│   ├── h2YqqUhnR34.mp4\n",
            "│   ├── iRuyZSKhHRg.mp4\n",
            "│   ├── IyfILH9lBRo.mp4\n",
            "│   ├── kFC3KY2bOP8.mp4\n",
            "│   ├── LvcFDgCAXQs.mp4\n",
            "│   ├── O46YA8tI530.mp4\n",
            "│   ├── oMrZaozOvdQ.mp4\n",
            "│   ├── oXy-e_P_cAI.mp4\n",
            "│   ├── P5M-hAts7MQ.mp4\n",
            "│   ├── phDqGd0NKoo.mp4\n",
            "│   ├── PnOe3GZRVX8.mp4\n",
            "│   ├── R8HXQkdgKWA.mp4\n",
            "│   ├── RqnKtCEoEcA.mp4\n",
            "│   ├── soEcZZsBmDs.mp4\n",
            "│   ├── TkkZPZHbAKA.mp4\n",
            "│   ├── T_TMNGzVrDk.mp4\n",
            "│   ├── WaS0qwP46Us.mp4\n",
            "│   ├── Wh_YPQdH1Zg.mp4\n",
            "│   ├── WWP5HZJsg-o.mp4\n",
            "│   ├── xGY2dP0YUjA.mp4\n",
            "│   ├── yLC9CtWU5ws.mp4\n",
            "│   └── ZQV4U2KQ370.mp4\n",
            "└── val\n",
            "    ├── 0pVGiAU6XEA.mp4\n",
            "    ├── AQrbRSnRt8M.mp4\n",
            "    ├── b6Q_b7vgc7Q.mp4\n",
            "    ├── ddvJ6-faICE.mp4\n",
            "    ├── IcLztCtvhb8.mp4\n",
            "    ├── ik4BW3-SCts.mp4\n",
            "    ├── jqRrH30V0k4.mp4\n",
            "    ├── SU_x2LQqSLs.mp4\n",
            "    ├── u4Rm6srmIS8.mp4\n",
            "    └── y5Iu7XkTqV0.mp4\n",
            "\n",
            "2 directories, 42 files\n"
          ]
        }
      ],
      "source": [
        "# Check the directory structure of the tiny data\n",
        "\n",
        "# Install tree first\n",
        "!apt-get -q install tree\n",
        "!tree kinetics400_tiny"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fTdi6dI0hY3g",
        "outputId": "c2ea77cc-8e68-4f2a-b3fa-8469bf4d1a23"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "D32_1gwq35E.mp4 0\n",
            "iRuyZSKhHRg.mp4 1\n",
            "oXy-e_P_cAI.mp4 0\n",
            "34XczvTaRiI.mp4 1\n",
            "h2YqqUhnR34.mp4 0\n",
            "O46YA8tI530.mp4 0\n",
            "kFC3KY2bOP8.mp4 1\n",
            "WWP5HZJsg-o.mp4 1\n",
            "phDqGd0NKoo.mp4 1\n",
            "yLC9CtWU5ws.mp4 0\n",
            "27_CSXByd3s.mp4 1\n",
            "IyfILH9lBRo.mp4 1\n",
            "T_TMNGzVrDk.mp4 1\n",
            "TkkZPZHbAKA.mp4 0\n",
            "PnOe3GZRVX8.mp4 1\n",
            "soEcZZsBmDs.mp4 1\n",
            "FMlSTTpN3VY.mp4 1\n",
            "WaS0qwP46Us.mp4 0\n",
            "A-wiliK50Zw.mp4 1\n",
            "oMrZaozOvdQ.mp4 1\n",
            "ZQV4U2KQ370.mp4 0\n",
            "DbX8mPslRXg.mp4 1\n",
            "h10B9SVE-nk.mp4 1\n",
            "P5M-hAts7MQ.mp4 0\n",
            "R8HXQkdgKWA.mp4 0\n",
            "D92m0HsHjcQ.mp4 0\n",
            "RqnKtCEoEcA.mp4 0\n",
            "LvcFDgCAXQs.mp4 0\n",
            "xGY2dP0YUjA.mp4 0\n",
            "Wh_YPQdH1Zg.mp4 0\n"
          ]
        }
      ],
      "source": [
        "# After downloading the data, we need to check the annotation format\n",
        "!cat kinetics400_tiny/kinetics_tiny_train_video.txt"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "0bq0mxmEi29H"
      },
      "source": [
        "According to the format defined in [`VideoDataset`](./datasets/video_dataset.py), each line indicates a sample video with the filepath and label, which are split with a whitespace."
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "Ht_DGJA9jQar"
      },
      "source": [
        "### Modify the config\n",
        "\n",
        "In the next step, we need to modify the config for the training.\n",
        "To accelerate the process, we finetune a recognizer using a pre-trained recognizer."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 12,
      "metadata": {
        "id": "LjCcmCKOjktc"
      },
      "outputs": [],
      "source": [
        "cfg = Config.fromfile('./configs/recognition/tsn/tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb.py')"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "tc8YhFFGjp3e"
      },
      "source": [
        "Given a config that trains a TSN model on kinetics400-full dataset, we need to modify some values to use it for training TSN on Kinetics400-tiny dataset.\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 13,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tlhu9byjjt-K",
        "outputId": "7fbd0262-bf8a-4879-840e-adb07b6a2c93"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Config:\n",
            "model = dict(\n",
            "    type='Recognizer2D',\n",
            "    backbone=dict(\n",
            "        type='ResNet',\n",
            "        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n",
            "        depth=50,\n",
            "        norm_eval=False),\n",
            "    cls_head=dict(\n",
            "        type='TSNHead',\n",
            "        num_classes=2,\n",
            "        in_channels=2048,\n",
            "        spatial_type='avg',\n",
            "        consensus=dict(type='AvgConsensus', dim=1),\n",
            "        dropout_ratio=0.4,\n",
            "        init_std=0.01,\n",
            "        average_clips=None),\n",
            "    data_preprocessor=dict(\n",
            "        type='ActionDataPreprocessor',\n",
            "        mean=[123.675, 116.28, 103.53],\n",
            "        std=[58.395, 57.12, 57.375],\n",
            "        format_shape='NCHW'),\n",
            "    train_cfg=None,\n",
            "    test_cfg=None)\n",
            "train_cfg = dict(\n",
            "    type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n",
            "val_cfg = dict(type='ValLoop')\n",
            "test_cfg = dict(type='TestLoop')\n",
            "param_scheduler = [\n",
            "    dict(\n",
            "        type='MultiStepLR',\n",
            "        begin=0,\n",
            "        end=100,\n",
            "        by_epoch=True,\n",
            "        milestones=[40, 80],\n",
            "        gamma=0.1)\n",
            "]\n",
            "optim_wrapper = dict(\n",
            "    optimizer=dict(\n",
            "        type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n",
            "    clip_grad=dict(max_norm=40, norm_type=2))\n",
            "default_scope = 'mmaction'\n",
            "default_hooks = dict(\n",
            "    runtime_info=dict(type='RuntimeInfoHook'),\n",
            "    timer=dict(type='IterTimerHook'),\n",
            "    logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n",
            "    param_scheduler=dict(type='ParamSchedulerHook'),\n",
            "    checkpoint=dict(\n",
            "        type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n",
            "    sampler_seed=dict(type='DistSamplerSeedHook'))\n",
            "env_cfg = dict(\n",
            "    cudnn_benchmark=False,\n",
            "    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n",
            "    dist_cfg=dict(backend='nccl'))\n",
            "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n",
            "vis_backends = [dict(type='LocalVisBackend')]\n",
            "visualizer = dict(\n",
            "    type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n",
            "log_level = 'INFO'\n",
            "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
            "resume = False\n",
            "dataset_type = 'VideoDataset'\n",
            "data_root = 'kinetics400_tiny/train/'\n",
            "data_root_val = 'kinetics400_tiny/val/'\n",
            "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
            "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
            "train_pipeline = [\n",
            "    dict(type='DecordInit'),\n",
            "    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n",
            "    dict(type='DecordDecode'),\n",
            "    dict(type='Resize', scale=(-1, 256)),\n",
            "    dict(\n",
            "        type='MultiScaleCrop',\n",
            "        input_size=224,\n",
            "        scales=(1, 0.875, 0.75, 0.66),\n",
            "        random_crop=False,\n",
            "        max_wh_scale_gap=1),\n",
            "    dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
            "    dict(type='Flip', flip_ratio=0.5),\n",
            "    dict(type='FormatShape', input_format='NCHW'),\n",
            "    dict(type='PackActionInputs')\n",
            "]\n",
            "val_pipeline = [\n",
            "    dict(type='DecordInit'),\n",
            "    dict(\n",
            "        type='SampleFrames',\n",
            "        clip_len=1,\n",
            "        frame_interval=1,\n",
            "        num_clips=3,\n",
            "        test_mode=True),\n",
            "    dict(type='DecordDecode'),\n",
            "    dict(type='Resize', scale=(-1, 256)),\n",
            "    dict(type='CenterCrop', crop_size=224),\n",
            "    dict(type='FormatShape', input_format='NCHW'),\n",
            "    dict(type='PackActionInputs')\n",
            "]\n",
            "test_pipeline = [\n",
            "    dict(type='DecordInit'),\n",
            "    dict(\n",
            "        type='SampleFrames',\n",
            "        clip_len=1,\n",
            "        frame_interval=1,\n",
            "        num_clips=25,\n",
            "        test_mode=True),\n",
            "    dict(type='DecordDecode'),\n",
            "    dict(type='Resize', scale=(-1, 256)),\n",
            "    dict(type='TenCrop', crop_size=224),\n",
            "    dict(type='FormatShape', input_format='NCHW'),\n",
            "    dict(type='PackActionInputs')\n",
            "]\n",
            "train_dataloader = dict(\n",
            "    batch_size=2,\n",
            "    num_workers=2,\n",
            "    persistent_workers=True,\n",
            "    sampler=dict(type='DefaultSampler', shuffle=True),\n",
            "    dataset=dict(\n",
            "        type='VideoDataset',\n",
            "        ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n",
            "        data_prefix=dict(video='kinetics400_tiny/train/'),\n",
            "        pipeline=[\n",
            "            dict(type='DecordInit'),\n",
            "            dict(\n",
            "                type='SampleFrames', clip_len=1, frame_interval=1,\n",
            "                num_clips=3),\n",
            "            dict(type='DecordDecode'),\n",
            "            dict(type='Resize', scale=(-1, 256)),\n",
            "            dict(\n",
            "                type='MultiScaleCrop',\n",
            "                input_size=224,\n",
            "                scales=(1, 0.875, 0.75, 0.66),\n",
            "                random_crop=False,\n",
            "                max_wh_scale_gap=1),\n",
            "            dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
            "            dict(type='Flip', flip_ratio=0.5),\n",
            "            dict(type='FormatShape', input_format='NCHW'),\n",
            "            dict(type='PackActionInputs')\n",
            "        ]))\n",
            "val_dataloader = dict(\n",
            "    batch_size=2,\n",
            "    num_workers=2,\n",
            "    persistent_workers=True,\n",
            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
            "    dataset=dict(\n",
            "        type='VideoDataset',\n",
            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
            "        pipeline=[\n",
            "            dict(type='DecordInit'),\n",
            "            dict(\n",
            "                type='SampleFrames',\n",
            "                clip_len=1,\n",
            "                frame_interval=1,\n",
            "                num_clips=3,\n",
            "                test_mode=True),\n",
            "            dict(type='DecordDecode'),\n",
            "            dict(type='Resize', scale=(-1, 256)),\n",
            "            dict(type='CenterCrop', crop_size=224),\n",
            "            dict(type='FormatShape', input_format='NCHW'),\n",
            "            dict(type='PackActionInputs')\n",
            "        ],\n",
            "        test_mode=True))\n",
            "test_dataloader = dict(\n",
            "    batch_size=1,\n",
            "    num_workers=2,\n",
            "    persistent_workers=True,\n",
            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
            "    dataset=dict(\n",
            "        type='VideoDataset',\n",
            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
            "        pipeline=[\n",
            "            dict(type='DecordInit'),\n",
            "            dict(\n",
            "                type='SampleFrames',\n",
            "                clip_len=1,\n",
            "                frame_interval=1,\n",
            "                num_clips=25,\n",
            "                test_mode=True),\n",
            "            dict(type='DecordDecode'),\n",
            "            dict(type='Resize', scale=(-1, 256)),\n",
            "            dict(type='TenCrop', crop_size=224),\n",
            "            dict(type='FormatShape', input_format='NCHW'),\n",
            "            dict(type='PackActionInputs')\n",
            "        ],\n",
            "        test_mode=True))\n",
            "val_evaluator = dict(type='AccMetric')\n",
            "test_evaluator = dict(type='AccMetric')\n",
            "work_dir = './tutorial_exps'\n",
            "\n"
          ]
        }
      ],
      "source": [
        "from mmengine.runner import set_random_seed\n",
        "\n",
        "# Modify dataset type and path\n",
        "cfg.data_root = 'kinetics400_tiny/train/'\n",
        "cfg.data_root_val = 'kinetics400_tiny/val/'\n",
        "cfg.ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
        "cfg.ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
        "\n",
        "\n",
        "cfg.test_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
        "cfg.test_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/val/'\n",
        "\n",
        "cfg.train_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
        "cfg.train_dataloader.dataset.data_prefix.video = 'kinetics400_tiny/train/'\n",
        "\n",
        "cfg.val_dataloader.dataset.ann_file = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
        "cfg.val_dataloader.dataset.data_prefix.video  = 'kinetics400_tiny/val/'\n",
        "\n",
        "\n",
        "# Modify num classes of the model in cls_head\n",
        "cfg.model.cls_head.num_classes = 2\n",
        "# We can use the pre-trained TSN model\n",
        "cfg.load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
        "\n",
        "# Set up working dir to save files and logs.\n",
        "cfg.work_dir = './tutorial_exps'\n",
        "\n",
        "# The original learning rate (LR) is set for 8-GPU training.\n",
        "# We divide it by 8 since we only use one GPU.\n",
        "cfg.train_dataloader.batch_size = cfg.train_dataloader.batch_size // 16\n",
        "cfg.val_dataloader.batch_size = cfg.val_dataloader.batch_size // 16\n",
        "cfg.optim_wrapper.optimizer.lr = cfg.optim_wrapper.optimizer.lr / 8 / 16\n",
        "cfg.train_cfg.max_epochs = 10\n",
        "\n",
        "cfg.train_dataloader.num_workers = 2\n",
        "cfg.val_dataloader.num_workers = 2\n",
        "cfg.test_dataloader.num_workers = 2\n",
        "\n",
        "# We can initialize the logger for training and have a look\n",
        "# at the final config used for training\n",
        "print(f'Config:\\n{cfg.pretty_text}')\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "tES-qnZ3k38Z"
      },
      "source": [
        "### Train a new recognizer\n",
        "\n",
        "Finally, lets initialize the dataset and recognizer, then train a new recognizer!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 14,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1000,
          "referenced_widgets": [
            "40af3bfc9e0a486cb95f50766c76787f",
            "e9024658f55f4a4e9f40c76cf3510377",
            "1f7168d5d48942ec960a3dd2734ae4b3",
            "ea430aaddedc43d6862d13470da78261",
            "46371a881bd0487eb956fe1552d33bb2",
            "8ce5458513694d43a5f6e03ef4c94219",
            "efef671ff3bd491199f713ef0dc30188",
            "b2ee09effef844ee811df107d556c1ec",
            "f55845dcbfa445f18884408313d7a229",
            "2d14de40880f48c5809bcd1391e1b564",
            "c21c25b1f9f347aaa0428e6528e3a3e0"
          ]
        },
        "id": "dDBWkdDRk6oz",
        "outputId": "006abb37-4097-43e1-a008-d4251a89b5fb"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "09/27 02:57:47 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - \n",
            "------------------------------------------------------------\n",
            "System environment:\n",
            "    sys.platform: linux\n",
            "    Python: 3.7.14 (default, Sep  8 2022, 00:06:44) [GCC 7.5.0]\n",
            "    CUDA available: True\n",
            "    numpy_random_seed: 222144858\n",
            "    GPU 0: Tesla T4\n",
            "    CUDA_HOME: /usr/local/cuda\n",
            "    NVCC: Cuda compilation tools, release 11.1, V11.1.105\n",
            "    GCC: x86_64-linux-gnu-gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0\n",
            "    PyTorch: 1.9.0+cu111\n",
            "    PyTorch compiling details: PyTorch built with:\n",
            "  - GCC 7.3\n",
            "  - C++ Version: 201402\n",
            "  - Intel(R) Math Kernel Library Version 2020.0.0 Product Build 20191122 for Intel(R) 64 architecture applications\n",
            "  - Intel(R) MKL-DNN v2.1.2 (Git Hash 98be7e8afa711dc9b66c8ff3504129cb82013cdb)\n",
            "  - OpenMP 201511 (a.k.a. OpenMP 4.5)\n",
            "  - NNPACK is enabled\n",
            "  - CPU capability usage: AVX2\n",
            "  - CUDA Runtime 11.1\n",
            "  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86\n",
            "  - CuDNN 8.0.5\n",
            "  - Magma 2.5.2\n",
            "  - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.1, CUDNN_VERSION=8.0.5, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.9.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, \n",
            "\n",
            "    TorchVision: 0.10.0+cu111\n",
            "    OpenCV: 4.6.0\n",
            "    MMEngine: 0.1.0\n",
            "\n",
            "Runtime environment:\n",
            "    cudnn_benchmark: False\n",
            "    mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}\n",
            "    dist_cfg: {'backend': 'nccl'}\n",
            "    seed: None\n",
            "    Distributed launcher: none\n",
            "    Distributed training: False\n",
            "    GPU number: 1\n",
            "------------------------------------------------------------\n",
            "\n",
            "09/27 02:57:47 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Config:\n",
            "model = dict(\n",
            "    type='Recognizer2D',\n",
            "    backbone=dict(\n",
            "        type='ResNet',\n",
            "        pretrained='https://download.pytorch.org/models/resnet50-11ad3fa6.pth',\n",
            "        depth=50,\n",
            "        norm_eval=False),\n",
            "    cls_head=dict(\n",
            "        type='TSNHead',\n",
            "        num_classes=2,\n",
            "        in_channels=2048,\n",
            "        spatial_type='avg',\n",
            "        consensus=dict(type='AvgConsensus', dim=1),\n",
            "        dropout_ratio=0.4,\n",
            "        init_std=0.01,\n",
            "        average_clips=None),\n",
            "    data_preprocessor=dict(\n",
            "        type='ActionDataPreprocessor',\n",
            "        mean=[123.675, 116.28, 103.53],\n",
            "        std=[58.395, 57.12, 57.375],\n",
            "        format_shape='NCHW'),\n",
            "    train_cfg=None,\n",
            "    test_cfg=None)\n",
            "train_cfg = dict(\n",
            "    type='EpochBasedTrainLoop', max_epochs=10, val_begin=1, val_interval=1)\n",
            "val_cfg = dict(type='ValLoop')\n",
            "test_cfg = dict(type='TestLoop')\n",
            "param_scheduler = [\n",
            "    dict(\n",
            "        type='MultiStepLR',\n",
            "        begin=0,\n",
            "        end=100,\n",
            "        by_epoch=True,\n",
            "        milestones=[40, 80],\n",
            "        gamma=0.1)\n",
            "]\n",
            "optim_wrapper = dict(\n",
            "    optimizer=dict(\n",
            "        type='SGD', lr=7.8125e-05, momentum=0.9, weight_decay=0.0001),\n",
            "    clip_grad=dict(max_norm=40, norm_type=2))\n",
            "default_scope = 'mmaction'\n",
            "default_hooks = dict(\n",
            "    runtime_info=dict(type='RuntimeInfoHook'),\n",
            "    timer=dict(type='IterTimerHook'),\n",
            "    logger=dict(type='LoggerHook', interval=20, ignore_last=False),\n",
            "    param_scheduler=dict(type='ParamSchedulerHook'),\n",
            "    checkpoint=dict(\n",
            "        type='CheckpointHook', interval=3, save_best='auto', max_keep_ckpts=3),\n",
            "    sampler_seed=dict(type='DistSamplerSeedHook'))\n",
            "env_cfg = dict(\n",
            "    cudnn_benchmark=False,\n",
            "    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),\n",
            "    dist_cfg=dict(backend='nccl'))\n",
            "log_processor = dict(type='LogProcessor', window_size=20, by_epoch=True)\n",
            "vis_backends = [dict(type='LocalVisBackend')]\n",
            "visualizer = dict(\n",
            "    type='ActionVisualizer', vis_backends=[dict(type='LocalVisBackend')])\n",
            "log_level = 'INFO'\n",
            "load_from = './checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'\n",
            "resume = False\n",
            "dataset_type = 'VideoDataset'\n",
            "data_root = 'kinetics400_tiny/train/'\n",
            "data_root_val = 'kinetics400_tiny/val/'\n",
            "ann_file_train = 'kinetics400_tiny/kinetics_tiny_train_video.txt'\n",
            "ann_file_val = 'kinetics400_tiny/kinetics_tiny_val_video.txt'\n",
            "train_pipeline = [\n",
            "    dict(type='DecordInit'),\n",
            "    dict(type='SampleFrames', clip_len=1, frame_interval=1, num_clips=3),\n",
            "    dict(type='DecordDecode'),\n",
            "    dict(type='Resize', scale=(-1, 256)),\n",
            "    dict(\n",
            "        type='MultiScaleCrop',\n",
            "        input_size=224,\n",
            "        scales=(1, 0.875, 0.75, 0.66),\n",
            "        random_crop=False,\n",
            "        max_wh_scale_gap=1),\n",
            "    dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
            "    dict(type='Flip', flip_ratio=0.5),\n",
            "    dict(type='FormatShape', input_format='NCHW'),\n",
            "    dict(type='PackActionInputs')\n",
            "]\n",
            "val_pipeline = [\n",
            "    dict(type='DecordInit'),\n",
            "    dict(\n",
            "        type='SampleFrames',\n",
            "        clip_len=1,\n",
            "        frame_interval=1,\n",
            "        num_clips=3,\n",
            "        test_mode=True),\n",
            "    dict(type='DecordDecode'),\n",
            "    dict(type='Resize', scale=(-1, 256)),\n",
            "    dict(type='CenterCrop', crop_size=224),\n",
            "    dict(type='FormatShape', input_format='NCHW'),\n",
            "    dict(type='PackActionInputs')\n",
            "]\n",
            "test_pipeline = [\n",
            "    dict(type='DecordInit'),\n",
            "    dict(\n",
            "        type='SampleFrames',\n",
            "        clip_len=1,\n",
            "        frame_interval=1,\n",
            "        num_clips=25,\n",
            "        test_mode=True),\n",
            "    dict(type='DecordDecode'),\n",
            "    dict(type='Resize', scale=(-1, 256)),\n",
            "    dict(type='TenCrop', crop_size=224),\n",
            "    dict(type='FormatShape', input_format='NCHW'),\n",
            "    dict(type='PackActionInputs')\n",
            "]\n",
            "train_dataloader = dict(\n",
            "    batch_size=2,\n",
            "    num_workers=2,\n",
            "    persistent_workers=True,\n",
            "    sampler=dict(type='DefaultSampler', shuffle=True),\n",
            "    dataset=dict(\n",
            "        type='VideoDataset',\n",
            "        ann_file='kinetics400_tiny/kinetics_tiny_train_video.txt',\n",
            "        data_prefix=dict(video='kinetics400_tiny/train/'),\n",
            "        pipeline=[\n",
            "            dict(type='DecordInit'),\n",
            "            dict(\n",
            "                type='SampleFrames', clip_len=1, frame_interval=1,\n",
            "                num_clips=3),\n",
            "            dict(type='DecordDecode'),\n",
            "            dict(type='Resize', scale=(-1, 256)),\n",
            "            dict(\n",
            "                type='MultiScaleCrop',\n",
            "                input_size=224,\n",
            "                scales=(1, 0.875, 0.75, 0.66),\n",
            "                random_crop=False,\n",
            "                max_wh_scale_gap=1),\n",
            "            dict(type='Resize', scale=(224, 224), keep_ratio=False),\n",
            "            dict(type='Flip', flip_ratio=0.5),\n",
            "            dict(type='FormatShape', input_format='NCHW'),\n",
            "            dict(type='PackActionInputs')\n",
            "        ]))\n",
            "val_dataloader = dict(\n",
            "    batch_size=2,\n",
            "    num_workers=2,\n",
            "    persistent_workers=True,\n",
            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
            "    dataset=dict(\n",
            "        type='VideoDataset',\n",
            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
            "        pipeline=[\n",
            "            dict(type='DecordInit'),\n",
            "            dict(\n",
            "                type='SampleFrames',\n",
            "                clip_len=1,\n",
            "                frame_interval=1,\n",
            "                num_clips=3,\n",
            "                test_mode=True),\n",
            "            dict(type='DecordDecode'),\n",
            "            dict(type='Resize', scale=(-1, 256)),\n",
            "            dict(type='CenterCrop', crop_size=224),\n",
            "            dict(type='FormatShape', input_format='NCHW'),\n",
            "            dict(type='PackActionInputs')\n",
            "        ],\n",
            "        test_mode=True))\n",
            "test_dataloader = dict(\n",
            "    batch_size=1,\n",
            "    num_workers=2,\n",
            "    persistent_workers=True,\n",
            "    sampler=dict(type='DefaultSampler', shuffle=False),\n",
            "    dataset=dict(\n",
            "        type='VideoDataset',\n",
            "        ann_file='kinetics400_tiny/kinetics_tiny_val_video.txt',\n",
            "        data_prefix=dict(video='kinetics400_tiny/val/'),\n",
            "        pipeline=[\n",
            "            dict(type='DecordInit'),\n",
            "            dict(\n",
            "                type='SampleFrames',\n",
            "                clip_len=1,\n",
            "                frame_interval=1,\n",
            "                num_clips=25,\n",
            "                test_mode=True),\n",
            "            dict(type='DecordDecode'),\n",
            "            dict(type='Resize', scale=(-1, 256)),\n",
            "            dict(type='TenCrop', crop_size=224),\n",
            "            dict(type='FormatShape', input_format='NCHW'),\n",
            "            dict(type='PackActionInputs')\n",
            "        ],\n",
            "        test_mode=True))\n",
            "val_evaluator = dict(type='AccMetric')\n",
            "test_evaluator = dict(type='AccMetric')\n",
            "work_dir = './tutorial_exps'\n",
            "\n",
            "Result has been saved to /content/mmaction2/tutorial_exps/modules_statistic_results.json\n",
            "09/27 02:57:48 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used.\n",
            "http loads checkpoint from path: https://download.pytorch.org/models/resnet50-11ad3fa6.pth\n"
          ]
        },
        {
          "name": "stderr",
          "output_type": "stream",
          "text": [
            "Downloading: \"https://download.pytorch.org/models/resnet50-11ad3fa6.pth\" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth\n"
          ]
        },
        {
          "data": {
            "application/vnd.jupyter.widget-view+json": {
              "model_id": "40af3bfc9e0a486cb95f50766c76787f",
              "version_major": 2,
              "version_minor": 0
            },
            "text/plain": [
              "  0%|          | 0.00/97.8M [00:00<?, ?B/s]"
            ]
          },
          "metadata": {},
          "output_type": "display_data"
        },
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "09/27 02:57:49 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - These parameters in pretrained checkpoint are not loaded: {'fc.bias', 'fc.weight'}\n",
            "local loads checkpoint from path: ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
            "The model and loaded state dict do not match exactly\n",
            "\n",
            "size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([2, 2048]).\n",
            "size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([2]).\n",
            "09/27 02:57:49 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Load checkpoint from ./checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth\n",
            "09/27 02:57:49 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Checkpoints will be saved to /content/mmaction2/tutorial_exps by HardDiskBackend.\n",
            "09/27 02:57:52 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:57:52 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [1][15/15]  lr: 7.8125e-05  memory: 3982  data_time: 0.0752  grad_norm: 12.4249  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.7375  loss: 0.7375  time: 0.2058\n",
            "09/27 02:57:53 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [1][5/5]  acc/top1: 0.5000  acc/top5: 1.0000  acc/mean1: 0.5000\n",
            "09/27 02:57:54 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The best checkpoint with 0.5000 acc/top1 at 1 epoch is saved to best_acc/top1_epoch_1.pth.\n",
            "09/27 02:57:57 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:57:57 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [2][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0766  grad_norm: 12.1984  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.7093  loss: 0.7093  time: 0.1874\n",
            "09/27 02:57:58 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [2][5/5]  acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000\n",
            "09/27 02:57:58 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc/top1_epoch_1.pth is removed\n",
            "09/27 02:57:59 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The best checkpoint with 0.7000 acc/top1 at 2 epoch is saved to best_acc/top1_epoch_2.pth.\n",
            "09/27 02:58:02 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:02 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [3][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0781  grad_norm: 12.3429  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.7050  loss: 0.7050  time: 0.1847\n",
            "09/27 02:58:02 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Saving checkpoint at 3 epochs\n",
            "09/27 02:58:05 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [3][5/5]  acc/top1: 0.8000  acc/top5: 1.0000  acc/mean1: 0.8000\n",
            "09/27 02:58:05 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc/top1_epoch_2.pth is removed\n",
            "09/27 02:58:05 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The best checkpoint with 0.8000 acc/top1 at 3 epoch is saved to best_acc/top1_epoch_3.pth.\n",
            "09/27 02:58:08 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:08 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [4][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0933  grad_norm: 11.8890  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.6913  loss: 0.6913  time: 0.1929\n",
            "09/27 02:58:09 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [4][5/5]  acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000\n",
            "09/27 02:58:09 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The previous best checkpoint /content/mmaction2/tutorial_exps/best_acc/top1_epoch_3.pth is removed\n",
            "09/27 02:58:10 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - The best checkpoint with 1.0000 acc/top1 at 4 epoch is saved to best_acc/top1_epoch_4.pth.\n",
            "09/27 02:58:13 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:13 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [5][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0788  grad_norm: 12.5985  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.6858  loss: 0.6858  time: 0.1906\n",
            "09/27 02:58:14 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [5][5/5]  acc/top1: 0.7000  acc/top5: 1.0000  acc/mean1: 0.7000\n",
            "09/27 02:58:17 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:17 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [6][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0862  grad_norm: 12.3359  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.6537  loss: 0.6537  time: 0.1946\n",
            "09/27 02:58:17 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Saving checkpoint at 6 epochs\n",
            "09/27 02:58:20 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [6][5/5]  acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000\n",
            "09/27 02:58:22 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:22 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [7][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0834  grad_norm: 12.0508  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6482  loss: 0.6482  time: 0.1870\n",
            "09/27 02:58:23 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [7][5/5]  acc/top1: 0.9000  acc/top5: 1.0000  acc/mean1: 0.9000\n",
            "09/27 02:58:26 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:26 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [8][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0825  grad_norm: 11.4525  top1_acc: 1.0000  top5_acc: 1.0000  loss_cls: 0.6055  loss: 0.6055  time: 0.1809\n",
            "09/27 02:58:27 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [8][5/5]  acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000\n",
            "09/27 02:58:30 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:30 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [9][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0775  grad_norm: 12.5174  top1_acc: 0.0000  top5_acc: 1.0000  loss_cls: 0.6563  loss: 0.6563  time: 0.1766\n",
            "09/27 02:58:30 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Saving checkpoint at 9 epochs\n",
            "09/27 02:58:33 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [9][5/5]  acc/top1: 0.8000  acc/top5: 1.0000  acc/mean1: 0.8000\n",
            "09/27 02:58:35 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Exp name: tsn_imagenet-pretrained-r50_8xb32-1x1x3-100e_kinetics400-rgb_20220927_025747\n",
            "09/27 02:58:35 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(train) [10][15/15]  lr: 7.8125e-05  memory: 909  data_time: 0.0722  grad_norm: 12.1286  top1_acc: 0.5000  top5_acc: 1.0000  loss_cls: 0.6339  loss: 0.6339  time: 0.1777\n",
            "09/27 02:58:35 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Saving checkpoint at 10 epochs\n",
            "09/27 02:58:38 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(val) [10][5/5]  acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000\n"
          ]
        },
        {
          "data": {
            "text/plain": [
              "Recognizer2D(\n",
              "  (data_preprocessor): ActionDataPreprocessor()\n",
              "  (backbone): ResNet(\n",
              "    (conv1): ConvModule(\n",
              "      (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
              "      (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "      (activate): ReLU(inplace=True)\n",
              "    )\n",
              "    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
              "    (layer1): Sequential(\n",
              "      (0): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "        (downsample): ConvModule(\n",
              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "      )\n",
              "      (1): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (2): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "    )\n",
              "    (layer2): Sequential(\n",
              "      (0): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "        (downsample): ConvModule(\n",
              "          (conv): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "      )\n",
              "      (1): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (2): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (3): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "    )\n",
              "    (layer3): Sequential(\n",
              "      (0): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "        (downsample): ConvModule(\n",
              "          (conv): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "      )\n",
              "      (1): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (2): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (3): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (4): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (5): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "    )\n",
              "    (layer4): Sequential(\n",
              "      (0): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "        (downsample): ConvModule(\n",
              "          (conv): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)\n",
              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "      )\n",
              "      (1): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "      (2): Bottleneck(\n",
              "        (conv1): ConvModule(\n",
              "          (conv): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv2): ConvModule(\n",
              "          (conv): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "          (activate): ReLU(inplace=True)\n",
              "        )\n",
              "        (conv3): ConvModule(\n",
              "          (conv): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
              "          (bn): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
              "        )\n",
              "        (relu): ReLU(inplace=True)\n",
              "      )\n",
              "    )\n",
              "  )\n",
              "  (cls_head): TSNHead(\n",
              "    (loss_cls): CrossEntropyLoss()\n",
              "    (consensus): AvgConsensus()\n",
              "    (avg_pool): AdaptiveAvgPool2d(output_size=(1, 1))\n",
              "    (dropout): Dropout(p=0.4, inplace=False)\n",
              "    (fc_cls): Linear(in_features=2048, out_features=2, bias=True)\n",
              "  )\n",
              ")"
            ]
          },
          "execution_count": 14,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "import os.path as osp\n",
        "import mmengine\n",
        "from mmengine.runner import Runner\n",
        "\n",
        "# Create work_dir\n",
        "mmengine.mkdir_or_exist(osp.abspath(cfg.work_dir))\n",
        "\n",
        "# build the runner from config\n",
        "runner = Runner.from_cfg(cfg)\n",
        "\n",
        "# start training\n",
        "runner.train()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "zdSd7oTLlxIf"
      },
      "source": [
        "### Understand the log\n",
        "From the log, we can have a basic understanding the training process and know how well the recognizer is trained.\n",
        "\n",
        "Firstly, the ResNet-50 backbone pre-trained on ImageNet is loaded, this is a common practice since training from scratch is more cost. The log shows that all the weights of the ResNet-50 backbone are loaded except the `fc.bias` and `fc.weight`.\n",
        "\n",
        "Second, since the dataset we are using is small, we loaded a TSN model and finetune it for action recognition.\n",
        "The original TSN is trained on original Kinetics-400 dataset which contains 400 classes but Kinetics-400 Tiny dataset only have 2 classes. Therefore, the last FC layer of the pre-trained TSN for classification has different weight shape and is not used.\n",
        "\n",
        "Third, after training, the recognizer is evaluated by the default evaluation. The results show that the recognizer achieves 100% top1 accuracy and 100% top5 accuracy on the val dataset,\n",
        " \n",
        "Not bad!"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "ryVoSfZVmogw"
      },
      "source": [
        "## Test the trained recognizer\n",
        "\n",
        "After finetuning the recognizer, let's check the prediction results!"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 22,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "eyY3hCMwyTct",
        "outputId": "ee6d92ce-e8ea-45c4-f0bd-25c98be05b00"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "09/27 03:00:53 - mmengine - \u001b[4m\u001b[37mINFO\u001b[0m - Epoch(test) [10/10]  acc/top1: 1.0000  acc/top5: 1.0000  acc/mean1: 1.0000\n"
          ]
        },
        {
          "data": {
            "text/plain": [
              "{'acc/top1': 1.0, 'acc/top5': 1.0, 'acc/mean1': 1.0}"
            ]
          },
          "execution_count": 22,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "runner.test()"
      ]
    }
  ],
  "metadata": {
    "accelerator": "GPU",
    "colab": {
      "collapsed_sections": [],
      "provenance": [],
      "toc_visible": true
    },
    "kernelspec": {
      "display_name": "mmact_dev",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.7.16"
    },
    "vscode": {
      "interpreter": {
        "hash": "189c342a4747645665e89db23000ac4d4edb7a87c4cd0b2f881610f468fb778d"
      }
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "1f7168d5d48942ec960a3dd2734ae4b3": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "FloatProgressModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b2ee09effef844ee811df107d556c1ec",
            "max": 102540417,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f55845dcbfa445f18884408313d7a229",
            "value": 102540417
          }
        },
        "2d14de40880f48c5809bcd1391e1b564": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "40af3bfc9e0a486cb95f50766c76787f": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HBoxModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_e9024658f55f4a4e9f40c76cf3510377",
              "IPY_MODEL_1f7168d5d48942ec960a3dd2734ae4b3",
              "IPY_MODEL_ea430aaddedc43d6862d13470da78261"
            ],
            "layout": "IPY_MODEL_46371a881bd0487eb956fe1552d33bb2"
          }
        },
        "46371a881bd0487eb956fe1552d33bb2": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "8ce5458513694d43a5f6e03ef4c94219": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b2ee09effef844ee811df107d556c1ec": {
          "model_module": "@jupyter-widgets/base",
          "model_module_version": "1.2.0",
          "model_name": "LayoutModel",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c21c25b1f9f347aaa0428e6528e3a3e0": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "e9024658f55f4a4e9f40c76cf3510377": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8ce5458513694d43a5f6e03ef4c94219",
            "placeholder": "​",
            "style": "IPY_MODEL_efef671ff3bd491199f713ef0dc30188",
            "value": "100%"
          }
        },
        "ea430aaddedc43d6862d13470da78261": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "HTMLModel",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_2d14de40880f48c5809bcd1391e1b564",
            "placeholder": "​",
            "style": "IPY_MODEL_c21c25b1f9f347aaa0428e6528e3a3e0",
            "value": " 97.8M/97.8M [00:00&lt;00:00, 223MB/s]"
          }
        },
        "efef671ff3bd491199f713ef0dc30188": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "DescriptionStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f55845dcbfa445f18884408313d7a229": {
          "model_module": "@jupyter-widgets/controls",
          "model_module_version": "1.5.0",
          "model_name": "ProgressStyleModel",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        }
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
