{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"gpuType":"A100","machine_shape":"hm"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"accelerator":"GPU","widgets":{"application/vnd.jupyter.widget-state+json":{"c53c0c0aa3c14d8e972db0c2a429187b":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HBoxModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HBoxView","box_style":"","children":["IPY_MODEL_8a101039a4d444008a779318a0ee492f","IPY_MODEL_5e33aa7503ce4c2bb721caa5360e65e3","IPY_MODEL_1075bd2883a647a996169373033a70a0"],"layout":"IPY_MODEL_c66752f780de49abb6b32cb293166611"}},"8a101039a4d444008a779318a0ee492f":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_cbf1c5ee5976493c9085bd867941b021","placeholder":"​","style":"IPY_MODEL_2afe2c0f8d1747a7b7d26b652149d75f","value":"Loading checkpoint shards: 
100%"}},"5e33aa7503ce4c2bb721caa5360e65e3":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"FloatProgressModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"ProgressView","bar_style":"success","description":"","description_tooltip":null,"layout":"IPY_MODEL_8afdb828d27a46529d37e53eb855fc4c","max":3,"min":0,"orientation":"horizontal","style":"IPY_MODEL_c95054262870443abaf096d2691b3621","value":3}},"1075bd2883a647a996169373033a70a0":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_dom_classes":[],"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"HTMLModel","_view_count":null,"_view_module":"@jupyter-widgets/controls","_view_module_version":"1.5.0","_view_name":"HTMLView","description":"","description_tooltip":null,"layout":"IPY_MODEL_6236842858044976838df84897108164","placeholder":"​","style":"IPY_MODEL_cc46f91c30ab4e4ca8dfdbdfb3480095","value":" 3/3 [10:04&lt;00:00, 
182.69s/it]"}},"c66752f780de49abb6b32cb293166611":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cbf1c5ee5976493c9085bd867941b021":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null
,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"2afe2c0f8d1747a7b7d26b652149d75f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}},"8afdb828d27a46529d37e53eb855fc4c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"c95054262870443abaf096d2691b3621":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"ProgressStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","bar_color":null,"description_width":""}},"6236842858044976838df84897108164":{"model
_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_model_module":"@jupyter-widgets/base","_model_module_version":"1.2.0","_model_name":"LayoutModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"LayoutView","align_content":null,"align_items":null,"align_self":null,"border":null,"bottom":null,"display":null,"flex":null,"flex_flow":null,"grid_area":null,"grid_auto_columns":null,"grid_auto_flow":null,"grid_auto_rows":null,"grid_column":null,"grid_gap":null,"grid_row":null,"grid_template_areas":null,"grid_template_columns":null,"grid_template_rows":null,"height":null,"justify_content":null,"justify_items":null,"left":null,"margin":null,"max_height":null,"max_width":null,"min_height":null,"min_width":null,"object_fit":null,"object_position":null,"order":null,"overflow":null,"overflow_x":null,"overflow_y":null,"padding":null,"right":null,"top":null,"visibility":null,"width":null}},"cc46f91c30ab4e4ca8dfdbdfb3480095":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_model_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_model_name":"DescriptionStyleModel","_view_count":null,"_view_module":"@jupyter-widgets/base","_view_module_version":"1.2.0","_view_name":"StyleView","description_width":""}}}}},"cells":[{"cell_type":"code","source":["from google.colab import drive\n","drive.mount('/content/drive')"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"wFby1AS2wpUv","executionInfo":{"status":"ok","timestamp":1726110154243,"user_tz":-480,"elapsed":21387,"user":{"displayName":"aubrie barbella","userId":"12235799605179884781"}},"outputId":"2e558814-aa3e-4fb7-9cb8-d37e0f7f1590"},"execution_count":1,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at 
/content/drive\n"]}]},{"cell_type":"code","execution_count":2,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"etPVbYM2VxIY","executionInfo":{"status":"ok","timestamp":1726110161843,"user_tz":-480,"elapsed":5440,"user":{"displayName":"aubrie barbella","userId":"12235799605179884781"}},"outputId":"40ea590b-9910-43fa-ae44-41d015fe14a5"},"outputs":[{"output_type":"stream","name":"stdout","text":["Cloning into 'InternLM-XComposer'...\n","remote: Enumerating objects: 1942, done.\u001b[K\n","remote: Counting objects: 100% (948/948), done.\u001b[K\n","remote: Compressing objects: 100% (513/513), done.\u001b[K\n","remote: Total 1942 (delta 560), reused 686 (delta 419), pack-reused 994 (from 1)\u001b[K\n","Receiving objects: 100% (1942/1942), 68.63 MiB | 16.46 MiB/s, done.\n","Resolving deltas: 100% (1018/1018), done.\n","/content/InternLM-XComposer/finetune\n"]}],"source":["# Clone only when the checkout is missing, so re-running this cell does not fail on an existing directory.\n","!test -d /content/InternLM-XComposer || git clone https://github.com/InternLM/InternLM-XComposer.git /content/InternLM-XComposer\n","# Absolute path: a relative %cd breaks when this cell is re-run from inside finetune/.\n","%cd /content/InternLM-XComposer/finetune/"]},{"cell_type":"code","source":["# %pip (rather than !pip) installs into the environment of the running kernel.\n","%pip install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu117\n","# einops is pinned to the version the recorded run reported as installed (0.8.0).\n","%pip install transformers==4.33.2 timm==0.4.12 sentencepiece==0.1.99 gradio==4.13.0 markdown2==2.4.10 xlsxwriter==3.1.2 einops==0.8.0\n","%pip install deepspeed==0.12.3\n","%pip install peft==0.8.2\n","# TODO: pin decord and flash-attn to the versions a known-good run resolved, so the environment is reproducible.\n","%pip install decord\n","%pip install flash-attn"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"L4fXj11gWnph","executionInfo":{"status":"ok","timestamp":1726110318396,"user_tz":-480,"elapsed":154598,"user":{"displayName":"aubrie barbella","userId":"12235799605179884781"}},"outputId":"012bef64-8a2c-46ca-a281-3b3f5decc8fa"},"execution_count":3,"outputs":[{"output_type":"stream","name":"stdout","text":["Looking in indexes: https://download.pytorch.org/whl/cu117\n","Collecting torch==2.0.1+cu117\n","  Downloading https://download.pytorch.org/whl/cu117/torch-2.0.1%2Bcu117-cp310-cp310-linux_x86_64.whl (1843.9 
MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 GB\u001b[0m \u001b[31m941.5 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting torchvision==0.15.2+cu117\n","  Downloading https://download.pytorch.org/whl/cu117/torchvision-0.15.2%2Bcu117-cp310-cp310-linux_x86_64.whl (6.1 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6.1/6.1 MB\u001b[0m \u001b[31m105.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting torchaudio==2.0.2\n","  Downloading https://download.pytorch.org/whl/cu117/torchaudio-2.0.2%2Bcu117-cp310-cp310-linux_x86_64.whl (4.4 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m4.4/4.4 MB\u001b[0m \u001b[31m74.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1+cu117) (3.16.0)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1+cu117) (4.12.2)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1+cu117) (1.13.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1+cu117) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch==2.0.1+cu117) (3.1.4)\n","Collecting triton==2.0.0 (from torch==2.0.1+cu117)\n","  Downloading https://download.pytorch.org/whl/triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m63.3/63.3 MB\u001b[0m \u001b[31m35.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from torchvision==0.15.2+cu117) (1.26.4)\n","Requirement already 
satisfied: requests in /usr/local/lib/python3.10/dist-packages (from torchvision==0.15.2+cu117) (2.32.3)\n","Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision==0.15.2+cu117) (9.4.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch==2.0.1+cu117) (3.30.3)\n","Collecting lit (from triton==2.0.0->torch==2.0.1+cu117)\n","  Downloading https://download.pytorch.org/whl/lit-15.0.7.tar.gz (132 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.3/132.3 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch==2.0.1+cu117) (2.1.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.15.2+cu117) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.15.2+cu117) (3.8)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.15.2+cu117) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->torchvision==0.15.2+cu117) (2024.8.30)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch==2.0.1+cu117) (1.3.0)\n","Building wheels for collected packages: lit\n","  Building wheel for lit (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for lit: filename=lit-15.0.7-py3-none-any.whl size=89990 sha256=aa24546c4c98ae21d02230546d35b8e9a9d69525309038fbce10bac8b3417286\n","  Stored in directory: /root/.cache/pip/wheels/27/2c/b6/3ed2983b1b44fe0dea1bb35234b09f2c22fb8ebb308679c922\n","Successfully built lit\n","Installing collected packages: lit, triton, torch, torchvision, torchaudio\n","  Attempting uninstall: torch\n","    Found existing installation: torch 2.4.0+cu121\n","    Uninstalling torch-2.4.0+cu121:\n","      Successfully uninstalled torch-2.4.0+cu121\n","  Attempting uninstall: torchvision\n","    Found existing installation: torchvision 0.19.0+cu121\n","    Uninstalling torchvision-0.19.0+cu121:\n","      Successfully uninstalled torchvision-0.19.0+cu121\n","  Attempting uninstall: torchaudio\n","    Found existing installation: torchaudio 2.4.0+cu121\n","    Uninstalling torchaudio-2.4.0+cu121:\n","      Successfully uninstalled torchaudio-2.4.0+cu121\n","Successfully installed lit-15.0.7 torch-2.0.1+cu117 torchaudio-2.0.2+cu117 torchvision-0.15.2+cu117 triton-2.0.0\n","Collecting transformers==4.33.2\n","  Downloading transformers-4.33.2-py3-none-any.whl.metadata (119 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m119.9/119.9 kB\u001b[0m \u001b[31m9.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hCollecting timm==0.4.12\n","  Downloading timm-0.4.12-py3-none-any.whl.metadata (30 kB)\n","Requirement already satisfied: sentencepiece==0.1.99 in /usr/local/lib/python3.10/dist-packages (0.1.99)\n","Collecting gradio==4.13.0\n","  Downloading gradio-4.13.0-py3-none-any.whl.metadata (15 kB)\n","Collecting markdown2==2.4.10\n","  Downloading markdown2-2.4.10-py2.py3-none-any.whl.metadata (2.0 kB)\n","Collecting xlsxwriter==3.1.2\n","  Downloading XlsxWriter-3.1.2-py3-none-any.whl.metadata (2.5 kB)\n","Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages 
(0.8.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (3.16.0)\n","Requirement already satisfied: huggingface-hub<1.0,>=0.15.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (0.24.6)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (1.26.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (24.1)\n","Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (6.0.2)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (2024.5.15)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (2.32.3)\n","Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.33.2)\n","  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)\n","Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (0.4.5)\n","Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers==4.33.2) (4.66.5)\n","Requirement already satisfied: torch>=1.4 in /usr/local/lib/python3.10/dist-packages (from timm==0.4.12) (2.0.1+cu117)\n","Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from timm==0.4.12) (0.15.2+cu117)\n","Collecting aiofiles<24.0,>=22.0 (from gradio==4.13.0)\n","  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\n","Requirement already satisfied: altair<6.0,>=4.2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (4.2.2)\n","Collecting fastapi (from gradio==4.13.0)\n","  Downloading fastapi-0.114.1-py3-none-any.whl.metadata (27 kB)\n","Collecting 
ffmpy (from gradio==4.13.0)\n","  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)\n","Collecting gradio-client==0.8.0 (from gradio==4.13.0)\n","  Downloading gradio_client-0.8.0-py3-none-any.whl.metadata (7.1 kB)\n","Collecting httpx (from gradio==4.13.0)\n","  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n","Requirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (6.4.5)\n","Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (3.1.4)\n","Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (2.1.5)\n","Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (3.7.1)\n","Collecting orjson~=3.0 (from gradio==4.13.0)\n","  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m50.4/50.4 kB\u001b[0m \u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hRequirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (2.1.4)\n","Requirement already satisfied: pillow<11.0,>=8.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (9.4.0)\n","Requirement already satisfied: pydantic>=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (2.9.1)\n","Collecting pydub (from gradio==4.13.0)\n","  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)\n","Collecting python-multipart (from gradio==4.13.0)\n","  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\n","Collecting semantic-version~=2.0 (from gradio==4.13.0)\n","  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)\n","Collecting tomlkit==0.12.0 (from gradio==4.13.0)\n","  
Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\n","Requirement already satisfied: typer<1.0,>=0.9 in /usr/local/lib/python3.10/dist-packages (from typer[all]<1.0,>=0.9->gradio==4.13.0) (0.12.5)\n","Requirement already satisfied: typing-extensions~=4.0 in /usr/local/lib/python3.10/dist-packages (from gradio==4.13.0) (4.12.2)\n","Collecting uvicorn>=0.14.0 (from gradio==4.13.0)\n","  Downloading uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==0.8.0->gradio==4.13.0) (2024.6.1)\n","Collecting websockets<12.0,>=10.0 (from gradio-client==0.8.0->gradio==4.13.0)\n","  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)\n","Requirement already satisfied: entrypoints in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.13.0) (0.4)\n","Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.13.0) (4.23.0)\n","Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair<6.0,>=4.2.0->gradio==4.13.0) (0.12.1)\n","Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.13.0) (1.3.0)\n","Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.13.0) (0.12.1)\n","Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.13.0) (4.53.1)\n","Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.13.0) (1.4.7)\n","Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.13.0) (3.1.4)\n","Requirement already satisfied: 
python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio==4.13.0) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio==4.13.0) (2024.1)\n","Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio==4.13.0) (2024.1)\n","Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio==4.13.0) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.23.3 in /usr/local/lib/python3.10/dist-packages (from pydantic>=2.0->gradio==4.13.0) (2.23.3)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.4->timm==0.4.12) (1.13.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.4->timm==0.4.12) (3.3)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.4->timm==0.4.12) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.4->timm==0.4.12) (3.30.3)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.4->timm==0.4.12) (15.0.7)\n","Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.13.0) (8.1.7)\n","Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.13.0) (1.5.4)\n","Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.13.0) (13.8.0)\n","\u001b[33mWARNING: typer 0.12.5 does not provide the extra 'all'\u001b[0m\u001b[33m\n","\u001b[0mCollecting h11>=0.8 (from uvicorn>=0.14.0->gradio==4.13.0)\n","  
Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n","Collecting starlette<0.39.0,>=0.37.2 (from fastapi->gradio==4.13.0)\n","  Downloading starlette-0.38.5-py3-none-any.whl.metadata (6.0 kB)\n","Requirement already satisfied: anyio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio==4.13.0) (3.7.1)\n","Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx->gradio==4.13.0) (2024.8.30)\n","Collecting httpcore==1.* (from httpx->gradio==4.13.0)\n","  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n","Requirement already satisfied: idna in /usr/local/lib/python3.10/dist-packages (from httpx->gradio==4.13.0) (3.8)\n","Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from httpx->gradio==4.13.0) (1.3.1)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.33.2) (3.3.2)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers==4.33.2) (2.0.7)\n","Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.13.0) (24.2.0)\n","Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.13.0) (2023.12.1)\n","Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.13.0) (0.35.1)\n","Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio==4.13.0) (0.20.0)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio==4.13.0) (1.16.0)\n","Requirement already satisfied: markdown-it-py>=2.2.0 in 
/usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.13.0) (3.0.0)\n","Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.13.0) (2.16.1)\n","Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio->httpx->gradio==4.13.0) (1.2.2)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.4->timm==0.4.12) (1.3.0)\n","Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio==4.13.0) (0.1.2)\n","Downloading transformers-4.33.2-py3-none-any.whl (7.6 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.6/7.6 MB\u001b[0m \u001b[31m119.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading timm-0.4.12-py3-none-any.whl (376 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m377.0/377.0 kB\u001b[0m \u001b[31m33.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading gradio-4.13.0-py3-none-any.whl (16.6 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.6/16.6 MB\u001b[0m \u001b[31m86.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading markdown2-2.4.10-py2.py3-none-any.whl (39 kB)\n","Downloading XlsxWriter-3.1.2-py3-none-any.whl (153 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m153.0/153.0 kB\u001b[0m \u001b[31m15.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading gradio_client-0.8.0-py3-none-any.whl (305 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m305.1/305.1 kB\u001b[0m \u001b[31m24.4 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n","Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n","Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m141.9/141.9 kB\u001b[0m \u001b[31m13.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n","Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.8/7.8 MB\u001b[0m \u001b[31m120.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading uvicorn-0.30.6-py3-none-any.whl (62 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.8/62.8 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading fastapi-0.114.1-py3-none-any.whl (94 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m94.0/94.0 kB\u001b[0m \u001b[31m9.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading ffmpy-0.4.0-py3-none-any.whl (5.8 kB)\n","Downloading httpx-0.27.2-py3-none-any.whl (76 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m7.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.9/77.9 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n","Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n","Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n","\u001b[2K   
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m6.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading starlette-0.38.5-py3-none-any.whl (71 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.4/71.4 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.9/129.9 kB\u001b[0m \u001b[31m13.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: tokenizers, pydub, xlsxwriter, websockets, tomlkit, semantic-version, python-multipart, orjson, markdown2, h11, ffmpy, aiofiles, uvicorn, starlette, httpcore, transformers, httpx, fastapi, gradio-client, gradio, timm\n","  Attempting uninstall: tokenizers\n","    Found existing installation: tokenizers 0.19.1\n","    Uninstalling tokenizers-0.19.1:\n","      Successfully uninstalled tokenizers-0.19.1\n","  Attempting uninstall: tomlkit\n","    Found existing installation: tomlkit 0.13.2\n","    Uninstalling tomlkit-0.13.2:\n","      Successfully uninstalled tomlkit-0.13.2\n","  Attempting uninstall: transformers\n","    Found existing installation: transformers 4.44.2\n","    Uninstalling transformers-4.44.2:\n","      Successfully uninstalled transformers-4.44.2\n","Successfully installed aiofiles-23.2.1 fastapi-0.114.1 ffmpy-0.4.0 gradio-4.13.0 gradio-client-0.8.0 h11-0.14.0 httpcore-1.0.5 httpx-0.27.2 markdown2-2.4.10 orjson-3.10.7 pydub-0.25.1 python-multipart-0.0.9 semantic-version-2.10.0 starlette-0.38.5 timm-0.4.12 tokenizers-0.13.3 tomlkit-0.12.0 transformers-4.33.2 uvicorn-0.30.6 websockets-11.0.3 xlsxwriter-3.1.2\n","Collecting deepspeed==0.12.3\n","  Downloading deepspeed-0.12.3.tar.gz (1.2 MB)\n","\u001b[2K     
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n","Collecting hjson (from deepspeed==0.12.3)\n","  Downloading hjson-3.1.0-py3-none-any.whl.metadata (2.6 kB)\n","Collecting ninja (from deepspeed==0.12.3)\n","  Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl.metadata (5.3 kB)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (1.26.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (24.1)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (5.9.5)\n","Requirement already satisfied: py-cpuinfo in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (9.0.0)\n","Requirement already satisfied: pydantic in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (2.9.1)\n","Collecting pynvml (from deepspeed==0.12.3)\n","  Downloading pynvml-11.5.3-py3-none-any.whl.metadata (8.8 kB)\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (2.0.1+cu117)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from deepspeed==0.12.3) (4.66.5)\n","Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed==0.12.3) (0.7.0)\n","Requirement already satisfied: pydantic-core==2.23.3 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed==0.12.3) (2.23.3)\n","Requirement already satisfied: typing-extensions>=4.6.1 in /usr/local/lib/python3.10/dist-packages (from pydantic->deepspeed==0.12.3) (4.12.2)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from 
torch->deepspeed==0.12.3) (3.16.0)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed==0.12.3) (1.13.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed==0.12.3) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed==0.12.3) (3.1.4)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->deepspeed==0.12.3) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->deepspeed==0.12.3) (3.30.3)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->deepspeed==0.12.3) (15.0.7)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->deepspeed==0.12.3) (2.1.5)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->deepspeed==0.12.3) (1.3.0)\n","Downloading hjson-3.1.0-py3-none-any.whl (54 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.0/54.0 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.2/307.2 kB\u001b[0m \u001b[31m28.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hDownloading pynvml-11.5.3-py3-none-any.whl (53 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.1/53.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hBuilding wheels for collected packages: deepspeed\n","  Building wheel for deepspeed (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for deepspeed: filename=deepspeed-0.12.3-py3-none-any.whl size=1279156 sha256=932b4abe848179bc41776ba10b6d6877824c156e547235dbe2c4c6c2c1aef429\n","  Stored in directory: /root/.cache/pip/wheels/ee/2b/c5/892ceee06964ce8aa2a98d4260848d0d9a3f1e743862e4b45a\n","Successfully built deepspeed\n","Installing collected packages: ninja, hjson, pynvml, deepspeed\n","Successfully installed deepspeed-0.12.3 hjson-3.1.0 ninja-1.11.1.1 pynvml-11.5.3\n","Collecting peft==0.8.2\n","  Downloading peft-0.8.2-py3-none-any.whl.metadata (25 kB)\n","Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (1.26.4)\n","Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (24.1)\n","Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (5.9.5)\n","Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (6.0.2)\n","Requirement already satisfied: torch>=1.13.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (2.0.1+cu117)\n","Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (4.33.2)\n","Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (4.66.5)\n","Requirement already satisfied: accelerate>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (0.33.0)\n","Requirement already satisfied: safetensors in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (0.4.5)\n","Requirement already satisfied: huggingface-hub>=0.17.0 in /usr/local/lib/python3.10/dist-packages (from peft==0.8.2) (0.24.6)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.8.2) (3.16.0)\n","Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages 
(from huggingface-hub>=0.17.0->peft==0.8.2) (2024.6.1)\n","Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.8.2) (2.32.3)\n","Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.17.0->peft==0.8.2) (4.12.2)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.8.2) (1.13.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.8.2) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.8.2) (3.1.4)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.13.0->peft==0.8.2) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.8.2) (3.30.3)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch>=1.13.0->peft==0.8.2) (15.0.7)\n","Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.8.2) (2024.5.15)\n","Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /usr/local/lib/python3.10/dist-packages (from transformers->peft==0.8.2) (0.13.3)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.13.0->peft==0.8.2) (2.1.5)\n","Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.8.2) (3.3.2)\n","Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.8.2) (3.8)\n","Requirement already satisfied: urllib3<3,>=1.21.1 in 
/usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.8.2) (2.0.7)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.17.0->peft==0.8.2) (2024.8.30)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.13.0->peft==0.8.2) (1.3.0)\n","Downloading peft-0.8.2-py3-none-any.whl (183 kB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m183.4/183.4 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: peft\n","Successfully installed peft-0.8.2\n","Collecting decord\n","  Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl.metadata (422 bytes)\n","Requirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from decord) (1.26.4)\n","Downloading decord-0.6.0-py3-none-manylinux2010_x86_64.whl (13.6 MB)\n","\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.6/13.6 MB\u001b[0m \u001b[31m92.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25hInstalling collected packages: decord\n","Successfully installed decord-0.6.0\n","Collecting flash-attn\n","  Downloading flash_attn-2.6.3.tar.gz (2.6 MB)\n","\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m62.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h  Preparing metadata (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","Requirement already satisfied: torch in /usr/local/lib/python3.10/dist-packages (from flash-attn) (2.0.1+cu117)\n","Requirement already satisfied: einops in /usr/local/lib/python3.10/dist-packages (from flash-attn) (0.8.0)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.16.0)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (4.12.2)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (1.13.2)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (3.1.4)\n","Requirement already satisfied: triton==2.0.0 in /usr/local/lib/python3.10/dist-packages (from torch->flash-attn) (2.0.0)\n","Requirement already satisfied: cmake in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->flash-attn) (3.30.3)\n","Requirement already satisfied: lit in /usr/local/lib/python3.10/dist-packages (from triton==2.0.0->torch->flash-attn) (15.0.7)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch->flash-attn) (2.1.5)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch->flash-attn) (1.3.0)\n","Building wheels for collected packages: flash-attn\n","  Building wheel for flash-attn (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n","  Created wheel for flash-attn: filename=flash_attn-2.6.3-cp310-cp310-linux_x86_64.whl size=188930485 sha256=f242b686e89e4bd88474ac4ceb7a5b19d738b4e63e2132f50edd7b6f397f8654\n","  Stored in directory: /root/.cache/pip/wheels/7e/e3/c3/89c7a2f3c4adc07cd1c675f8bb7b9ad4d18f64a72bccdfe826\n","Successfully built flash-attn\n","Installing collected packages: flash-attn\n","Successfully installed flash-attn-2.6.3\n"]}]},{"cell_type":"markdown","source":["/content/drive/MyDrive/label.json 1"],"metadata":{"id":"jHtYxUqWcLkN"}},{"cell_type":"markdown","source":["```bash\n","#!/bin/bash\n","export CUDA_DEVICE_MAX_CONNECTIONS=1\n","DIR=`pwd`\n","\n","export MODEL=\"internlm/internlm-xcomposer2d5-7b\"\n","# export DATA=\"path of data\"\n","export DATA=\"data.txt\"\n","\n","GPUS_PER_NODE=1\n","NNODES=1\n","NODE_RANK=0\n","MASTER_ADDR=localhost\n","MASTER_PORT=6001\n","\n","DISTRIBUTED_ARGS=\"\n","    --nproc_per_node $GPUS_PER_NODE \\\n","    --nnodes $NNODES \\\n","    --node_rank $NODE_RANK \\\n","    --master_addr $MASTER_ADDR \\\n","    --master_port $MASTER_PORT\n","\"\n","\n","torchrun $DISTRIBUTED_ARGS finetune.py \\\n","    --model_name_or_path $MODEL \\\n","    --data_path $DATA \\\n","    --given_num True \\\n","    --bf16 True \\\n","    --fix_vit True \\\n","    --fix_sampler False \\\n","    --use_lora True \\\n","    --hd_num 18 \\\n","    --output_dir /content/drive/MyDrive/output/finetune_lora \\\n","    --num_train_epochs 2 \\\n","    --batch_size 1 \\\n","    --per_device_train_batch_size 1 \\\n","    --per_device_eval_batch_size 1 \\\n","    --gradient_accumulation_steps 8 \\\n","    --evaluation_strategy \"no\" \\\n","    --save_strategy \"epoch\" \\\n","    --save_total_limit 1 \\\n","    --learning_rate 5e-5 \\\n","    --weight_decay 0.1 \\\n","    --adam_beta2 0.95 \\\n","    --warmup_ratio 0.01 \\\n","    --lr_scheduler_type \"cosine\" \\\n","    --logging_steps 1 \\\n","    --report_to \"none\" \\\n","    --max_length 16384 \\\n","    
--deepspeed ds_config_zero2.json \\\n","    --gradient_checkpointing True\n","```"],"metadata":{"id":"f89Jm-NRcdTi"}},{"cell_type":"code","source":["!sh finetune_lora.sh\n","\n","!python3 merge_peft_adapter.py --adapter_model_name=/content/drive/MyDrive/output/finetune_lora --base_model_name=internlm/internlm-xcomposer2d5-7b --output_name=/content/drive/MyDrive/output/finetune_full"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bgg8etGNXZUl","outputId":"ad0b1d5c-53ad-4862-82ef-095b3a2476f9","executionInfo":{"status":"ok","timestamp":1726115934637,"user_tz":-480,"elapsed":5609119,"user":{"displayName":"aubrie barbella","userId":"12235799605179884781"}}},"execution_count":4,"outputs":[{"output_type":"stream","name":"stdout","text":["[2024-09-12 03:05:29,247] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n","2024-09-12 03:05:33.151151: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-09-12 03:05:33.172334: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-09-12 03:05:33.178783: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-09-12 03:05:34.401526: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","/usr/local/lib/python3.10/dist-packages/transformers/deepspeed.py:23: FutureWarning: transformers.deepspeed module is deprecated and will be removed in a future version. 
Please import deepspeed modules directly from transformers.integrations\n","  warnings.warn(\n","[2024-09-12 03:05:34,926] [INFO] [comm.py:637:init_distributed] cdb=None\n","[2024-09-12 03:05:34,926] [INFO] [comm.py:668:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl\n","/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n","  warnings.warn(\n","config.json: 100% 1.09k/1.09k [00:00<00:00, 9.17MB/s]\n","configuration_internlm_xcomposer2.py: 100% 6.98k/6.98k [00:00<00:00, 53.5MB/s]\n","A new version of the following files was downloaded from https://huggingface.co/internlm/internlm-xcomposer2d5-7b:\n","- configuration_internlm_xcomposer2.py\n",". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","Load model from: internlm/internlm-xcomposer2d5-7b\n","modeling_internlm_xcomposer2.py: 100% 38.4k/38.4k [00:00<00:00, 39.6MB/s]\n","build_mlp.py: 100% 8.95k/8.95k [00:00<00:00, 60.2MB/s]\n","A new version of the following files was downloaded from https://huggingface.co/internlm/internlm-xcomposer2d5-7b:\n","- build_mlp.py\n",". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","ixc_utils.py: 100% 4.62k/4.62k [00:00<00:00, 30.8MB/s]\n","A new version of the following files was downloaded from https://huggingface.co/internlm/internlm-xcomposer2d5-7b:\n","- ixc_utils.py\n",". Make sure to double-check they do not contain any added malicious code. 
To avoid downloading new versions of the code file, you can pin a revision.\n","modeling_internlm2.py: 100% 43.8k/43.8k [00:00<00:00, 195MB/s]\n","A new version of the following files was downloaded from https://huggingface.co/internlm/internlm-xcomposer2d5-7b:\n","- modeling_internlm2.py\n",". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","A new version of the following files was downloaded from https://huggingface.co/internlm/internlm-xcomposer2d5-7b:\n","- modeling_internlm_xcomposer2.py\n","- build_mlp.py\n","- ixc_utils.py\n","- modeling_internlm2.py\n",". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","pytorch_model.bin.index.json: 100% 178k/178k [00:00<00:00, 74.3MB/s]\n","Downloading shards:   0% 0/3 [00:00<?, ?it/s]\n","pytorch_model-00001-of-00003.bin:   0% 0.00/9.97G [00:00<?, ?B/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   0% 31.5M/9.97G [00:00<00:40, 247MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   1% 83.9M/9.97G [00:00<00:26, 366MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   1% 126M/9.97G [00:00<00:25, 381MB/s] \u001b[A\n","pytorch_model-00001-of-00003.bin:   2% 168M/9.97G [00:00<00:25, 377MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   2% 210M/9.97G [00:00<00:25, 377MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   3% 252M/9.97G [00:00<00:25, 385MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   3% 294M/9.97G [00:00<00:27, 349MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   3% 336M/9.97G [00:00<00:27, 348MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   4% 377M/9.97G [00:01<00:27, 346MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   4% 430M/9.97G [00:01<00:25, 372MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   5% 472M/9.97G [00:01<00:26, 358MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   
5% 514M/9.97G [00:01<00:25, 364MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   6% 556M/9.97G [00:01<00:26, 362MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   6% 598M/9.97G [00:01<00:25, 371MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   7% 650M/9.97G [00:01<00:23, 396MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   7% 692M/9.97G [00:01<00:24, 381MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   7% 734M/9.97G [00:02<00:26, 345MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   8% 776M/9.97G [00:02<00:27, 333MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   8% 818M/9.97G [00:02<00:28, 316MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   9% 860M/9.97G [00:02<00:32, 283MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   9% 891M/9.97G [00:02<00:31, 287MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:   9% 923M/9.97G [00:02<00:31, 289MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  10% 954M/9.97G [00:02<00:37, 243MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  10% 1.01G/9.97G [00:03<00:35, 254MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  10% 1.04G/9.97G [00:03<00:38, 233MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  11% 1.07G/9.97G [00:03<00:39, 224MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  11% 1.11G/9.97G [00:03<00:35, 252MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  12% 1.15G/9.97G [00:03<00:32, 270MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  12% 1.20G/9.97G [00:03<00:30, 284MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  12% 1.23G/9.97G [00:03<00:33, 259MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  13% 1.26G/9.97G [00:04<00:32, 269MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  13% 1.30G/9.97G [00:04<00:30, 288MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  13% 1.34G/9.97G [00:04<00:28, 307MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  14% 1.39G/9.97G [00:04<00:24, 343MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  14% 1.44G/9.97G [00:04<00:27, 
309MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  15% 1.49G/9.97G [00:04<00:24, 345MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  15% 1.54G/9.97G [00:04<00:22, 380MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  16% 1.58G/9.97G [00:04<00:22, 373MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  16% 1.63G/9.97G [00:05<00:22, 375MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  17% 1.67G/9.97G [00:05<00:21, 379MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  17% 1.71G/9.97G [00:05<00:22, 373MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  18% 1.75G/9.97G [00:05<00:22, 360MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  18% 1.79G/9.97G [00:05<00:22, 366MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  19% 1.85G/9.97G [00:05<00:20, 394MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  19% 1.90G/9.97G [00:05<00:19, 414MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  20% 1.95G/9.97G [00:05<00:18, 431MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  20% 2.00G/9.97G [00:05<00:17, 452MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  21% 2.06G/9.97G [00:06<00:18, 432MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  21% 2.11G/9.97G [00:06<00:21, 363MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  22% 2.15G/9.97G [00:06<00:22, 350MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  22% 2.19G/9.97G [00:06<00:22, 341MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  22% 2.23G/9.97G [00:06<00:21, 355MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  23% 2.29G/9.97G [00:06<00:19, 396MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  23% 2.34G/9.97G [00:06<00:18, 419MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  24% 2.39G/9.97G [00:06<00:17, 432MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  25% 2.44G/9.97G [00:07<00:17, 432MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  25% 2.50G/9.97G [00:07<00:16, 441MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  26% 2.55G/9.97G [00:07<00:16, 
444MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  26% 2.60G/9.97G [00:07<00:17, 420MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  27% 2.65G/9.97G [00:07<00:20, 352MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  27% 2.71G/9.97G [00:07<00:19, 378MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  28% 2.76G/9.97G [00:07<00:18, 393MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  28% 2.80G/9.97G [00:08<00:19, 362MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  29% 2.85G/9.97G [00:08<00:19, 373MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  29% 2.89G/9.97G [00:08<00:20, 349MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  29% 2.94G/9.97G [00:08<00:20, 350MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  30% 2.98G/9.97G [00:08<00:19, 358MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  30% 3.02G/9.97G [00:08<00:19, 355MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  31% 3.06G/9.97G [00:08<00:20, 331MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  31% 3.10G/9.97G [00:08<00:20, 339MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  32% 3.15G/9.97G [00:09<00:22, 299MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  32% 3.18G/9.97G [00:09<00:22, 298MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  32% 3.22G/9.97G [00:09<00:21, 312MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  33% 3.26G/9.97G [00:09<00:22, 304MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  33% 3.29G/9.97G [00:09<00:22, 295MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  33% 3.32G/9.97G [00:09<00:22, 294MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  34% 3.37G/9.97G [00:09<00:20, 320MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  34% 3.41G/9.97G [00:09<00:20, 320MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  35% 3.45G/9.97G [00:10<00:20, 316MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  35% 3.49G/9.97G [00:10<00:22, 293MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  35% 3.53G/9.97G [00:10<00:20, 
311MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  36% 3.59G/9.97G [00:10<00:19, 333MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  37% 3.64G/9.97G [00:10<00:17, 356MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  37% 3.68G/9.97G [00:10<00:18, 338MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  37% 3.72G/9.97G [00:10<00:17, 356MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  38% 3.77G/9.97G [00:10<00:15, 388MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  38% 3.82G/9.97G [00:11<00:16, 368MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  39% 3.86G/9.97G [00:11<00:16, 373MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  39% 3.90G/9.97G [00:11<00:16, 369MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  40% 3.94G/9.97G [00:11<00:17, 352MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  40% 3.98G/9.97G [00:11<00:17, 347MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  40% 4.03G/9.97G [00:11<00:16, 355MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  41% 4.07G/9.97G [00:11<00:20, 288MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  41% 4.10G/9.97G [00:12<00:21, 275MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  42% 4.14G/9.97G [00:12<00:18, 307MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  42% 4.19G/9.97G [00:12<00:16, 352MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  43% 4.25G/9.97G [00:12<00:14, 383MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  43% 4.30G/9.97G [00:12<00:14, 402MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  44% 4.35G/9.97G [00:12<00:13, 415MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  44% 4.40G/9.97G [00:12<00:13, 425MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  45% 4.46G/9.97G [00:12<00:13, 394MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  45% 4.50G/9.97G [00:13<00:14, 380MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  46% 4.54G/9.97G [00:13<00:14, 383MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  46% 4.59G/9.97G [00:13<00:13, 
403MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  47% 4.65G/9.97G [00:13<00:13, 408MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  47% 4.69G/9.97G [00:13<00:13, 396MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  47% 4.73G/9.97G [00:13<00:14, 358MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  48% 4.77G/9.97G [00:13<00:14, 353MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  48% 4.81G/9.97G [00:13<00:14, 359MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  49% 4.85G/9.97G [00:13<00:14, 354MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  49% 4.90G/9.97G [00:14<00:14, 354MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  50% 4.94G/9.97G [00:14<00:14, 347MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  50% 4.98G/9.97G [00:14<00:13, 364MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  50% 5.02G/9.97G [00:14<00:14, 352MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  51% 5.06G/9.97G [00:14<00:13, 357MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  51% 5.11G/9.97G [00:14<00:13, 360MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  52% 5.15G/9.97G [00:14<00:16, 301MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  52% 5.19G/9.97G [00:14<00:14, 327MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  52% 5.23G/9.97G [00:15<00:14, 338MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  53% 5.27G/9.97G [00:15<00:17, 276MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  53% 5.32G/9.97G [00:15<00:15, 308MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  54% 5.36G/9.97G [00:15<00:15, 296MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  54% 5.40G/9.97G [00:15<00:15, 301MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  55% 5.44G/9.97G [00:15<00:16, 283MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  55% 5.47G/9.97G [00:15<00:16, 280MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  55% 5.51G/9.97G [00:16<00:15, 283MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  56% 5.55G/9.97G [00:16<00:14, 
296MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  56% 5.58G/9.97G [00:16<00:15, 288MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  56% 5.61G/9.97G [00:16<00:15, 285MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  57% 5.65G/9.97G [00:16<00:14, 293MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  57% 5.69G/9.97G [00:16<00:14, 300MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  58% 5.74G/9.97G [00:17<00:20, 211MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  58% 5.79G/9.97G [00:17<00:15, 265MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  59% 5.84G/9.97G [00:17<00:13, 298MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  59% 5.88G/9.97G [00:17<00:12, 322MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  59% 5.92G/9.97G [00:17<00:12, 317MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  60% 5.97G/9.97G [00:17<00:12, 327MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  60% 6.01G/9.97G [00:17<00:11, 347MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  61% 6.06G/9.97G [00:17<00:10, 381MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  61% 6.11G/9.97G [00:17<00:09, 391MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  62% 6.16G/9.97G [00:18<00:10, 359MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  62% 6.21G/9.97G [00:18<00:09, 386MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  63% 6.25G/9.97G [00:18<00:10, 369MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  63% 6.29G/9.97G [00:18<00:10, 359MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  64% 6.33G/9.97G [00:18<00:10, 357MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  64% 6.39G/9.97G [00:18<00:09, 383MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  65% 6.44G/9.97G [00:18<00:08, 414MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  65% 6.49G/9.97G [00:18<00:07, 439MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  66% 6.54G/9.97G [00:19<00:07, 440MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  66% 6.60G/9.97G [00:19<00:08, 
392MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  67% 6.64G/9.97G [00:19<00:10, 329MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  67% 6.69G/9.97G [00:19<00:09, 357MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  68% 6.73G/9.97G [00:19<00:08, 364MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  68% 6.77G/9.97G [00:19<00:10, 311MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  68% 6.82G/9.97G [00:19<00:09, 331MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  69% 6.87G/9.97G [00:20<00:08, 360MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  69% 6.92G/9.97G [00:20<00:07, 390MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  70% 6.96G/9.97G [00:20<00:07, 394MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  70% 7.00G/9.97G [00:20<00:07, 375MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  71% 7.05G/9.97G [00:20<00:07, 381MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  71% 7.09G/9.97G [00:20<00:07, 380MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  72% 7.14G/9.97G [00:20<00:07, 399MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  72% 7.19G/9.97G [00:20<00:06, 417MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  73% 7.25G/9.97G [00:20<00:06, 429MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  73% 7.30G/9.97G [00:21<00:06, 440MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  74% 7.35G/9.97G [00:21<00:06, 422MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  74% 7.40G/9.97G [00:21<00:06, 419MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  75% 7.46G/9.97G [00:21<00:06, 394MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  75% 7.50G/9.97G [00:21<00:06, 392MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  76% 7.54G/9.97G [00:21<00:06, 353MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  76% 7.58G/9.97G [00:21<00:06, 342MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  76% 7.62G/9.97G [00:22<00:08, 280MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  77% 7.65G/9.97G [00:22<00:09, 
243MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  77% 7.69G/9.97G [00:22<00:09, 251MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  78% 7.73G/9.97G [00:22<00:08, 271MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  78% 7.77G/9.97G [00:22<00:07, 285MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  78% 7.81G/9.97G [00:22<00:06, 313MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  79% 7.85G/9.97G [00:22<00:06, 324MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  79% 7.90G/9.97G [00:22<00:06, 345MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  80% 7.94G/9.97G [00:23<00:05, 339MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  80% 7.98G/9.97G [00:23<00:05, 358MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  80% 8.02G/9.97G [00:23<00:06, 305MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  81% 8.07G/9.97G [00:23<00:05, 342MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  81% 8.12G/9.97G [00:23<00:05, 349MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  82% 8.16G/9.97G [00:23<00:05, 348MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  82% 8.20G/9.97G [00:23<00:05, 328MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  83% 8.24G/9.97G [00:23<00:04, 347MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  83% 8.28G/9.97G [00:24<00:04, 354MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  84% 8.33G/9.97G [00:24<00:04, 362MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  84% 8.37G/9.97G [00:24<00:04, 356MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  84% 8.42G/9.97G [00:24<00:03, 389MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  85% 8.46G/9.97G [00:24<00:03, 377MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  85% 8.50G/9.97G [00:24<00:05, 275MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  86% 8.55G/9.97G [00:24<00:05, 276MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  86% 8.59G/9.97G [00:25<00:04, 304MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  87% 8.64G/9.97G [00:25<00:03, 
348MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  87% 8.69G/9.97G [00:25<00:03, 368MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  88% 8.73G/9.97G [00:25<00:03, 337MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  88% 8.78G/9.97G [00:25<00:03, 322MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  88% 8.82G/9.97G [00:25<00:03, 329MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  89% 8.86G/9.97G [00:25<00:03, 349MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  89% 8.90G/9.97G [00:25<00:02, 359MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  90% 8.94G/9.97G [00:26<00:03, 335MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  90% 8.99G/9.97G [00:26<00:03, 318MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  91% 9.03G/9.97G [00:26<00:02, 340MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  91% 9.07G/9.97G [00:26<00:02, 342MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  91% 9.11G/9.97G [00:26<00:02, 342MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  92% 9.15G/9.97G [00:26<00:02, 338MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  92% 9.20G/9.97G [00:26<00:02, 330MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  93% 9.25G/9.97G [00:26<00:02, 357MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  93% 9.29G/9.97G [00:27<00:01, 370MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  94% 9.33G/9.97G [00:27<00:01, 336MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  94% 9.38G/9.97G [00:27<00:01, 370MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  95% 9.44G/9.97G [00:27<00:01, 401MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  95% 9.48G/9.97G [00:27<00:01, 404MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  96% 9.52G/9.97G [00:27<00:01, 262MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  96% 9.56G/9.97G [00:28<00:01, 264MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  96% 9.60G/9.97G [00:28<00:01, 286MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  97% 9.65G/9.97G [00:28<00:01, 
309MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  97% 9.69G/9.97G [00:28<00:00, 333MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  98% 9.73G/9.97G [00:28<00:00, 317MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  98% 9.77G/9.97G [00:28<00:00, 338MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  98% 9.81G/9.97G [00:28<00:00, 290MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  99% 9.86G/9.97G [00:28<00:00, 301MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin:  99% 9.91G/9.97G [00:29<00:00, 338MB/s]\u001b[A\n","pytorch_model-00001-of-00003.bin: 100% 9.97G/9.97G [00:29<00:00, 342MB/s]\n","Downloading shards:  33% 1/3 [00:29<00:59, 29.75s/it]\n","pytorch_model-00002-of-00003.bin:   0% 0.00/10.0G [00:00<?, ?B/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   0% 10.5M/10.0G [00:00<01:58, 84.0MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   0% 31.5M/10.0G [00:00<01:12, 137MB/s] \u001b[A\n","pytorch_model-00002-of-00003.bin:   1% 73.4M/10.0G [00:00<00:40, 244MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   1% 115M/10.0G [00:00<00:34, 283MB/s] \u001b[A\n","pytorch_model-00002-of-00003.bin:   2% 168M/10.0G [00:00<00:28, 351MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   2% 210M/10.0G [00:00<00:26, 369MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   3% 252M/10.0G [00:00<00:26, 364MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   3% 304M/10.0G [00:00<00:24, 396MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   3% 346M/10.0G [00:01<00:24, 395MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   4% 388M/10.0G [00:01<00:25, 384MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   4% 440M/10.0G [00:01<00:23, 410MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   5% 482M/10.0G [00:01<00:23, 408MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   5% 524M/10.0G [00:01<00:23, 406MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   6% 577M/10.0G [00:01<00:22, 416MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   6% 619M/10.0G 
[00:01<00:23, 405MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   7% 661M/10.0G [00:01<00:25, 371MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   7% 713M/10.0G [00:01<00:23, 390MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   8% 755M/10.0G [00:02<00:23, 386MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   8% 807M/10.0G [00:02<00:22, 409MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   8% 849M/10.0G [00:02<00:23, 391MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   9% 891M/10.0G [00:02<00:26, 350MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:   9% 933M/10.0G [00:02<00:25, 359MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  10% 975M/10.0G [00:02<00:26, 335MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  10% 1.02G/10.0G [00:02<00:26, 337MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  11% 1.06G/10.0G [00:02<00:26, 343MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  11% 1.10G/10.0G [00:03<00:25, 352MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  11% 1.14G/10.0G [00:03<00:24, 360MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  12% 1.18G/10.0G [00:03<00:28, 306MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  12% 1.23G/10.0G [00:03<00:29, 300MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  13% 1.27G/10.0G [00:03<00:27, 316MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  13% 1.32G/10.0G [00:03<00:24, 350MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  14% 1.36G/10.0G [00:03<00:30, 283MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  14% 1.39G/10.0G [00:04<00:29, 288MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  14% 1.44G/10.0G [00:04<00:27, 311MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  15% 1.49G/10.0G [00:04<00:24, 345MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  15% 1.53G/10.0G [00:04<00:23, 360MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  16% 1.58G/10.0G [00:04<00:21, 391MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  16% 1.63G/10.0G [00:04<00:21, 
388MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  17% 1.67G/10.0G [00:04<00:23, 350MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  17% 1.71G/10.0G [00:04<00:22, 367MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  18% 1.75G/10.0G [00:04<00:21, 379MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  18% 1.79G/10.0G [00:05<00:23, 345MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  18% 1.84G/10.0G [00:05<00:24, 333MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  19% 1.89G/10.0G [00:05<00:22, 366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  19% 1.94G/10.0G [00:05<00:20, 388MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  20% 1.99G/10.0G [00:05<00:19, 408MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  20% 2.03G/10.0G [00:05<00:24, 326MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  21% 2.08G/10.0G [00:05<00:23, 341MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  21% 2.13G/10.0G [00:06<00:21, 368MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  22% 2.18G/10.0G [00:06<00:19, 391MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  22% 2.23G/10.0G [00:06<00:19, 403MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  23% 2.29G/10.0G [00:06<00:18, 412MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  23% 2.34G/10.0G [00:06<00:17, 429MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  24% 2.39G/10.0G [00:06<00:17, 440MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  24% 2.44G/10.0G [00:06<00:16, 453MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  25% 2.50G/10.0G [00:06<00:16, 442MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  25% 2.55G/10.0G [00:06<00:17, 416MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  26% 2.60G/10.0G [00:07<00:18, 404MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  26% 2.64G/10.0G [00:07<00:18, 404MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  27% 2.68G/10.0G [00:07<00:19, 371MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  27% 2.74G/10.0G [00:07<00:18, 
392MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  28% 2.79G/10.0G [00:07<00:17, 405MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  28% 2.83G/10.0G [00:07<00:17, 403MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  29% 2.87G/10.0G [00:08<00:26, 266MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  29% 2.93G/10.0G [00:08<00:23, 297MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  30% 2.97G/10.0G [00:08<00:24, 285MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  30% 3.01G/10.0G [00:08<00:22, 313MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  31% 3.05G/10.0G [00:08<00:21, 331MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  31% 3.09G/10.0G [00:08<00:19, 346MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  31% 3.14G/10.0G [00:08<00:19, 351MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  32% 3.18G/10.0G [00:08<00:18, 366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  32% 3.22G/10.0G [00:08<00:18, 362MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  33% 3.26G/10.0G [00:09<00:18, 372MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  33% 3.30G/10.0G [00:09<00:18, 367MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  33% 3.34G/10.0G [00:09<00:18, 360MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  34% 3.40G/10.0G [00:09<00:17, 377MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  34% 3.44G/10.0G [00:09<00:17, 371MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  35% 3.48G/10.0G [00:09<00:18, 360MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  35% 3.52G/10.0G [00:09<00:17, 366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  36% 3.58G/10.0G [00:09<00:16, 395MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  36% 3.63G/10.0G [00:10<00:15, 399MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  37% 3.68G/10.0G [00:10<00:15, 411MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  37% 3.73G/10.0G [00:10<00:14, 419MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  38% 3.79G/10.0G [00:10<00:14, 
421MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  38% 3.84G/10.0G [00:10<00:14, 425MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  39% 3.89G/10.0G [00:10<00:14, 436MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  39% 3.94G/10.0G [00:10<00:14, 427MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  40% 4.00G/10.0G [00:10<00:15, 393MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  40% 4.04G/10.0G [00:11<00:15, 383MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  41% 4.08G/10.0G [00:11<00:16, 364MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  41% 4.12G/10.0G [00:11<00:15, 371MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  42% 4.17G/10.0G [00:11<00:15, 377MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  42% 4.22G/10.0G [00:11<00:15, 381MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  43% 4.27G/10.0G [00:11<00:14, 403MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  43% 4.31G/10.0G [00:11<00:14, 406MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  44% 4.35G/10.0G [00:11<00:14, 401MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  44% 4.39G/10.0G [00:11<00:14, 379MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  44% 4.44G/10.0G [00:12<00:15, 370MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  45% 4.48G/10.0G [00:12<00:15, 355MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  45% 4.52G/10.0G [00:12<00:15, 348MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  46% 4.56G/10.0G [00:12<00:15, 358MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  46% 4.60G/10.0G [00:12<00:14, 362MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  46% 4.65G/10.0G [00:12<00:15, 347MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  47% 4.69G/10.0G [00:12<00:16, 318MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  47% 4.73G/10.0G [00:12<00:15, 332MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  48% 4.77G/10.0G [00:13<00:16, 313MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  48% 4.81G/10.0G [00:18<03:28, 
24.9MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  49% 4.85G/10.0G [00:18<02:29, 34.4MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  49% 4.90G/10.0G [00:18<01:48, 47.0MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  49% 4.93G/10.0G [00:18<01:25, 59.1MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  50% 4.98G/10.0G [00:18<00:57, 86.9MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  50% 5.02G/10.0G [00:19<00:44, 112MB/s] \u001b[A\n","pytorch_model-00002-of-00003.bin:  51% 5.06G/10.0G [00:19<00:38, 130MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  51% 5.11G/10.0G [00:19<00:30, 161MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  51% 5.15G/10.0G [00:19<00:25, 189MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  52% 5.19G/10.0G [00:19<00:21, 221MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  52% 5.23G/10.0G [00:19<00:19, 246MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  53% 5.27G/10.0G [00:19<00:17, 275MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  53% 5.32G/10.0G [00:19<00:16, 280MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  54% 5.37G/10.0G [00:20<00:14, 319MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  54% 5.42G/10.0G [00:20<00:12, 353MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  55% 5.47G/10.0G [00:20<00:11, 381MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  55% 5.52G/10.0G [00:20<00:11, 387MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  56% 5.56G/10.0G [00:20<00:11, 387MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  56% 5.60G/10.0G [00:20<00:11, 393MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  56% 5.64G/10.0G [00:20<00:10, 397MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  57% 5.68G/10.0G [00:20<00:12, 347MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  57% 5.73G/10.0G [00:21<00:13, 316MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  58% 5.77G/10.0G [00:21<00:12, 339MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  58% 5.82G/10.0G [00:21<00:11, 
366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  59% 5.87G/10.0G [00:21<00:10, 383MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  59% 5.91G/10.0G [00:21<00:11, 352MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  60% 5.96G/10.0G [00:21<00:11, 346MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  60% 6.00G/10.0G [00:21<00:14, 281MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  60% 6.03G/10.0G [00:22<00:15, 249MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  61% 6.07G/10.0G [00:22<00:14, 275MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  61% 6.10G/10.0G [00:22<00:14, 271MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  62% 6.16G/10.0G [00:22<00:12, 314MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  62% 6.21G/10.0G [00:22<00:10, 358MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  62% 6.25G/10.0G [00:22<00:10, 370MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  63% 6.30G/10.0G [00:22<00:09, 399MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  63% 6.34G/10.0G [00:22<00:09, 400MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  64% 6.39G/10.0G [00:22<00:09, 397MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  64% 6.43G/10.0G [00:23<00:10, 357MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  65% 6.47G/10.0G [00:23<00:09, 370MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  65% 6.52G/10.0G [00:23<00:08, 407MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  66% 6.59G/10.0G [00:23<00:07, 445MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  66% 6.64G/10.0G [00:23<00:07, 440MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  67% 6.69G/10.0G [00:23<00:07, 419MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  67% 6.74G/10.0G [00:23<00:08, 362MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  68% 6.78G/10.0G [00:23<00:08, 371MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  68% 6.84G/10.0G [00:24<00:08, 383MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  69% 6.88G/10.0G [00:24<00:08, 
366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  69% 6.92G/10.0G [00:24<00:08, 352MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  70% 6.96G/10.0G [00:24<00:08, 341MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  70% 7.00G/10.0G [00:24<00:08, 338MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  70% 7.05G/10.0G [00:24<00:10, 271MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  71% 7.08G/10.0G [00:24<00:10, 270MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  71% 7.12G/10.0G [00:25<00:09, 293MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  72% 7.16G/10.0G [00:25<00:08, 319MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  72% 7.20G/10.0G [00:25<00:08, 341MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  73% 7.26G/10.0G [00:25<00:07, 368MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  73% 7.31G/10.0G [00:25<00:06, 389MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  74% 7.35G/10.0G [00:25<00:06, 390MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  74% 7.39G/10.0G [00:25<00:06, 395MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  74% 7.43G/10.0G [00:25<00:07, 344MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  75% 7.48G/10.0G [00:26<00:08, 305MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  75% 7.52G/10.0G [00:26<00:09, 271MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  76% 7.56G/10.0G [00:26<00:08, 288MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  76% 7.60G/10.0G [00:26<00:08, 295MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  76% 7.64G/10.0G [00:26<00:07, 313MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  77% 7.69G/10.0G [00:26<00:07, 315MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  77% 7.73G/10.0G [00:26<00:07, 316MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  78% 7.77G/10.0G [00:27<00:07, 307MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  78% 7.81G/10.0G [00:27<00:06, 329MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  79% 7.86G/10.0G [00:27<00:05, 
358MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  79% 7.91G/10.0G [00:27<00:05, 367MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  79% 7.95G/10.0G [00:27<00:06, 332MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  80% 7.99G/10.0G [00:27<00:06, 294MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  80% 8.03G/10.0G [00:27<00:06, 309MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  81% 8.07G/10.0G [00:27<00:06, 305MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  81% 8.11G/10.0G [00:28<00:06, 288MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  81% 8.14G/10.0G [00:28<00:07, 261MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  82% 8.18G/10.0G [00:28<00:06, 280MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  82% 8.23G/10.0G [00:28<00:05, 314MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  83% 8.27G/10.0G [00:28<00:05, 296MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  83% 8.32G/10.0G [00:28<00:05, 307MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  83% 8.35G/10.0G [00:28<00:05, 302MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  84% 8.38G/10.0G [00:29<00:05, 287MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  84% 8.41G/10.0G [00:29<00:05, 292MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  85% 8.45G/10.0G [00:29<00:04, 319MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  85% 8.49G/10.0G [00:29<00:04, 324MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  85% 8.54G/10.0G [00:29<00:04, 307MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  86% 8.58G/10.0G [00:29<00:04, 325MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  86% 8.62G/10.0G [00:29<00:04, 334MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  87% 8.67G/10.0G [00:29<00:03, 366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  87% 8.72G/10.0G [00:30<00:03, 391MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  88% 8.77G/10.0G [00:30<00:03, 351MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  88% 8.81G/10.0G [00:30<00:03, 
333MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  89% 8.85G/10.0G [00:30<00:03, 343MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  89% 8.90G/10.0G [00:30<00:03, 363MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  89% 8.94G/10.0G [00:30<00:02, 369MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  90% 8.99G/10.0G [00:30<00:02, 362MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  90% 9.03G/10.0G [00:30<00:03, 320MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  91% 9.07G/10.0G [00:31<00:03, 309MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  91% 9.11G/10.0G [00:31<00:03, 292MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  92% 9.15G/10.0G [00:31<00:02, 307MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  92% 9.20G/10.0G [00:31<00:02, 327MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  92% 9.24G/10.0G [00:31<00:02, 326MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  93% 9.28G/10.0G [00:31<00:02, 343MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  93% 9.33G/10.0G [00:31<00:01, 383MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  94% 9.37G/10.0G [00:31<00:01, 366MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  94% 9.42G/10.0G [00:32<00:02, 238MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  94% 9.45G/10.0G [00:32<00:02, 232MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  95% 9.48G/10.0G [00:32<00:02, 204MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  95% 9.51G/10.0G [00:32<00:02, 222MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  95% 9.54G/10.0G [00:32<00:02, 227MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  96% 9.57G/10.0G [00:32<00:01, 241MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  96% 9.62G/10.0G [00:33<00:01, 253MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  96% 9.65G/10.0G [00:33<00:01, 263MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  97% 9.68G/10.0G [00:33<00:01, 261MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  97% 9.72G/10.0G [00:33<00:01, 
279MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  98% 9.75G/10.0G [00:33<00:00, 263MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  98% 9.78G/10.0G [00:33<00:01, 187MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  98% 9.83G/10.0G [00:34<00:00, 222MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  99% 9.86G/10.0G [00:34<00:00, 233MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin:  99% 9.91G/10.0G [00:34<00:00, 285MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin: 100% 9.95G/10.0G [00:34<00:00, 313MB/s]\u001b[A\n","pytorch_model-00002-of-00003.bin: 100% 10.0G/10.0G [00:34<00:00, 290MB/s]\n","Downloading shards:  67% 2/3 [01:04<00:32, 32.87s/it]\n","pytorch_model-00003-of-00003.bin:   0% 0.00/2.22G [00:00<?, ?B/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:   1% 31.5M/2.22G [00:00<00:08, 260MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:   3% 62.9M/2.22G [00:00<00:08, 262MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:   4% 94.4M/2.22G [00:00<00:08, 255MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:   6% 126M/2.22G [00:00<00:09, 233MB/s] \u001b[A\n","pytorch_model-00003-of-00003.bin:   7% 157M/2.22G [00:00<00:08, 252MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:   8% 189M/2.22G [00:00<00:07, 263MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  10% 220M/2.22G [00:00<00:07, 254MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  12% 262M/2.22G [00:00<00:06, 286MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  13% 294M/2.22G [00:01<00:08, 241MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  15% 336M/2.22G [00:01<00:07, 268MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  17% 367M/2.22G [00:01<00:06, 278MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  18% 398M/2.22G [00:01<00:06, 274MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  19% 430M/2.22G [00:01<00:06, 269MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  21% 461M/2.22G [00:01<00:06, 280MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  22% 493M/2.22G 
[00:01<00:06, 271MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  24% 524M/2.22G [00:01<00:06, 276MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  25% 556M/2.22G [00:02<00:05, 281MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  26% 587M/2.22G [00:02<00:05, 285MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  28% 619M/2.22G [00:02<00:05, 273MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  29% 650M/2.22G [00:02<00:05, 270MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  31% 692M/2.22G [00:02<00:05, 288MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  33% 724M/2.22G [00:02<00:05, 264MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  34% 755M/2.22G [00:02<00:05, 264MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  35% 786M/2.22G [00:02<00:05, 268MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  37% 828M/2.22G [00:03<00:04, 285MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  39% 860M/2.22G [00:03<00:04, 288MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  40% 891M/2.22G [00:03<00:04, 275MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  41% 923M/2.22G [00:03<00:04, 283MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  43% 954M/2.22G [00:03<00:04, 276MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  44% 986M/2.22G [00:03<00:04, 263MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  46% 1.02G/2.22G [00:03<00:04, 268MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  47% 1.05G/2.22G [00:03<00:04, 273MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  49% 1.09G/2.22G [00:04<00:03, 287MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  50% 1.12G/2.22G [00:04<00:03, 282MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  52% 1.15G/2.22G [00:04<00:03, 269MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  53% 1.18G/2.22G [00:04<00:03, 276MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  55% 1.22G/2.22G [00:04<00:03, 274MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  56% 1.25G/2.22G [00:04<00:03, 
265MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  58% 1.28G/2.22G [00:04<00:03, 269MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  59% 1.31G/2.22G [00:04<00:03, 271MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  60% 1.34G/2.22G [00:04<00:03, 255MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  62% 1.37G/2.22G [00:05<00:03, 244MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  63% 1.41G/2.22G [00:05<00:03, 250MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  65% 1.44G/2.22G [00:05<00:03, 219MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  66% 1.47G/2.22G [00:05<00:03, 232MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  67% 1.50G/2.22G [00:05<00:02, 246MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  69% 1.53G/2.22G [00:05<00:02, 250MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  70% 1.56G/2.22G [00:05<00:02, 260MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  72% 1.59G/2.22G [00:06<00:02, 257MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  74% 1.64G/2.22G [00:06<00:02, 280MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  75% 1.67G/2.22G [00:06<00:01, 283MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  76% 1.70G/2.22G [00:06<00:01, 281MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  78% 1.73G/2.22G [00:06<00:01, 284MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  79% 1.76G/2.22G [00:06<00:01, 282MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  81% 1.79G/2.22G [00:06<00:01, 263MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  83% 1.84G/2.22G [00:06<00:01, 287MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  84% 1.87G/2.22G [00:06<00:01, 292MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  85% 1.90G/2.22G [00:07<00:01, 287MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  87% 1.93G/2.22G [00:07<00:01, 289MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  88% 1.96G/2.22G [00:07<00:00, 292MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  90% 2.00G/2.22G [00:07<00:00, 
305MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  91% 2.03G/2.22G [00:07<00:00, 307MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  93% 2.07G/2.22G [00:07<00:00, 291MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  95% 2.11G/2.22G [00:07<00:00, 305MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin:  97% 2.15G/2.22G [00:07<00:00, 315MB/s]\u001b[A\n","pytorch_model-00003-of-00003.bin: 100% 2.22G/2.22G [00:08<00:00, 275MB/s]\n","Downloading shards: 100% 3/3 [01:13<00:00, 24.49s/it]\n","Set max length to 16384\n","config.json: 100% 508/508 [00:00<00:00, 3.67MB/s]\n","pytorch_model.bin: 100% 1.22G/1.22G [00:04<00:00, 261MB/s]\n","Loading checkpoint shards: 100% 3/3 [00:20<00:00,  6.84s/it]\n","generation_config.json: 100% 177/177 [00:00<00:00, 1.53MB/s]\n","tokenizer_config.json: 100% 2.38k/2.38k [00:00<00:00, 23.5MB/s]\n","tokenization_internlm2.py: 100% 8.81k/8.81k [00:00<00:00, 57.4MB/s]\n","A new version of the following files was downloaded from https://huggingface.co/internlm/internlm-xcomposer2d5-7b:\n","- tokenization_internlm2.py\n",". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n","tokenizer.model: 100% 1.48M/1.48M [00:00<00:00, 10.0MB/s]\n","added_tokens.json: 100% 158/158 [00:00<00:00, 1.24MB/s]\n","special_tokens_map.json: 100% 713/713 [00:00<00:00, 5.85MB/s]\n","trainable params: 151,003,136 || all params: 11,246,729,216 || trainable%: 1.342640452169663\n","Loading data...\n","Load 1000 samples from ['/content/drive/MyDrive/label.json', '1']\n","init mix data at rank 0\n","load 1000 data\n","1000samples is loaded\n","True\n","/usr/local/lib/python3.10/dist-packages/accelerate/accelerator.py:451: FutureWarning: Passing the following arguments to `Accelerator` is deprecated and will be removed in version 1.0 of Accelerate: dict_keys(['dispatch_batches']). 
Please pass an `accelerate.DataLoaderConfiguration` instead: \n","dataloader_config = DataLoaderConfiguration(dispatch_batches=None)\n","  warnings.warn(\n","  0% 0/250 [00:00<?, ?it/s]Set seed 88 for rank 0\n","/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n","  warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n","Could not estimate the number of tokens of the input, floating-point operations will not be computed\n","/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n","  warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n","{'loss': 2.5766, 'learning_rate': 1.6666666666666667e-05, 'epoch': 0.01}\n","{'loss': 2.6612, 'learning_rate': 3.3333333333333335e-05, 'epoch': 0.02}\n","{'loss': 2.7012, 'learning_rate': 5e-05, 'epoch': 0.02}\n","{'loss': 2.6389, 'learning_rate': 4.999797786569422e-05, 'epoch': 0.03}\n","{'loss': 2.1119, 'learning_rate': 4.9991911789899045e-05, 'epoch': 0.04}\n","{'loss': 1.7629, 'learning_rate': 4.998180275392809e-05, 'epoch': 0.05}\n","{'loss': 1.8109, 'learning_rate': 4.9967652393127616e-05, 'epoch': 0.06}\n","{'loss': 1.4762, 'learning_rate': 4.9949462996612016e-05, 'epoch': 0.06}\n","{'loss': 1.4984, 'learning_rate': 4.992723750689353e-05, 'epoch': 0.07}\n","{'loss': 1.2991, 'learning_rate': 4.990097951940616e-05, 'epoch': 0.08}\n","{'loss': 1.3026, 'learning_rate': 4.987069328192407e-05, 'epoch': 0.09}\n","{'loss': 1.1909, 'learning_rate': 4.9836383693874486e-05, 'epoch': 0.1}\n","{'loss': 1.3898, 'learning_rate': 4.979805630554499e-05, 'epoch': 0.1}\n","{'loss': 1.0809, 'learning_rate': 4.9755717317185726e-05, 'epoch': 0.11}\n","{'loss': 1.2745, 'learning_rate': 4.970937357800635e-05, 'epoch': 0.12}\n","{'loss': 1.2746, 'learning_rate': 4.965903258506806e-05, 
'epoch': 0.13}\n","{'loss': 0.9897, 'learning_rate': 4.9604702482070765e-05, 'epoch': 0.14}\n","{'loss': 0.9035, 'learning_rate': 4.954639205803567e-05, 'epoch': 0.14}\n","{'loss': 0.9965, 'learning_rate': 4.9484110745883475e-05, 'epoch': 0.15}\n","{'loss': 1.0463, 'learning_rate': 4.9417868620908414e-05, 'epoch': 0.16}\n","{'loss': 0.9453, 'learning_rate': 4.934767639914837e-05, 'epoch': 0.17}\n","{'loss': 0.9044, 'learning_rate': 4.92735454356513e-05, 'epoch': 0.18}\n","{'loss': 1.1339, 'learning_rate': 4.9195487722638364e-05, 'epoch': 0.18}\n","{'loss': 0.9762, 'learning_rate': 4.911351588756391e-05, 'epoch': 0.19}\n","{'loss': 0.9778, 'learning_rate': 4.902764319107271e-05, 'epoch': 0.2}\n","{'loss': 0.9534, 'learning_rate': 4.893788352485482e-05, 'epoch': 0.21}\n","{'loss': 1.0598, 'learning_rate': 4.884425140939826e-05, 'epoch': 0.22}\n","{'loss': 0.9768, 'learning_rate': 4.8746761991640044e-05, 'epoch': 0.22}\n","{'loss': 0.976, 'learning_rate': 4.864543104251587e-05, 'epoch': 0.23}\n","{'loss': 0.8795, 'learning_rate': 4.8540274954408816e-05, 'epoch': 0.24}\n","{'loss': 0.9339, 'learning_rate': 4.843131073849753e-05, 'epoch': 0.25}\n","{'loss': 0.8548, 'learning_rate': 4.831855602200436e-05, 'epoch': 0.26}\n","{'loss': 0.968, 'learning_rate': 4.820202904534371e-05, 'epoch': 0.26}\n","{'loss': 0.9534, 'learning_rate': 4.8081748659171355e-05, 'epoch': 0.27}\n","{'loss': 1.0182, 'learning_rate': 4.7957734321334915e-05, 'epoch': 0.28}\n","{'loss': 0.968, 'learning_rate': 4.7830006093726145e-05, 'epoch': 0.29}\n","{'loss': 0.942, 'learning_rate': 4.7698584639035503e-05, 'epoch': 0.3}\n","{'loss': 0.929, 'learning_rate': 4.756349121740957e-05, 'epoch': 0.3}\n","{'loss': 0.8767, 'learning_rate': 4.7424747683011714e-05, 'epoch': 0.31}\n","{'loss': 0.9288, 'learning_rate': 4.72823764804868e-05, 'epoch': 0.32}\n","{'loss': 0.9817, 'learning_rate': 4.713640064133025e-05, 'epoch': 0.33}\n","{'loss': 1.0048, 'learning_rate': 4.698684378016222e-05, 'epoch': 
0.34}\n","{'loss': 0.8346, 'learning_rate': 4.6833730090907514e-05, 'epoch': 0.34}\n","{'loss': 0.9673, 'learning_rate': 4.66770843428816e-05, 'epoch': 0.35}\n","{'loss': 0.9451, 'learning_rate': 4.651693187678376e-05, 'epoch': 0.36}\n","{'loss': 0.8197, 'learning_rate': 4.6353298600597666e-05, 'epoch': 0.37}\n","{'loss': 1.0502, 'learning_rate': 4.618621098540023e-05, 'epoch': 0.38}\n","{'loss': 1.0809, 'learning_rate': 4.601569606107934e-05, 'epoch': 0.38}\n","{'loss': 0.8393, 'learning_rate': 4.584178141196126e-05, 'epoch': 0.39}\n","{'loss': 0.9487, 'learning_rate': 4.5664495172348236e-05, 'epoch': 0.4}\n","{'loss': 0.976, 'learning_rate': 4.5483866021967244e-05, 'epoch': 0.41}\n","{'loss': 0.8458, 'learning_rate': 4.5299923181330414e-05, 'epoch': 0.42}\n","{'loss': 0.8526, 'learning_rate': 4.5112696407008006e-05, 'epoch': 0.42}\n","{'loss': 0.9591, 'learning_rate': 4.4922215986814684e-05, 'epoch': 0.43}\n","{'loss': 0.9073, 'learning_rate': 4.4728512734909844e-05, 'epoch': 0.44}\n","{'loss': 0.9385, 'learning_rate': 4.4531617986812735e-05, 'epoch': 0.45}\n","{'loss': 0.8503, 'learning_rate': 4.433156359433334e-05, 'epoch': 0.46}\n","{'loss': 0.7382, 'learning_rate': 4.4128381920419676e-05, 'epoch': 0.46}\n","{'loss': 0.7618, 'learning_rate': 4.392210583392238e-05, 'epoch': 0.47}\n","{'loss': 0.9815, 'learning_rate': 4.371276870427753e-05, 'epoch': 0.48}\n","{'loss': 0.9103, 'learning_rate': 4.350040439610844e-05, 'epoch': 0.49}\n","{'loss': 0.8797, 'learning_rate': 4.328504726374733e-05, 'epoch': 0.5}\n","{'loss': 0.8701, 'learning_rate': 4.3066732145677835e-05, 'epoch': 0.5}\n","{'loss': 0.9791, 'learning_rate': 4.2845494358899134e-05, 'epoch': 0.51}\n","{'loss': 0.8046, 'learning_rate': 4.262136969321269e-05, 'epoch': 0.52}\n","{'loss': 0.8428, 'learning_rate': 4.239439440543252e-05, 'epoch': 0.53}\n","{'loss': 0.8797, 'learning_rate': 4.216460521351992e-05, 'epoch': 0.54}\n","{'loss': 0.8759, 'learning_rate': 4.193203929064353e-05, 'epoch': 
0.54}\n","{'loss': 0.908, 'learning_rate': 4.1696734259165813e-05, 'epoch': 0.55}\n","{'loss': 0.8558, 'learning_rate': 4.145872818455691e-05, 'epoch': 0.56}\n","{'loss': 0.8773, 'learning_rate': 4.121805956923668e-05, 'epoch': 0.57}\n","{'loss': 0.9264, 'learning_rate': 4.09747673463462e-05, 'epoch': 0.58}\n","{'loss': 0.7748, 'learning_rate': 4.072889087344949e-05, 'epoch': 0.58}\n","{'loss': 0.7601, 'learning_rate': 4.048046992616661e-05, 'epoch': 0.59}\n","{'loss': 0.897, 'learning_rate': 4.022954469173914e-05, 'epoch': 0.6}\n","{'loss': 0.9385, 'learning_rate': 3.997615576252907e-05, 'epoch': 0.61}\n","{'loss': 0.7677, 'learning_rate': 3.972034412945211e-05, 'epoch': 0.62}\n","{'loss': 0.9435, 'learning_rate': 3.946215117534658e-05, 'epoch': 0.62}\n","{'loss': 0.9926, 'learning_rate': 3.920161866827889e-05, 'epoch': 0.63}\n","{'loss': 0.7888, 'learning_rate': 3.893878875478667e-05, 'epoch': 0.64}\n","{'loss': 0.8785, 'learning_rate': 3.867370395306068e-05, 'epoch': 0.65}\n","{'loss': 0.8874, 'learning_rate': 3.8406407146066635e-05, 'epoch': 0.66}\n","{'loss': 0.742, 'learning_rate': 3.813694157460801e-05, 'epoch': 0.66}\n","{'loss': 0.8277, 'learning_rate': 3.7865350830330896e-05, 'epoch': 0.67}\n","{'loss': 0.9179, 'learning_rate': 3.7591678848672194e-05, 'epoch': 0.68}\n","{'loss': 0.7319, 'learning_rate': 3.7315969901752104e-05, 'epoch': 0.69}\n","{'loss': 0.8333, 'learning_rate': 3.7038268591212246e-05, 'epoch': 0.7}\n","{'loss': 0.7742, 'learning_rate': 3.6758619841000334e-05, 'epoch': 0.7}\n","{'loss': 0.816, 'learning_rate': 3.6477068890102895e-05, 'epoch': 0.71}\n","{'loss': 0.8413, 'learning_rate': 3.619366128522686e-05, 'epoch': 0.72}\n","{'loss': 0.8512, 'learning_rate': 3.590844287343146e-05, 'epoch': 0.73}\n","{'loss': 0.7648, 'learning_rate': 3.5621459794711484e-05, 'epoch': 0.74}\n","{'loss': 0.8134, 'learning_rate': 3.5332758474533255e-05, 'epoch': 0.74}\n","{'loss': 0.8745, 'learning_rate': 3.504238561632424e-05, 'epoch': 0.75}\n","{'loss': 
0.868, 'learning_rate': 3.475038819391789e-05, 'epoch': 0.76}\n","{'loss': 0.8177, 'learning_rate': 3.445681344395462e-05, 'epoch': 0.77}\n","{'loss': 0.879, 'learning_rate': 3.416170885824028e-05, 'epoch': 0.78}\n","{'loss': 0.76, 'learning_rate': 3.386512217606339e-05, 'epoch': 0.78}\n","{'loss': 0.8755, 'learning_rate': 3.3567101376472333e-05, 'epoch': 0.79}\n","{'loss': 0.9767, 'learning_rate': 3.326769467051372e-05, 'epoch': 0.8}\n","{'loss': 0.8406, 'learning_rate': 3.2966950493433264e-05, 'epoch': 0.81}\n","{'loss': 0.8872, 'learning_rate': 3.266491749684039e-05, 'epoch': 0.82}\n","{'loss': 0.731, 'learning_rate': 3.236164454083781e-05, 'epoch': 0.82}\n","{'loss': 0.8103, 'learning_rate': 3.2057180686117396e-05, 'epoch': 0.83}\n","{'loss': 0.908, 'learning_rate': 3.175157518602357e-05, 'epoch': 0.84}\n","{'loss': 0.8204, 'learning_rate': 3.144487747858562e-05, 'epoch': 0.85}\n","{'loss': 0.749, 'learning_rate': 3.1137137178519985e-05, 'epoch': 0.86}\n","{'loss': 0.7598, 'learning_rate': 3.082840406920411e-05, 'epoch': 0.86}\n","{'loss': 0.7852, 'learning_rate': 3.051872809462293e-05, 'epoch': 0.87}\n","{'loss': 0.7729, 'learning_rate': 3.0208159351289413e-05, 'epoch': 0.88}\n","{'loss': 0.8382, 'learning_rate': 2.9896748080140334e-05, 'epoch': 0.89}\n","{'loss': 0.7846, 'learning_rate': 2.958454465840891e-05, 'epoch': 0.9}\n","{'loss': 0.9748, 'learning_rate': 2.9271599591475057e-05, 'epoch': 0.9}\n","{'loss': 0.9441, 'learning_rate': 2.8957963504695252e-05, 'epoch': 0.91}\n","{'loss': 0.8583, 'learning_rate': 2.864368713521274e-05, 'epoch': 0.92}\n","{'loss': 0.8541, 'learning_rate': 2.832882132374977e-05, 'epoch': 0.93}\n","{'loss': 0.7155, 'learning_rate': 2.8013417006383076e-05, 'epoch': 0.94}\n","{'loss': 0.8662, 'learning_rate': 2.7697525206303892e-05, 'epoch': 0.94}\n","{'loss': 0.8897, 'learning_rate': 2.738119702556387e-05, 'epoch': 0.95}\n","{'loss': 0.7744, 'learning_rate': 2.7064483636808313e-05, 'epoch': 0.96}\n","{'loss': 0.9302, 
'learning_rate': 2.6747436274997895e-05, 'epoch': 0.97}\n","{'loss': 0.6924, 'learning_rate': 2.6430106229120372e-05, 'epoch': 0.98}\n","{'loss': 0.8476, 'learning_rate': 2.611254483389351e-05, 'epoch': 0.98}\n","{'loss': 0.7705, 'learning_rate': 2.579480346146061e-05, 'epoch': 0.99}\n","{'loss': 0.8093, 'learning_rate': 2.547693351308006e-05, 'epoch': 1.0}\n"," 50% 125/250 [39:16<32:37, 15.66s/it]/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.\n","  warnings.warn(\n","/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1802: UserWarning: Positional args are being deprecated, use kwargs instead. Refer to https://pytorch.org/docs/master/generated/torch.nn.Module.html#torch.nn.Module.state_dict for details.\n","  warnings.warn(\n","{'loss': 0.8152, 'learning_rate': 2.515898641081003e-05, 'epoch': 1.01}\n","{'loss': 0.8128, 'learning_rate': 2.4841013589189976e-05, 'epoch': 1.02}\n","{'loss': 0.6815, 'learning_rate': 2.4523066486919945e-05, 'epoch': 1.02}\n","{'loss': 0.8236, 'learning_rate': 2.4205196538539386e-05, 'epoch': 1.03}\n","{'loss': 0.7944, 'learning_rate': 2.38874551661065e-05, 'epoch': 1.04}\n","{'loss': 0.7862, 'learning_rate': 2.356989377087963e-05, 'epoch': 1.05}\n","{'loss': 0.8241, 'learning_rate': 2.3252563725002104e-05, 'epoch': 1.06}\n","{'loss': 0.79, 'learning_rate': 2.2935516363191693e-05, 'epoch': 1.06}\n","{'loss': 0.9297, 'learning_rate': 2.2618802974436138e-05, 'epoch': 1.07}\n","{'loss': 0.7508, 'learning_rate': 2.2302474793696117e-05, 'epoch': 1.08}\n","{'loss': 0.829, 'learning_rate': 2.1986582993616926e-05, 'epoch': 1.09}\n","{'loss': 0.9006, 'learning_rate': 2.1671178676250238e-05, 'epoch': 1.1}\n","{'loss': 0.7289, 'learning_rate': 2.1356312864787263e-05, 'epoch': 1.1}\n","{'loss': 0.7473, 'learning_rate': 
2.104203649530475e-05, 'epoch': 1.11}\n","{'loss': 0.8448, 'learning_rate': 2.0728400408524946e-05, 'epoch': 1.12}\n","{'loss': 0.6737, 'learning_rate': 2.04154553415911e-05, 'epoch': 1.13}\n","{'loss': 0.7037, 'learning_rate': 2.0103251919859668e-05, 'epoch': 1.14}\n","{'loss': 0.6476, 'learning_rate': 1.9791840648710596e-05, 'epoch': 1.14}\n","{'loss': 0.7181, 'learning_rate': 1.948127190537708e-05, 'epoch': 1.15}\n","{'loss': 0.7373, 'learning_rate': 1.9171595930795897e-05, 'epoch': 1.16}\n","{'loss': 0.8151, 'learning_rate': 1.8862862821480025e-05, 'epoch': 1.17}\n","{'loss': 0.8481, 'learning_rate': 1.855512252141439e-05, 'epoch': 1.18}\n","{'loss': 0.8786, 'learning_rate': 1.8248424813976435e-05, 'epoch': 1.18}\n","{'loss': 0.7344, 'learning_rate': 1.7942819313882613e-05, 'epoch': 1.19}\n","{'loss': 0.8512, 'learning_rate': 1.76383554591622e-05, 'epoch': 1.2}\n","{'loss': 0.8542, 'learning_rate': 1.7335082503159617e-05, 'epoch': 1.21}\n","{'loss': 0.8898, 'learning_rate': 1.7033049506566745e-05, 'epoch': 1.22}\n","{'loss': 0.7487, 'learning_rate': 1.6732305329486292e-05, 'epoch': 1.22}\n","{'loss': 0.8603, 'learning_rate': 1.6432898623527676e-05, 'epoch': 1.23}\n","{'loss': 0.7742, 'learning_rate': 1.613487782393661e-05, 'epoch': 1.24}\n","{'loss': 0.8855, 'learning_rate': 1.5838291141759725e-05, 'epoch': 1.25}\n","{'loss': 0.7301, 'learning_rate': 1.5543186556045382e-05, 'epoch': 1.26}\n","{'loss': 0.702, 'learning_rate': 1.5249611806082108e-05, 'epoch': 1.26}\n","{'loss': 0.8106, 'learning_rate': 1.495761438367577e-05, 'epoch': 1.27}\n","{'loss': 0.7197, 'learning_rate': 1.4667241525466751e-05, 'epoch': 1.28}\n","{'loss': 0.7888, 'learning_rate': 1.437854020528852e-05, 'epoch': 1.29}\n","{'loss': 0.7345, 'learning_rate': 1.409155712656855e-05, 'epoch': 1.3}\n","{'loss': 0.7913, 'learning_rate': 1.3806338714773143e-05, 'epoch': 1.3}\n","{'loss': 0.7088, 'learning_rate': 1.3522931109897108e-05, 'epoch': 1.31}\n","{'loss': 0.6979, 'learning_rate': 
1.324138015899967e-05, 'epoch': 1.32}\n","{'loss': 0.7551, 'learning_rate': 1.2961731408787761e-05, 'epoch': 1.33}\n","{'loss': 0.8441, 'learning_rate': 1.2684030098247893e-05, 'epoch': 1.34}\n","{'loss': 0.793, 'learning_rate': 1.2408321151327818e-05, 'epoch': 1.34}\n","{'loss': 0.7326, 'learning_rate': 1.2134649169669102e-05, 'epoch': 1.35}\n","{'loss': 0.9824, 'learning_rate': 1.1863058425391993e-05, 'epoch': 1.36}\n","{'loss': 0.806, 'learning_rate': 1.1593592853933374e-05, 'epoch': 1.37}\n","{'loss': 0.8263, 'learning_rate': 1.1326296046939333e-05, 'epoch': 1.38}\n","{'loss': 0.9233, 'learning_rate': 1.1061211245213338e-05, 'epoch': 1.38}\n","{'loss': 0.8172, 'learning_rate': 1.0798381331721109e-05, 'epoch': 1.39}\n","{'loss': 0.7512, 'learning_rate': 1.0537848824653418e-05, 'epoch': 1.4}\n","{'loss': 0.7811, 'learning_rate': 1.0279655870547894e-05, 'epoch': 1.41}\n","{'loss': 0.752, 'learning_rate': 1.002384423747093e-05, 'epoch': 1.42}\n","{'loss': 0.7855, 'learning_rate': 9.770455308260859e-06, 'epoch': 1.42}\n","{'loss': 0.8299, 'learning_rate': 9.519530073833405e-06, 'epoch': 1.43}\n","{'loss': 0.9079, 'learning_rate': 9.271109126550515e-06, 'epoch': 1.44}\n","{'loss': 0.7, 'learning_rate': 9.025232653653801e-06, 'epoch': 1.45}\n","{'loss': 0.8924, 'learning_rate': 8.781940430763328e-06, 'epoch': 1.46}\n","{'loss': 0.6699, 'learning_rate': 8.5412718154431e-06, 'epoch': 1.46}\n","{'loss': 0.7391, 'learning_rate': 8.303265740834182e-06, 'epoch': 1.47}\n","{'loss': 0.7712, 'learning_rate': 8.067960709356478e-06, 'epoch': 1.48}\n","{'loss': 0.7392, 'learning_rate': 7.83539478648008e-06, 'epoch': 1.49}\n","{'loss': 0.957, 'learning_rate': 7.605605594567478e-06, 'epoch': 1.5}\n","{'loss': 0.8057, 'learning_rate': 7.378630306787315e-06, 'epoch': 1.5}\n","{'loss': 0.83, 'learning_rate': 7.15450564110087e-06, 'epoch': 1.51}\n","{'loss': 0.7776, 'learning_rate': 6.933267854322167e-06, 'epoch': 1.52}\n","{'loss': 0.9242, 'learning_rate': 6.714952736252672e-06, 
'epoch': 1.53}\n","{'loss': 0.9035, 'learning_rate': 6.499595603891565e-06, 'epoch': 1.54}\n","{'loss': 0.7541, 'learning_rate': 6.28723129572247e-06, 'epoch': 1.54}\n","{'loss': 0.7437, 'learning_rate': 6.07789416607763e-06, 'epoch': 1.55}\n","{'loss': 0.786, 'learning_rate': 5.871618079580327e-06, 'epoch': 1.56}\n","{'loss': 0.7352, 'learning_rate': 5.668436405666655e-06, 'epoch': 1.57}\n","{'loss': 0.8443, 'learning_rate': 5.468382013187273e-06, 'epoch': 1.58}\n","{'loss': 0.715, 'learning_rate': 5.271487265090163e-06, 'epoch': 1.58}\n","{'loss': 0.775, 'learning_rate': 5.077784013185319e-06, 'epoch': 1.59}\n","{'loss': 0.7808, 'learning_rate': 4.887303592992001e-06, 'epoch': 1.6}\n","{'loss': 0.8436, 'learning_rate': 4.700076818669591e-06, 'epoch': 1.61}\n","{'loss': 0.8544, 'learning_rate': 4.516133978032757e-06, 'epoch': 1.62}\n","{'loss': 0.7371, 'learning_rate': 4.335504827651765e-06, 'epoch': 1.62}\n","{'loss': 0.7556, 'learning_rate': 4.158218588038745e-06, 'epoch': 1.63}\n","{'loss': 0.794, 'learning_rate': 3.984303938920666e-06, 'epoch': 1.64}\n","{'loss': 0.7684, 'learning_rate': 3.8137890145997806e-06, 'epoch': 1.65}\n","{'loss': 0.6764, 'learning_rate': 3.646701399402333e-06, 'epoch': 1.66}\n","{'loss': 0.8171, 'learning_rate': 3.483068123216246e-06, 'epoch': 1.66}\n","{'loss': 0.8006, 'learning_rate': 3.3229156571184084e-06, 'epoch': 1.67}\n","{'loss': 0.8235, 'learning_rate': 3.1662699090924923e-06, 'epoch': 1.68}\n","{'loss': 0.7316, 'learning_rate': 3.013156219837776e-06, 'epoch': 1.69}\n","{'loss': 0.7856, 'learning_rate': 2.8635993586697553e-06, 'epoch': 1.7}\n","{'loss': 0.9472, 'learning_rate': 2.717623519513199e-06, 'epoch': 1.7}\n","{'loss': 0.8871, 'learning_rate': 2.575252316988286e-06, 'epoch': 1.71}\n","{'loss': 0.7211, 'learning_rate': 2.4365087825904355e-06, 'epoch': 1.72}\n","{'loss': 0.8231, 'learning_rate': 2.3014153609644986e-06, 'epoch': 1.73}\n","{'loss': 0.7742, 'learning_rate': 2.169993906273865e-06, 'epoch': 
1.74}\n","{'loss': 0.6852, 'learning_rate': 2.0422656786650866e-06, 'epoch': 1.74}\n","{'loss': 0.6598, 'learning_rate': 1.9182513408286447e-06, 'epoch': 1.75}\n","{'loss': 0.8363, 'learning_rate': 1.7979709546562973e-06, 'epoch': 1.76}\n","{'loss': 0.8847, 'learning_rate': 1.6814439779956503e-06, 'epoch': 1.77}\n","{'loss': 0.8718, 'learning_rate': 1.5686892615024678e-06, 'epoch': 1.78}\n","{'loss': 0.7951, 'learning_rate': 1.45972504559119e-06, 'epoch': 1.78}\n","{'loss': 0.6782, 'learning_rate': 1.3545689574841342e-06, 'epoch': 1.79}\n","{'loss': 0.7825, 'learning_rate': 1.253238008359961e-06, 'epoch': 1.8}\n","{'loss': 0.718, 'learning_rate': 1.155748590601749e-06, 'epoch': 1.81}\n","{'loss': 0.7334, 'learning_rate': 1.0621164751451835e-06, 'epoch': 1.82}\n","{'loss': 0.6697, 'learning_rate': 9.723568089272883e-07, 'epoch': 1.82}\n","{'loss': 0.8384, 'learning_rate': 8.864841124360923e-07, 'epoch': 1.83}\n","{'loss': 0.7294, 'learning_rate': 8.045122773616354e-07, 'epoch': 1.84}\n","{'loss': 0.72, 'learning_rate': 7.264545643486997e-07, 'epoch': 1.85}\n","{'loss': 0.909, 'learning_rate': 6.523236008516343e-07, 'epoch': 1.86}\n","{'loss': 0.6852, 'learning_rate': 5.821313790915883e-07, 'epoch': 1.86}\n","{'loss': 0.8316, 'learning_rate': 5.158892541165278e-07, 'epoch': 1.87}\n","{'loss': 0.8131, 'learning_rate': 4.536079419643374e-07, 'epoch': 1.88}\n","{'loss': 0.7803, 'learning_rate': 3.9529751792923844e-07, 'epoch': 1.89}\n","{'loss': 0.7678, 'learning_rate': 3.4096741493194197e-07, 'epoch': 1.9}\n","{'loss': 0.8511, 'learning_rate': 2.9062642199365333e-07, 'epoch': 1.9}\n","{'loss': 0.7571, 'learning_rate': 2.4428268281428136e-07, 'epoch': 1.91}\n","{'loss': 0.8188, 'learning_rate': 2.0194369445501183e-07, 'epoch': 1.92}\n","{'loss': 0.8357, 'learning_rate': 1.6361630612551083e-07, 'epoch': 1.93}\n","{'loss': 0.725, 'learning_rate': 1.2930671807592498e-07, 'epoch': 1.94}\n","{'loss': 0.8122, 'learning_rate': 9.90204805938505e-08, 'epoch': 1.94}\n","{'loss': 
0.8799, 'learning_rate': 7.276249310647632e-08, 'epoch': 1.95}\n","{'loss': 0.7587, 'learning_rate': 5.053700338798206e-08, 'epoch': 1.96}\n","{'loss': 0.7835, 'learning_rate': 3.2347606872387694e-08, 'epoch': 1.97}\n","{'loss': 0.7577, 'learning_rate': 1.8197246071910424e-08, 'epoch': 1.98}\n","{'loss': 0.7412, 'learning_rate': 8.088210100951066e-09, 'epoch': 1.98}\n","{'loss': 0.9044, 'learning_rate': 2.022134305779133e-09, 'epoch': 1.99}\n","{'loss': 0.8312, 'learning_rate': 0.0, 'epoch': 2.0}\n","{'train_runtime': 5129.4054, 'train_samples_per_second': 0.39, 'train_steps_per_second': 0.049, 'train_loss': 0.8924984774589538, 'epoch': 2.0}\n","100% 250/250 [1:25:29<00:00, 20.52s/it]\n","2024-09-12 04:34:02.134557: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n","2024-09-12 04:34:02.168905: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n","2024-09-12 04:34:02.175550: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","2024-09-12 04:34:03.648768: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n","/usr/local/lib/python3.10/dist-packages/huggingface_hub/file_download.py:1150: FutureWarning: `resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. 
If you want to force a new download, use `force_download=True`.\n","  warnings.warn(\n","Set max length to 16384\n","Loading checkpoint shards: 100% 3/3 [02:09<00:00, 43.18s/it]\n"]}]},{"cell_type":"code","source":["!python3 merge_peft_adapter.py --adapter_model_name=/content/drive/MyDrive/output/finetune_lora --base_model_name=internlm/internlm-xcomposer2d5-7b --output_name=/content/drive/MyDrive/output/finetune_full0830"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"24CJbqVWn10Q","executionInfo":{"status":"ok","timestamp":1726110058527,"user_tz":-480,"elapsed":793,"user":{"displayName":"aubrie barbella","userId":"12235799605179884781"}},"outputId":"d823bd72-e3d4-4ea3-e40e-a15cea7babb1"},"execution_count":2,"outputs":[{"output_type":"stream","name":"stdout","text":["python3: can't open file '/content/merge_peft_adapter.py': [Errno 2] No such file or directory\n"]}]},{"cell_type":"code","source":["import torch\n","from transformers import AutoModel, AutoTokenizer\n","from PIL import Image\n","\n","torch.set_grad_enabled(False)\n","\n","# img = Image.open('/content/drive/MyDrive/data/images/household0003.png')\n","# img = img.convert(\"RGB\")\n","# img.save(\"/content/drive/MyDrive/data/images/household0003.png\")\n","\n","# init model and tokenizer\n","model = AutoModel.from_pretrained('/content/drive/MyDrive/output/finetune_full0830', torch_dtype=torch.bfloat16, trust_remote_code=True).cuda().eval().half()\n","tokenizer = AutoTokenizer.from_pretrained('/content/drive/MyDrive/output/finetune_full0830', trust_remote_code=True)\n","model.tokenizer = tokenizer\n","\n","query = 'Please analyze the input household scene image, which may be an overhead view of one or more rooms, or an image taken from a certain height. Based on the content of the image, generate low-level action instructions for 4 people. 
Each action instruction should be a clear sequence of actions without any descriptive statements.\\n\\nRequirements:\\nEach person should have no fewer than two actions, and at most three.\\nThe action instructions must be clear sequences of actions without descriptive language or additional information.\\nEnsure that the action instructions are reasonable within the scene and avoid conflicts between the actions of different people.\\nPeople can be engaged in different activities, but there should also be some actions that appear interactive (e.g., one person is eating while another places food on the table).\\nThe semantic information in the actions must strictly correspond to the image content, with no scenes or objects that are not present in the image, and the actions must align with common sense activities within the scene.\\nIn the action instructions, use clear subjects like \\\"a man\\\", \\\"a woman\\\", \\\"a child\\\", \\\"a person\\\", or other specific identifiers to clearly indicate each person\\'s actions.\\n\\nOutput format requirements:\\nPlease output all the action sequences in English as a single string, with the sequences for different people separated by semicolons.\\nWithin each action sequence, actions should be separated by commas.\\nThe output must contain only the action sequences for the exact number of people specified in the task.\\nDo not include any extra information, labels, or text outside the specified action sequences.'\n","image = ['/content/drive/MyDrive/data/images/household0004.png']\n","\n","\n","with torch.autocast(device_type='cuda', dtype=torch.float16):\n","    response, his = model.chat(tokenizer, query, image, do_sample=False, num_beams=3, 
use_meta=True)\n","print(response)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":142,"referenced_widgets":["c53c0c0aa3c14d8e972db0c2a429187b","8a101039a4d444008a779318a0ee492f","5e33aa7503ce4c2bb721caa5360e65e3","1075bd2883a647a996169373033a70a0","c66752f780de49abb6b32cb293166611","cbf1c5ee5976493c9085bd867941b021","2afe2c0f8d1747a7b7d26b652149d75f","8afdb828d27a46529d37e53eb855fc4c","c95054262870443abaf096d2691b3621","6236842858044976838df84897108164","cc46f91c30ab4e4ca8dfdbdfb3480095"]},"id":"G2kmJFSbiNuH","executionInfo":{"status":"ok","timestamp":1724984626253,"user_tz":-480,"elapsed":687528,"user":{"displayName":"aubrie barbella","userId":"12235799605179884781"}},"outputId":"82f1ab42-6ab9-42b3-90f9-4d22767e7428"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Set max length to 16384\n"]},{"output_type":"display_data","data":{"text/plain":["Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]"],"application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c53c0c0aa3c14d8e972db0c2a429187b"}},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.10/dist-packages/transformers/generation/configuration_utils.py:367: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.8` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.\n","  warnings.warn(\n"]},{"output_type":"stream","name":"stdout","text":["A man places a pot on the stove, turns on the burner; A woman opens the refrigerator, takes out a bowl; A child sits at the table, places a plate in front of them; Another person washes dishes in the sink, dries them with a towel\n"]}]}]}