{"nbformat":4,"nbformat_minor":0,"metadata":{"accelerator":"GPU","colab":{"name":"post_training_new_building_text_classifier_GLOBAL.ipynb","provenance":[{"file_id":"107n_6BX7CWAgtWU-jCvGQqIEwapHAypW","timestamp":1632223110359},{"file_id":"1Z4M2TMQZF8Rm9qr7bsiHp8OHomed65eC","timestamp":1631185144173},{"file_id":"1_RXVFuF33u0ryY-AwifvVHwAsnLmapfU","timestamp":1619849841762}],"collapsed_sections":[]},"kernelspec":{"display_name":"python3","language":"python","name":"python3"},"widgets":{"application/vnd.jupyter.widget-state+json":{"ba4714ba054846bfb8a7cde939802ee2":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_91417618df6e4c70ab85162a79ab8c0c","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_80b3b5efab304b918146879a46d3beb6","IPY_MODEL_c20652cd73fd4ffbad5443187f71a132","IPY_MODEL_7e058a0966d142baa42dc93175ced60a"]}},"91417618df6e4c70ab85162a79ab8c0c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":
null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"80b3b5efab304b918146879a46d3beb6":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_c675a92bda14426c8e75feccd78fc6a9","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":"Downloading: 100%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_9b39615f46fa41f8a8950479819fdf06"}},"c20652cd73fd4ffbad5443187f71a132":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_b0c251d4cb36484abe56992eb13e74d4","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"success","max":570,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":570,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_514421c81abc4caab82be7d7e78fddf6"}},"7e058a0966d142baa42dc93175ced60a":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_91884139bcf74e6da31bae357f7a0dfe","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 570/570 [00:00&lt;00:00, 
16.4kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_eb08b737e4d54aa686484b677de64ddf"}},"c675a92bda14426c8e75feccd78fc6a9":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"9b39615f46fa41f8a8950479819fdf06":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"b0c251d4cb36484abe56992eb13e74d4":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_col
or":null,"_model_module":"@jupyter-widgets/controls"}},"514421c81abc4caab82be7d7e78fddf6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"91884139bcf74e6da31bae357f7a0dfe":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"eb08b737e4d54aa686484b677de64ddf":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":nul
l,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"dd61a1a9f70044c590c4f64703bc0501":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_a053014ff52f487e8e95f212b50a0e96","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_f219ad4717d24ebea9c2f570df87b26c","IPY_MODEL_52f964f0d01a4e398d049c61bba623f7","IPY_MODEL_d1c0ed5430ff4a44909e14b8b29f1431"]}},"a053014ff52f487e8e95f212b50a0e96":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"o
rder":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"f219ad4717d24ebea9c2f570df87b26c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_d717ff93d1d34106a08e51079b7b7786","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":"Downloading: 100%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_08a2a223ed0f448ba249121487ae2853"}},"52f964f0d01a4e398d049c61bba623f7":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_2577584b25d046b8af84e25d0afd27a5","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"success","max":213450,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":213450,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_d7d3aa48c52245128ae508ffce5e044b"}},"d1c0ed5430ff4a44909e14b8b29f1431":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_b1bd2a62df4f473a9deaefa4436f28dd","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 213k/213k [00:00&lt;00:00, 
666kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_eb84488e63f04ee9a380bc316f2a1842"}},"d717ff93d1d34106a08e51079b7b7786":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"08a2a223ed0f448ba249121487ae2853":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"2577584b25d046b8af84e25d0afd27a5":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_colo
r":null,"_model_module":"@jupyter-widgets/controls"}},"d7d3aa48c52245128ae508ffce5e044b":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"b1bd2a62df4f473a9deaefa4436f28dd":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"eb84488e63f04ee9a380bc316f2a1842":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null
,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"e4db8585ed1e4e588eaed4c5a08819c9":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_5f4980ee0b6548938c9f5c5a8bdd0b9e","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_01f8f3fb76af400381ab4f666a64a3e6","IPY_MODEL_a65e0a15aec14a3e868a6ce049b6dfba","IPY_MODEL_4d9ea117579a48f9b4fb9eae229e5806"]}},"5f4980ee0b6548938c9f5c5a8bdd0b9e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"or
der":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"01f8f3fb76af400381ab4f666a64a3e6":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_0d8e08e6fc6541a39101d71c0b920318","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":"Downloading: 100%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_aa1cc4fa291a4a968fff33f9e2141ebf"}},"a65e0a15aec14a3e868a6ce049b6dfba":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_dfeb0161b37040d688cbef662696659b","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"success","max":29,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":29,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8a56059f10b64276a1d0a6bdab615a92"}},"4d9ea117579a48f9b4fb9eae229e5806":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_1aabb75a659245e08ba48443ed389c0c","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 29.0/29.0 [00:00&lt;00:00, 
843B/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_045ce01149f84e6e9d1c8f6e691200d8"}},"0d8e08e6fc6541a39101d71c0b920318":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"aa1cc4fa291a4a968fff33f9e2141ebf":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"dfeb0161b37040d688cbef662696659b":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_color
":null,"_model_module":"@jupyter-widgets/controls"}},"8a56059f10b64276a1d0a6bdab615a92":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"1aabb75a659245e08ba48443ed389c0c":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"045ce01149f84e6e9d1c8f6e691200d8":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,
"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"700ce5bb037e4ab495773e80f75f50ec":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_a05ef70273584bb4a28a96a609d0f198","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_7a371ed9020a41b38c3f5c04349f00e5","IPY_MODEL_2911b8ad27a240fe94c266636d724534","IPY_MODEL_6d33a76b426642689192ccd426829814"]}},"a05ef70273584bb4a28a96a609d0f198":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"ord
er":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"7a371ed9020a41b38c3f5c04349f00e5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_6f6954a2c8fe4e7695ababf06a76d896","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":"Downloading: 100%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_9c568c244d04433099814263f21b5613"}},"2911b8ad27a240fe94c266636d724534":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_fd6c95cc50454c07a8d58c9192f82ff4","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"success","max":435797,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":435797,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_9c9bde6c0a16430ca84d43b5a6d743ef"}},"6d33a76b426642689192ccd426829814":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_9a7245d094474b85ba2fd8b1c9ed038d","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 436k/436k [00:00&lt;00:00, 
710kB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_c8bcbd82ef514ae9a3125dad6ad4c371"}},"6f6954a2c8fe4e7695ababf06a76d896":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"9c568c244d04433099814263f21b5613":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"fd6c95cc50454c07a8d58c9192f82ff4":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_colo
r":null,"_model_module":"@jupyter-widgets/controls"}},"9c9bde6c0a16430ca84d43b5a6d743ef":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"9a7245d094474b85ba2fd8b1c9ed038d":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"c8bcbd82ef514ae9a3125dad6ad4c371":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null
,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"5619cdfca3574b9dbee1ef56ea883fdc":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_1546055763ce452cafc09ce662bc172e","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_14ccddbf944c4dddbb93e74fef9073b5","IPY_MODEL_9ca6222dae974d68806be643fb28741c","IPY_MODEL_20618129b6eb4a50bb9210e6d81f6384"]}},"1546055763ce452cafc09ce662bc172e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"or
der":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"14ccddbf944c4dddbb93e74fef9073b5":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_3941f4827e3045648a639212fef12531","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":"Downloading: 100%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_0354c76581a44811831e9a6ec74c7cf3"}},"9ca6222dae974d68806be643fb28741c":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_2eaa01a97b49433597143c6c01667a54","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"success","max":435779157,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":435779157,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_08e0086a177f4ecf818a05adbd75147c"}},"20618129b6eb4a50bb9210e6d81f6384":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_a38688f9e50244d7b655aedd59223e6a","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 436M/436M [00:08&lt;00:00, 
54.2MB/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_2315eb7b81fc4f2a88dbebedbba2f2ab"}},"3941f4827e3045648a639212fef12531":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"0354c76581a44811831e9a6ec74c7cf3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"2eaa01a97b49433597143c6c01667a54":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_col
or":null,"_model_module":"@jupyter-widgets/controls"}},"08e0086a177f4ecf818a05adbd75147c":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"a38688f9e50244d7b655aedd59223e6a":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"2315eb7b81fc4f2a88dbebedbba2f2ab":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":nul
l,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"9a11148cee22473882227a87f1d1f8bf":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_bf628cd0a67545069a117da5a295a4fc","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_5537edadb93b417b9c39b67d7266cc8c","IPY_MODEL_d6bcf99ff4dd4dd292911e07125299f1","IPY_MODEL_efa06b2b644f4ce68d770afba0ac65f4"]}},"bf628cd0a67545069a117da5a295a4fc":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"o
rder":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"5537edadb93b417b9c39b67d7266cc8c":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_fb1930bcfabf4762b776d09720df5f34","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 15%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_cc4cb95deb20484191ebd7e5e71c098e"}},"d6bcf99ff4dd4dd292911e07125299f1":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_675c7c50323749769b0c4b368c8657d3","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"danger","max":68671,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":10001,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_2fd82bdaca554072ad66bac18a8830a3"}},"efa06b2b644f4ce68d770afba0ac65f4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_3c7c78db719040cab5980c9efc9eff4f","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 10001/68671 [40:30&lt;123:24:10,  
7.57s/it]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_75bf76fbfb5d47dbb7f85d260bb7b649"}},"fb1930bcfabf4762b776d09720df5f34":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"cc4cb95deb20484191ebd7e5e71c098e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"675c7c50323749769b0c4b368c8657d3":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_col
or":null,"_model_module":"@jupyter-widgets/controls"}},"2fd82bdaca554072ad66bac18a8830a3":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"3c7c78db719040cab5980c9efc9eff4f":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"75bf76fbfb5d47dbb7f85d260bb7b649":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":nul
l,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"9b22d7def15545588615eefbda5e6523":{"model_module":"@jupyter-widgets/controls","model_name":"HBoxModel","model_module_version":"1.5.0","state":{"_view_name":"HBoxView","_dom_classes":[],"_model_name":"HBoxModel","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.5.0","box_style":"","layout":"IPY_MODEL_2379b52450744d99bcd16b9177a65e3e","_model_module":"@jupyter-widgets/controls","children":["IPY_MODEL_4a1fc41750734a928635974cc9b7ee53","IPY_MODEL_edcc14081d9a46d5b3c7009151835016","IPY_MODEL_973a4a9012f64382a68aec63f3af3ee4"]}},"2379b52450744d99bcd16b9177a65e3e":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"o
rder":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"4a1fc41750734a928635974cc9b7ee53":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_4a8ee41fa079438fa488a5ac15145ba7","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":"  5%","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_8c4938a8cfb944c4aed59f9c54ca41a6"}},"edcc14081d9a46d5b3c7009151835016":{"model_module":"@jupyter-widgets/controls","model_name":"FloatProgressModel","model_module_version":"1.5.0","state":{"_view_name":"ProgressView","style":"IPY_MODEL_f54faa9bd17a43daaadffc4a5ee21227","_dom_classes":[],"description":"","_model_name":"FloatProgressModel","bar_style":"","max":68671,"_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":3200,"_view_count":null,"_view_module_version":"1.5.0","orientation":"horizontal","min":0,"description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_d249ca53324e477b99c23bddac2c3e35"}},"973a4a9012f64382a68aec63f3af3ee4":{"model_module":"@jupyter-widgets/controls","model_name":"HTMLModel","model_module_version":"1.5.0","state":{"_view_name":"HTMLView","style":"IPY_MODEL_8a42a1fdb5b54c3b8c5b885881540380","_dom_classes":[],"description":"","_model_name":"HTMLModel","placeholder":"​","_view_module":"@jupyter-widgets/controls","_model_module_version":"1.5.0","value":" 3200/68671 [12:30&lt;2:09:33,  
8.42it/s]","_view_count":null,"_view_module_version":"1.5.0","description_tooltip":null,"_model_module":"@jupyter-widgets/controls","layout":"IPY_MODEL_842e21d0542d4fc9a0b1a894603cb3d0"}},"4a8ee41fa079438fa488a5ac15145ba7":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"8c4938a8cfb944c4aed59f9c54ca41a6":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"f54faa9bd17a43daaadffc4a5ee21227":{"model_module":"@jupyter-widgets/controls","model_name":"ProgressStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"ProgressStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","bar_col
or":null,"_model_module":"@jupyter-widgets/controls"}},"d249ca53324e477b99c23bddac2c3e35":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":null,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}},"8a42a1fdb5b54c3b8c5b885881540380":{"model_module":"@jupyter-widgets/controls","model_name":"DescriptionStyleModel","model_module_version":"1.5.0","state":{"_view_name":"StyleView","_model_name":"DescriptionStyleModel","description_width":"","_view_module":"@jupyter-widgets/base","_model_module_version":"1.5.0","_view_count":null,"_view_module_version":"1.2.0","_model_module":"@jupyter-widgets/controls"}},"842e21d0542d4fc9a0b1a894603cb3d0":{"model_module":"@jupyter-widgets/base","model_name":"LayoutModel","model_module_version":"1.2.0","state":{"_view_name":"LayoutView","grid_template_rows":null,"right":null,"justify_content":null,"_view_module":"@jupyter-widgets/base","overflow":null,"_model_module_version":"1.2.0","_view_count":null,"flex_flow":null,"width":null,"min_width":null,"border":null,"align_items":null,"bottom":null,"_model_module":"@jupyter-widgets/base","top":null,"grid_column":nul
l,"overflow_y":null,"overflow_x":null,"grid_auto_flow":null,"grid_area":null,"grid_template_columns":null,"flex":null,"_model_name":"LayoutModel","justify_items":null,"grid_row":null,"max_height":null,"align_content":null,"visibility":null,"align_self":null,"height":null,"min_height":null,"padding":null,"grid_auto_rows":null,"grid_gap":null,"max_width":null,"order":null,"_view_module_version":"1.2.0","grid_template_areas":null,"object_position":null,"object_fit":null,"grid_auto_columns":null,"margin":null,"display":null,"left":null}}}}},"cells":[{"cell_type":"markdown","metadata":{"id":"Zhq2XDESZRI6"},"source":["Please run in colab. This is adapted from the opacus tutorial.\n","\n","This notebook presents global clipping. "]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"eNABKAnqoEop","executionInfo":{"status":"ok","timestamp":1632315292016,"user_tz":-480,"elapsed":11674,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"5b105f7d-953a-4325-8bca-014276702cf1"},"source":["!pip install opacus\n","!pip install transformers\n","!pip install config"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Collecting opacus\n","  Downloading opacus-0.14.0-py3-none-any.whl (114 kB)\n","\u001b[?25l\r\u001b[K     |██▉                             | 10 kB 37.7 MB/s eta 0:00:01\r\u001b[K     |█████▊                          | 20 kB 36.2 MB/s eta 0:00:01\r\u001b[K     |████████▋                       | 30 kB 40.3 MB/s eta 0:00:01\r\u001b[K     |███████████▌                    | 40 kB 28.7 MB/s eta 0:00:01\r\u001b[K     |██████████████▍                 | 51 kB 18.1 MB/s eta 0:00:01\r\u001b[K     |█████████████████▏              | 61 kB 15.3 MB/s eta 0:00:01\r\u001b[K     |████████████████████            | 71 kB 14.4 MB/s eta 0:00:01\r\u001b[K     |███████████████████████         | 81 kB 15.9 MB/s eta 0:00:01\r\u001b[K     
|█████████████████████████▉      | 92 kB 17.4 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████▊   | 102 kB 13.7 MB/s eta 0:00:01\r\u001b[K     |███████████████████████████████▌| 112 kB 13.7 MB/s eta 0:00:01\r\u001b[K     |████████████████████████████████| 114 kB 13.7 MB/s \n","\u001b[?25hRequirement already satisfied: scipy>=1.2 in /usr/local/lib/python3.7/dist-packages (from opacus) (1.4.1)\n","Requirement already satisfied: torch>=1.3 in /usr/local/lib/python3.7/dist-packages (from opacus) (1.9.0+cu102)\n","Requirement already satisfied: numpy>=1.15 in /usr/local/lib/python3.7/dist-packages (from opacus) (1.19.5)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch>=1.3->opacus) (3.7.4.3)\n","Installing collected packages: opacus\n","Successfully installed opacus-0.14.0\n","Collecting transformers\n","  Downloading transformers-4.10.2-py3-none-any.whl (2.8 MB)\n","\u001b[K     |████████████████████████████████| 2.8 MB 15.0 MB/s \n","\u001b[?25hRequirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)\n","Collecting tokenizers<0.11,>=0.10.1\n","  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n","\u001b[K     |████████████████████████████████| 3.3 MB 36.6 MB/s \n","\u001b[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)\n","Collecting pyyaml>=5.1\n","  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)\n","\u001b[K     |████████████████████████████████| 636 kB 39.0 MB/s \n","\u001b[?25hRequirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.2)\n","Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (21.0)\n","Requirement already satisfied: numpy>=1.17 in 
/usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)\n","Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.8.1)\n","Collecting sacremoses\n","  Downloading sacremoses-0.0.45-py3-none-any.whl (895 kB)\n","\u001b[K     |████████████████████████████████| 895 kB 60.2 MB/s \n","\u001b[?25hCollecting huggingface-hub>=0.0.12\n","  Downloading huggingface_hub-0.0.17-py3-none-any.whl (52 kB)\n","\u001b[K     |████████████████████████████████| 52 kB 1.6 MB/s \n","\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n","Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub>=0.0.12->transformers) (3.7.4.3)\n","Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)\n","Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.5.0)\n","Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)\n","Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.5.30)\n","Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)\n","Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)\n","Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)\n","Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)\n","Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from 
sacremoses->transformers) (7.1.2)\n","Installing collected packages: tokenizers, sacremoses, pyyaml, huggingface-hub, transformers\n","  Attempting uninstall: pyyaml\n","    Found existing installation: PyYAML 3.13\n","    Uninstalling PyYAML-3.13:\n","      Successfully uninstalled PyYAML-3.13\n","Successfully installed huggingface-hub-0.0.17 pyyaml-5.4.1 sacremoses-0.0.45 tokenizers-0.10.3 transformers-4.10.2\n","Collecting config\n","  Downloading config-0.5.1-py2.py3-none-any.whl (20 kB)\n","Installing collected packages: config\n","Successfully installed config-0.5.1\n"]}]},{"cell_type":"code","metadata":{"id":"XzYOOhiZZVNK"},"source":["import config\n","clipping_mode = \"global\""],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"WUrI3qmuSXCf"},"source":["\n","if clipping_mode == \"global\":\n","  # open the file in read mode to read the current input to memory\n","  with open('/usr/local/lib/python3.7/dist-packages/opacus/per_sample_gradient_clip.py', 'r') as txtfile:\n","      lines = txtfile.readlines()\n","  lines[33] = \"import config\" + \"\\n\" + lines[33]\n","  lines[178]= \"            clip_factor=torch.where(clip_factor > self.norm_clipper.thresholds[0]/config.temp_ct, torch.ones_like(clip_factor)*self.norm_clipper.thresholds[0]/config.temp_ct, torch.zeros_like(clip_factor))\" + '\\n'+lines[178]              \n","  # write the edited content back to the file\n","  with open('/usr/local/lib/python3.7/dist-packages/opacus/per_sample_gradient_clip.py', 'w') as txtfile:\n","      txtfile.writelines(lines)\n","\n","  # close the file\n","  txtfile.close()"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"HuDl74L2cJSS","executionInfo":{"status":"ok","timestamp":1632315292018,"user_tz":-480,"elapsed":10,"user":{"displayName":"Hua 
Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"3c003def-d009-4d40-b96d-5c45b8620e26"},"source":["!nvidia-smi"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Wed Sep 22 12:54:51 2021       \n","+-----------------------------------------------------------------------------+\n","| NVIDIA-SMI 470.63.01    Driver Version: 460.32.03    CUDA Version: 11.2     |\n","|-------------------------------+----------------------+----------------------+\n","| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n","| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n","|                               |                      |               MIG M. |\n","|===============================+======================+======================|\n","|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |\n","| N/A   34C    P0    24W / 300W |      0MiB / 16160MiB |      0%      Default |\n","|                               |                      |                  N/A |\n","+-------------------------------+----------------------+----------------------+\n","                                                                               \n","+-----------------------------------------------------------------------------+\n","| Processes:                                                                  |\n","|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n","|        ID   ID                                                   Usage      |\n","|=============================================================================|\n","|  No running processes found                                                 
|\n","+-----------------------------------------------------------------------------+\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"OHrMankJlQsh","executionInfo":{"status":"ok","timestamp":1632315330810,"user_tz":-480,"elapsed":26221,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"2ca5de9d-a933-4f6b-e5bc-503ac0b44a4f"},"source":["from google.colab import drive\n","import os\n","\n","DRIVE_MOUNT='/content/drive'\n","drive.mount(DRIVE_MOUNT)\n","\n","DP_SGD_FOLDER=os.path.join(DRIVE_MOUNT, 'My Drive', 'DP_SGD')\n","CLASSIFICATION_SIMULATION_FOLDER=os.path.join(DP_SGD_FOLDER, 'Classification_Simulations')\n","TASK_FOLDER=os.path.join(CLASSIFICATION_SIMULATION_FOLDER, 'Bert') ## Please fill in this by the name of the dataset\n","os.makedirs(CLASSIFICATION_SIMULATION_FOLDER, exist_ok=True)\n","os.makedirs(TASK_FOLDER, exist_ok=True)\n","\n","LOCAL_FOLDER=os.path.join(TASK_FOLDER, 'Local') ## Please fill in this by the name of local / global\n","LOSSES_FOLDER=os.path.join(LOCAL_FOLDER, 'losses')\n","POST_FOLDER=os.path.join(LOSSES_FOLDER, 'post')\n","os.makedirs(LOCAL_FOLDER, exist_ok=True)\n","os.makedirs(LOSSES_FOLDER, exist_ok=True)\n","os.makedirs(POST_FOLDER, exist_ok=True)"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Mounted at /content/drive\n"]}]},{"cell_type":"markdown","metadata":{"id":"dyqp0zFBSJ01"},"source":["# Building text classifier with Differential Privacy"]},{"cell_type":"markdown","metadata":{"id":"Gc5pOVyJSJ06"},"source":["In this tutorial we will train a text classifier with Differential Privacy by taking a model pre-trained on public text data and fine-tuning it for a different task.\n","\n","When training a model with differential privacy, we almost always face a trade-off between model size and accuracy on the task. 
The exact details depend on the problem, but a rule of thumb is that the fewer parameters the model has, the easier it is to get good performance with DP.\n","\n","Most state-of-the-art NLP models are quite deep and large (e.g. [BERT-base](https://github.com/google-research/bert) has over 100M parameters), which makes the task of training a text model on a private dataset rather challenging.\n","\n","One way of addressing this problem is to divide the training process into two stages. First, we will pre-train the model on a public dataset, exposing the model to generic text data. Assuming that the generic text data is public, we will not be using differential privacy at this step. Then, we freeze most of the layers, leaving only a few upper layers to be trained on the private dataset using DP-SGD. This way we can get the best of both worlds - we have a deep and powerful text understanding model, while only training a small number of parameters with a differentially private algorithm.\n","\n","In this tutorial we will take the pre-trained [BERT-base](https://github.com/google-research/bert) model and fine-tune it to recognize textual entailment on the [SNLI](https://nlp.stanford.edu/projects/snli/) dataset."]},{"cell_type":"markdown","metadata":{"id":"z223GZ9RSJ07"},"source":["## Dataset"]},{"cell_type":"markdown","metadata":{"id":"L-6x9lNbSJ07"},"source":["First, we need to download the dataset (we'll use the Stanford NLP mirror)"]},{"cell_type":"code","metadata":{"id":"lWwoKAqlSJ08"},"source":["STANFORD_SNLI_URL = \"https://nlp.stanford.edu/projects/snli/snli_1.0.zip\"\n","DATA_DIR = os.path.join(TASK_FOLDER, 'Data')"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"Dr-IkuwTSJ08"},"source":[
"import zipfile\n",
"import urllib.request\n",
"import os\n",
"import tempfile\n",
"\n",
"def download_and_extract(dataset_url, data_dir):\n",
"    \"\"\"Download the zip archive at `dataset_url` and extract it into `data_dir`.\n",
"\n",
"    The archive is staged in a temporary file that is removed afterwards,\n",
"    even when the download or the extraction raises.\n",
"    \"\"\"\n",
"    print(\"Downloading and extracting ...\")\n",
"    # Stage the archive in a temp file instead of `snli.zip` in the working\n",
"    # directory, so a failed run cannot leave a partial archive behind.\n",
"    fd, filename = tempfile.mkstemp(suffix=\".zip\")\n",
"    os.close(fd)  # urlretrieve reopens the path itself\n",
"    try:\n",
"        urllib.request.urlretrieve(dataset_url, filename)\n",
"        with zipfile.ZipFile(filename) as zip_ref:\n",
"            zip_ref.extractall(data_dir)\n",
"    finally:\n",
"        os.remove(filename)\n",
"    print(\"Completed!\")\n",
"\n",
"download_and_extract(STANFORD_SNLI_URL, DATA_DIR)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"AUeBwbO5SJ09"},"source":["The dataset comes in two formats (`tsv` and `json`) and has already been split into train/dev/test. Let’s verify that’s the case."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"aQaW2RvjSJ09","executionInfo":{"status":"ok","timestamp":1632315333027,"user_tz":-480,"elapsed":1094,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"96f0f56e-0d54-4024-9894-af9adf8341a8"},"source":["snli_folder = os.path.join(DATA_DIR, \"snli_1.0\")\n","os.listdir(snli_folder)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["['.DS_Store',\n"," 'README.txt',\n"," 'Icon\\r',\n"," 'snli_1.0_test.txt',\n"," 'snli_1.0_dev.txt',\n"," 'snli_1.0_test.jsonl',\n"," 'snli_1.0_dev.jsonl',\n"," 'snli_1.0_train.jsonl',\n"," 'snli_1.0_train.txt']"]},"metadata":{},"execution_count":7}]},{"cell_type":"markdown","metadata":{"id":"Duq9UfJdSJ0-"},"source":["Let's now take a look inside. [SNLI dataset](https://nlp.stanford.edu/projects/snli/) provides ample syntactic metadata, but we'll only use raw input text. 
Therefore, the only fields we're interested in are **sentence1** (premise), **sentence2** (hypothesis) and **gold_label** (label chosen by the majority of annotators).\n","\n","Label defines the relation between premise and hypothesis: either *contradiction*, *neutral* or *entailment*."]},{"cell_type":"code","metadata":{"id":"MMdvQ2p4SJ0-","colab":{"base_uri":"https://localhost:8080/","height":200},"executionInfo":{"status":"ok","timestamp":1632315339928,"user_tz":-480,"elapsed":6903,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"211782d5-90d4-4bf2-f085-b2a0e2401034"},"source":["import pandas as pd\n","train_path =  os.path.join(snli_folder, \"snli_1.0_train.txt\")\n","dev_path = os.path.join(snli_folder, \"snli_1.0_dev.txt\")\n","\n","df_train = pd.read_csv(train_path, sep='\\t')\n","df_test = pd.read_csv(dev_path, sep='\\t')\n","\n","df_train[['sentence1', 'sentence2', 'gold_label']][:5]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>sentence1</th>\n","      <th>sentence2</th>\n","      <th>gold_label</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>A person on a horse jumps over a broken down a...</td>\n","      <td>A person is training his horse for a competition.</td>\n","      <td>neutral</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>A person on a horse jumps over a broken down a...</td>\n","      <td>A person is at a diner, ordering an 
omelette.</td>\n","      <td>contradiction</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>A person on a horse jumps over a broken down a...</td>\n","      <td>A person is outdoors, on a horse.</td>\n","      <td>entailment</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>Children smiling and waving at camera</td>\n","      <td>They are smiling at their parents</td>\n","      <td>neutral</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>Children smiling and waving at camera</td>\n","      <td>There are children present</td>\n","      <td>entailment</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>"],"text/plain":["                                           sentence1  ...     gold_label\n","0  A person on a horse jumps over a broken down a...  ...        neutral\n","1  A person on a horse jumps over a broken down a...  ...  contradiction\n","2  A person on a horse jumps over a broken down a...  ...     entailment\n","3              Children smiling and waving at camera  ...        neutral\n","4              Children smiling and waving at camera  ...     entailment\n","\n","[5 rows x 3 columns]"]},"metadata":{},"execution_count":8}]},{"cell_type":"markdown","metadata":{"id":"6ZmPyJ7bSJ0-"},"source":["## Model"]},{"cell_type":"markdown","metadata":{"id":"Ry7uhWVlSJ0_"},"source":["BERT (Bidirectional Encoder Representations from Transformers) is state of the art approach to various NLP tasks. It uses a Transformer architecture and relies heavily on the concept of pre-training. 
\n","\n","We'll use a pre-trained BERT-base model, provided in the huggingface [transformers](https://github.com/huggingface/transformers) repo.\n","It gives us a pytorch implementation for the classic BERT architecture, as well as a tokenizer and weights pre-trained on a public English corpus (Wikipedia).\n","\n","Please follow these [installation instructions](https://github.com/huggingface/transformers#installation) before proceeding."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":277,"referenced_widgets":["ba4714ba054846bfb8a7cde939802ee2","91417618df6e4c70ab85162a79ab8c0c","80b3b5efab304b918146879a46d3beb6","c20652cd73fd4ffbad5443187f71a132","7e058a0966d142baa42dc93175ced60a","c675a92bda14426c8e75feccd78fc6a9","9b39615f46fa41f8a8950479819fdf06","b0c251d4cb36484abe56992eb13e74d4","514421c81abc4caab82be7d7e78fddf6","91884139bcf74e6da31bae357f7a0dfe","eb08b737e4d54aa686484b677de64ddf","dd61a1a9f70044c590c4f64703bc0501","a053014ff52f487e8e95f212b50a0e96","f219ad4717d24ebea9c2f570df87b26c","52f964f0d01a4e398d049c61bba623f7","d1c0ed5430ff4a44909e14b8b29f1431","d717ff93d1d34106a08e51079b7b7786","08a2a223ed0f448ba249121487ae2853","2577584b25d046b8af84e25d0afd27a5","d7d3aa48c52245128ae508ffce5e044b","b1bd2a62df4f473a9deaefa4436f28dd","eb84488e63f04ee9a380bc316f2a1842","e4db8585ed1e4e588eaed4c5a08819c9","5f4980ee0b6548938c9f5c5a8bdd0b9e","01f8f3fb76af400381ab4f666a64a3e6","a65e0a15aec14a3e868a6ce049b6dfba","4d9ea117579a48f9b4fb9eae229e5806","0d8e08e6fc6541a39101d71c0b920318","aa1cc4fa291a4a968fff33f9e2141ebf","dfeb0161b37040d688cbef662696659b","8a56059f10b64276a1d0a6bdab615a92","1aabb75a659245e08ba48443ed389c0c","045ce01149f84e6e9d1c8f6e691200d8","700ce5bb037e4ab495773e80f75f50ec","a05ef70273584bb4a28a96a609d0f198","7a371ed9020a41b38c3f5c04349f00e5","2911b8ad27a240fe94c266636d724534","6d33a76b426642689192ccd426829814","6f6954a2c8fe4e7695ababf06a76d896","9c568c244d04433099814263f21b5613","fd6c95cc50454c07a8d58c9192f82ff4","9c9bde6c0a
16430ca84d43b5a6d743ef","9a7245d094474b85ba2fd8b1c9ed038d","c8bcbd82ef514ae9a3125dad6ad4c371","5619cdfca3574b9dbee1ef56ea883fdc","1546055763ce452cafc09ce662bc172e","14ccddbf944c4dddbb93e74fef9073b5","9ca6222dae974d68806be643fb28741c","20618129b6eb4a50bb9210e6d81f6384","3941f4827e3045648a639212fef12531","0354c76581a44811831e9a6ec74c7cf3","2eaa01a97b49433597143c6c01667a54","08e0086a177f4ecf818a05adbd75147c","a38688f9e50244d7b655aedd59223e6a","2315eb7b81fc4f2a88dbebedbba2f2ab"]},"id":"JNV4351KSJ0_","executionInfo":{"status":"ok","timestamp":1632315359331,"user_tz":-480,"elapsed":19406,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"5b9af01c-f83d-4bd5-ddb7-dbb311c40717"},"source":["from transformers import BertConfig, BertTokenizer, BertForSequenceClassification\n","\n","model_name = \"bert-base-cased\"\n","config1 = BertConfig.from_pretrained(\n","    model_name,\n","    num_labels=3,\n",")\n","tokenizer = BertTokenizer.from_pretrained(\n","    \"bert-base-cased\",\n","    do_lower_case=False,\n",")\n","model = BertForSequenceClassification.from_pretrained(\n","    \"bert-base-cased\",\n","    config=config1,\n",")"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"ba4714ba054846bfb8a7cde939802ee2","version_minor":0,"version_major":2},"text/plain":["Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]"]},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"dd61a1a9f70044c590c4f64703bc0501","version_minor":0,"version_major":2},"text/plain":["Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]"]},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"e4db8585ed1e4e588eaed4c5a08819c9","version_minor":0,"version_major":2},"text/plain":["Downloading:   0%|          | 0.00/29.0 [00:00<?, 
?B/s]"]},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"700ce5bb037e4ab495773e80f75f50ec","version_minor":0,"version_major":2},"text/plain":["Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]"]},"metadata":{}},{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"5619cdfca3574b9dbee1ef56ea883fdc","version_minor":0,"version_major":2},"text/plain":["Downloading:   0%|          | 0.00/436M [00:00<?, ?B/s]"]},"metadata":{}},{"output_type":"stream","name":"stderr","text":["Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']\n","- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n","- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n","Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.weight', 'classifier.bias']\n","You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"]}]},{"cell_type":"markdown","metadata":{"id":"Fjb7XXCsSJ0_"},"source":["The model has the following structure. 
It uses a combination of word, positional and token *embeddings* to create a sequence representation, then passes the data through 12 *transformer encoders* and finally uses a *linear classifier* to produce the final label.\n","As the model is already pre-trained and we only plan to fine-tune a few upper layers, we want to freeze all layers, except for the last encoder and above (`BertPooler` and `Classifier`)."]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WbbvzBo7Fj81","executionInfo":{"status":"ok","timestamp":1632315370624,"user_tz":-480,"elapsed":11297,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"4f43b9a6-f671-4e18-995c-63b832809548"},"source":[
"import torch\n",
"\n",
"# Deserialize onto the CPU: `model` still lives there at this point, and this keeps the\n",
"# cell working on CPU-only runtimes whatever device the checkpoint was saved from.\n",
"state_dict = torch.load(os.path.join(LOSSES_FOLDER, \"local_model_0430.pt\"), map_location=\"cpu\")\n",
"model.load_state_dict(state_dict)"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["<All keys matched successfully>"]},"metadata":{},"execution_count":10}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8u8gsRRkSJ1A","executionInfo":{"status":"ok","timestamp":1632315370624,"user_tz":-480,"elapsed":6,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"fee2156a-002d-4ac4-f7c4-28ca7e4df140"},"source":[
"trainable_layers = [model.bert.encoder.layer[-1], model.bert.pooler, model.classifier]\n",
"total_params = 0\n",
"trainable_params = 0\n",
"\n",
"# Freeze every parameter first, counting them as we go ...\n",
"for p in model.parameters():\n",
"    p.requires_grad = False\n",
"    total_params += p.numel()\n",
"\n",
"# ... then re-enable gradients only for the layers we fine-tune.\n",
"for layer in trainable_layers:\n",
"    for p in layer.parameters():\n",
"        p.requires_grad = True\n",
"        trainable_params += p.numel()\n",
"\n",
"print(f\"Total parameters count: {total_params}\") # ~108M\n",
"print(f\"Trainable parameters count: {trainable_params}\") # 
~7M"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["Total parameters count: 108312579\n","Trainable parameters count: 7680771\n"]}]},{"cell_type":"markdown","metadata":{"id":"oYPKUtqiSJ1A"},"source":["Thus, by using pre-trained model we reduce the number of trainable params from over 100 millions to just above 7.5 millions. This will help both performance and convergence with added noise."]},{"cell_type":"markdown","metadata":{"id":"TxPFOeTFSJ1A"},"source":["## Prepare the data"]},{"cell_type":"markdown","metadata":{"id":"K8dQau0-SJ1A"},"source":["Before we begin training, we need to preprocess the data and convert it to the format our model expects. \n","\n","(Note: it'll take 5-10 minutes to run on a laptop)"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"bLbAuU39SJ1A","executionInfo":{"status":"ok","timestamp":1632315667126,"user_tz":-480,"elapsed":296505,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"8fb9cf05-55a3-49f0-a0bd-5fc6d335e83a"},"source":["LABEL_LIST = ['contradiction', 'entailment', 'neutral']\n","MAX_SEQ_LENGHT = 128\n","\n","import torch\n","import transformers\n","from torch.utils.data import TensorDataset\n","from transformers.data.processors.utils import InputExample\n","from transformers.data.processors.glue import glue_convert_examples_to_features\n","\n","\n","def _create_examples(df, set_type):\n","    \"\"\" Convert raw dataframe to a list of InputExample. 
Filter malformed examples\n","    \"\"\"\n","    examples = []\n","    for index, row in df.iterrows():\n","        if row['gold_label'] not in LABEL_LIST:\n","            continue\n","        if not isinstance(row['sentence1'], str) or not isinstance(row['sentence2'], str):\n","            continue\n","            \n","        guid = f\"{index}-{set_type}\"\n","        examples.append(\n","            InputExample(guid=guid, text_a=row['sentence1'], text_b=row['sentence2'], label=row['gold_label']))\n","    return examples\n","\n","def _df_to_features(df, set_type):\n","    \"\"\" Pre-process text. This method will:\n","    1) tokenize inputs\n","    2) cut or pad each sequence to MAX_SEQ_LENGHT\n","    3) convert tokens into ids\n","    \n","    The output will contain:\n","    `input_ids` - padded token ids sequence\n","    `attention mask` - mask indicating padded tokens\n","    `token_type_ids` - mask indicating the split between premise and hypothesis\n","    `label` - label\n","    \"\"\"\n","    examples = _create_examples(df, set_type)\n","    \n","    #backward compatibility with older transformers versions\n","    legacy_kwards = {}\n","    from packaging import version\n","    if version.parse(transformers.__version__) < version.parse(\"2.9.0\"):\n","        legacy_kwards = {\n","            \"pad_on_left\": False,\n","            \"pad_token\": tokenizer.convert_tokens_to_ids([tokenizer.pad_token])[0],\n","            \"pad_token_segment_id\": 0,\n","        }\n","    \n","    return glue_convert_examples_to_features(\n","        examples=examples,\n","        tokenizer=tokenizer,\n","        label_list=LABEL_LIST,\n","        max_length=MAX_SEQ_LENGHT,\n","        output_mode=\"classification\",\n","        **legacy_kwards,\n","    )\n","\n","def _features_to_dataset(features):\n","    \"\"\" Convert features from `_df_to_features` into a single dataset\n","    \"\"\"\n","    all_input_ids = torch.tensor([f.input_ids for f in features], 
dtype=torch.long)\n","    all_attention_mask = torch.tensor(\n","        [f.attention_mask for f in features], dtype=torch.long\n","    )\n","    all_token_type_ids = torch.tensor(\n","        [f.token_type_ids for f in features], dtype=torch.long\n","    )\n","    all_labels = torch.tensor([f.label for f in features], dtype=torch.long)\n","    dataset = TensorDataset(\n","        all_input_ids, all_attention_mask, all_token_type_ids, all_labels\n","    )\n","\n","    return dataset\n","\n","train_features = _df_to_features(df_train, \"train\")\n","test_features = _df_to_features(df_test, \"test\")\n","\n","train_dataset = _features_to_dataset(train_features)\n","test_dataset = _features_to_dataset(test_features)"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/transformers/data/processors/glue.py:67: FutureWarning: This function will be removed from the library soon, preprocessing should be handled with the 🤗 Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/master/examples/pytorch/text-classification/run_glue.py\n","  warnings.warn(DEPRECATION_WARNING.format(\"function\"), FutureWarning)\n"]}]},{"cell_type":"markdown","metadata":{"id":"LcoqzcpYSJ1B"},"source":["## Choosing batch size\n","\n","Let's talk about batch sizes for a bit.\n","\n","In addition to all the considerations you normally take into account when choosing batch size, training model with DP adds another one - privacy cost. \n","\n","Because of the threat model we assume and the way we add noise to the gradients, larger batch sizes (to a certain extent) generally help convergence. We add the same amount of noise to each gradient update (scaled to the norm of one sample in the batch) regardless of the batch size. What this means is that as the batch size increases, the relative amount of noise added decreases. 
while preserving the same epsilon guarantee. \n","\n","You should, however, keep in mind that increasing batch size has its price in terms of epsilon, which grows at `O(sqrt(batch_size))` as we train (therefore larger batches make it grow faster). A good strategy here is to experiment with multiple combinations of `batch_size` and `noise_multiplier` to find the one that provides the best possible quality at an acceptable privacy guarantee.\n","\n","There's another side to this - memory. Opacus computes and stores *per sample* gradients, so for every normal gradient, Opacus will store `n=batch_size` per-sample gradients on each step, thus increasing the memory footprint by at least `O(batch_size)`. In reality, however, the peak memory requirement is `O(batch_size^2)` compared to a non-private model. This is because some intermediate steps in per sample gradient computation involve operations on two matrices, each with batch_size as one of the dimensions.\n","\n","The good news is, we can pick the most appropriate batch size, regardless of memory constraints. Opacus has built-in support for *virtual* batches. Using it we can separate physical steps (gradient computation) and logical steps (noise addition and parameter updates): use larger batches for training, while keeping the memory footprint low. 
Below we will specify two constants:\n","\n","- `BATCH_SIZE` defines the maximum batch size we can afford from a memory standpoint, and only affects computation speed\n","- `VIRTUAL_BATCH_SIZE`, on the other hand, is equivalent to normal batch_size in the non-private setting, and will affect convergence and privacy guarantee.\n","\n"]},{"cell_type":"code","metadata":{"id":"sg1UjMB1SJ1C"},"source":["BATCH_SIZE = 8\n","VIRTUAL_BATCH_SIZE = 32"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"F4soKsv1SJ1C"},"source":["from torch.utils.data import DataLoader, RandomSampler, SequentialSampler\n","\n","train_sampler = RandomSampler(train_dataset)\n","train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=BATCH_SIZE)\n","\n","test_sampler = SequentialSampler(test_dataset)\n","test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=BATCH_SIZE)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"EiujWIzjSJ1C"},"source":["## Training"]},{"cell_type":"code","metadata":{"id":"EKWdC4k1SJ1D"},"source":["import torch\n","\n","# Move the model to appropriate device\n","device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n","model = model.to(device)\n","model.train()\n","# Define optimizer\n","optimizer = torch.optim.AdamW(model.parameters(), lr=5e-4, eps=1e-8)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"hhS98izHSJ1D"},"source":["Next we will define and attach PrivacyEngine. There are two parameters you need to consider here:\n","\n","- `noise_multiplier`. It defines the trade-off between privacy and accuracy. Adding more noise will provide stronger privacy guarantees, but will also hurt model quality.\n","- `max_grad_norm`. Defines the maximum magnitude of L2 norms to which we clip per sample gradients. 
There is a bit of tug of war with this threshold: on the one hand, a low threshold means that we will clip many gradients, hurting convergence, so we might be tempted to raise it. However, recall that we add noise with `std=noise_multiplier * max_grad_norm` so we will pay for the increased threshold with more noise. In most cases you can rely on the model being quite resilient to clipping (after the first few iterations your model will tend to adjust so that its gradients stay below the clipping threshold), so you can often just keep the default value (`=1.0`) and focus on tuning `batch_size` and `noise_multiplier` instead. That being said, sometimes clipping hurts the model so it may be worth experimenting with different clipping thresholds, like we are doing in this tutorial.\n","\n","These two parameters define the scale of the noise we add to gradients: the noise will be sampled from a Gaussian distribution with `std=noise_multiplier * max_grad_norm`.\n"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iXKyw1mMSJ1D","executionInfo":{"status":"ok","timestamp":1632315667128,"user_tz":-480,"elapsed":15,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"2cedf021-5616-4cfa-edb4-0e5509ca0db9"},"source":["from opacus import PrivacyEngine\n","\n","ALPHAS = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))\n","NOISE_MULTIPLIER = 0.4\n","MAX_GRAD_NORM = 0.1\n","\n","privacy_engine = PrivacyEngine(\n","    module=model,\n","    batch_size=VIRTUAL_BATCH_SIZE,\n","    sample_size=len(train_dataset),\n","    alphas=ALPHAS,\n","    noise_multiplier=NOISE_MULTIPLIER,\n","    max_grad_norm=MAX_GRAD_NORM,\n",")\n","privacy_engine.attach(optimizer)"],"execution_count":null,"outputs":[{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/opacus/privacy_engine.py:639: UserWarning: The sample rate will be 
defined from ``batch_size`` and ``sample_size``.The returned privacy budget will be incorrect.\n","  \"The sample rate will be defined from ``batch_size`` and ``sample_size``.\"\n","/usr/local/lib/python3.7/dist-packages/opacus/privacy_engine.py:230: UserWarning: Secure RNG turned off. This is perfectly fine for experimentation as it allows for much faster training performance, but remember to turn it on and retrain one last time before production with ``secure_rng`` turned on.\n","  \"Secure RNG turned off. This is perfectly fine for experimentation as it allows \"\n"]}]},{"cell_type":"markdown","metadata":{"id":"pnjQkJ3uSJ1D"},"source":["Let’s first define the evaluation cycle."]},{"cell_type":"code","metadata":{"id":"rKLLXnMYSJ1E"},"source":[
"import numpy as np\n",
"from tqdm.notebook import tqdm\n",
"from scipy.special import softmax\n",
"\n",
"def accuracy(preds, labels):\n",
"    \"\"\"Fraction of positions where `preds` equals `labels` (element-wise comparison).\"\"\"\n",
"    return (preds == labels).mean()\n",
"\n",
"# define evaluation cycle\n",
"def evaluate(model):\n",
"    \"\"\"Run one pass over `test_dataloader` and return `(mean loss, mean accuracy)`.\n",
"\n",
"    Thin wrapper around `save_logits` so the evaluation loop exists in one place only.\n",
"    \"\"\"\n",
"    eval_loss, eval_accuracy, _, _ = save_logits(model)\n",
"    return eval_loss, eval_accuracy\n",
"\n",
"def save_logits(model):\n",
"    \"\"\"Evaluate `model` on `test_dataloader`, keeping the raw per-batch outputs.\n",
"\n",
"    The model is switched to eval mode for the pass and back to train mode before returning.\n",
"\n",
"    Returns:\n",
"        A tuple `(loss, acc, logits_arr, labels_arr)`: `loss` and `acc` are plain means of the\n",
"        per-batch loss / accuracy (batches are not weighted by size); `logits_arr` and\n",
"        `labels_arr` hold one numpy array per batch, in dataloader order.\n",
"    \"\"\"\n",
"    model.eval()\n",
"\n",
"    loss_arr = []\n",
"    accuracy_arr = []\n",
"    logits_arr = []\n",
"    labels_arr = []\n",
"\n",
"    for batch in test_dataloader:\n",
"        batch = tuple(t.to(device) for t in batch)\n",
"        inputs = {'input_ids':      batch[0],\n",
"                  'attention_mask': batch[1],\n",
"                  'token_type_ids': batch[2],\n",
"                  'labels':         batch[3]}\n",
"\n",
"        with torch.no_grad():\n",
"            outputs = model(**inputs)\n",
"        loss, logits = outputs[:2]\n",
"\n",
"        logits = logits.detach().cpu().numpy()\n",
"        labels = inputs['labels'].detach().cpu().numpy()\n",
"        preds = np.argmax(logits, axis=1)\n",
"\n",
"        loss_arr.append(loss.item())\n",
"        accuracy_arr.append(accuracy(preds, labels))\n",
"        logits_arr.append(logits)\n",
"        labels_arr.append(labels)\n",
"\n",
"    model.train()\n",
"    return np.mean(loss_arr), np.mean(accuracy_arr), logits_arr, labels_arr\n",
"\n",
"def getECEMCE(logits_arr, labels_arr, n_partitions=20):\n",
"    \"\"\"Compute calibration metrics from the per-batch outputs of `save_logits`.\n",
"\n",
"    Args:\n",
"        logits_arr: sequence of per-batch logit arrays (classes along axis 1).\n",
"        labels_arr: sequence of per-batch integer label arrays, aligned with `logits_arr`.\n",
"        n_partitions: number of equal-width confidence bins over [0, 1]; defaults to 20.\n",
"\n",
"    Returns:\n",
"        `(diff, ECE, MCE)`: |accuracy - mean confidence|, the bin-size-weighted mean of the\n",
"        per-bin |accuracy - confidence| gaps, and the largest such gap.\n",
"    \"\"\"\n",
"    # No forward pass happens here; the mode toggles on the global `model` are kept only\n",
"    # so that callers still get the model back in train mode, as before.\n",
"    model.eval()\n",
"    logits = np.concatenate(logits_arr, axis=0)\n",
"    targets_te = np.concatenate(labels_arr, axis=0)\n",
"\n",
"    y_te = softmax(logits, axis=1)\n",
"    y_te_prob = np.max(y_te, axis=1)  # confidence of the predicted class\n",
"    y_te_single = np.argmax(y_te, axis=1)  # predicted class\n",
"    acc = np.mean(y_te_single == targets_te)\n",
"\n",
"    ## Diff in mean\n",
"    diff = np.abs(acc - np.mean(y_te_prob))\n",
"\n",
"    ## Bin by confidence; a confidence of exactly 1.0 is clamped into the last bin\n",
"    bin_ids = np.minimum((y_te_prob * n_partitions).astype(int), n_partitions - 1)\n",
"\n",
"    CEs = [0 for _ in range(n_partitions)]\n",
"    bin_sizes = [0 for _ in range(n_partitions)]\n",
"    for i in range(n_partitions):\n",
"        in_bin = bin_ids == i\n",
"        bin_sizes[i] = int(in_bin.sum())\n",
"        if bin_sizes[i]:\n",
"            # Local is deliberately not called `accuracy`, to avoid shadowing the helper.\n",
"            bin_acc = np.mean(y_te_single[in_bin] == targets_te[in_bin])\n",
"            bin_conf = np.mean(y_te_prob[in_bin])\n",
"            CEs[i] = np.abs(bin_acc - bin_conf)\n",
"\n",
"    ECE = np.sum([size * CE for size, CE in zip(bin_sizes, CEs)]) / len(logits)\n",
"    MCE = np.max(CEs)\n",
"\n",
"    model.train()\n",
"    return diff, ECE, MCE\n",
"\n"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"FlmjHsEcSJ1E"},"source":["Now we specify the training parameters and run the training loop for `EPOCHS` epochs"]},{"cell_type":"code","metadata":{"id":"JWLDl4lSSJ1E"},"source":["EPOCHS = 1\n","LOGGING_INTERVAL = 200 # once every how many steps we run evaluation cycle and report metrics\n","DELTA = 1 / len(train_dataloader) # Parameter for privacy accounting. 
Probability of not upholding privacy guarantees\n","\n","\n","assert VIRTUAL_BATCH_SIZE % BATCH_SIZE == 0 # VIRTUAL_BATCH_SIZE should be divisible by BATCH_SIZE\n","virtual_batch_rate = VIRTUAL_BATCH_SIZE / BATCH_SIZE"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"pWJ7xg43dfHF"},"source":["config.temp_ct = 1000"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":993,"referenced_widgets":["9a11148cee22473882227a87f1d1f8bf","bf628cd0a67545069a117da5a295a4fc","5537edadb93b417b9c39b67d7266cc8c","d6bcf99ff4dd4dd292911e07125299f1","efa06b2b644f4ce68d770afba0ac65f4","fb1930bcfabf4762b776d09720df5f34","cc4cb95deb20484191ebd7e5e71c098e","675c7c50323749769b0c4b368c8657d3","2fd82bdaca554072ad66bac18a8830a3","3c7c78db719040cab5980c9efc9eff4f","75bf76fbfb5d47dbb7f85d260bb7b649"]},"id":"4W_UncQx5k2Y","executionInfo":{"status":"ok","timestamp":1632239881564,"user_tz":-480,"elapsed":905345,"user":{"displayName":"Hua Wang","photoUrl":"https://lh3.googleusercontent.com/a/default-user=s64","userId":"12940841504559839160"}},"outputId":"b3b72eee-1876-416d-c6a9-95ef165a350a"},"source":["## 5000\n","model.train()\n","train_long_losses = []\n","train_long_accuracy = []\n","eval_long_losses = []\n","eval_long_accuracy = []\n","diff_long = []\n","ECE_long = []\n","MCE_long = []\n","for epoch in range(1, EPOCHS+1):\n","    losses = []\n","    accuracies = []\n","    \n","    for step, batch in enumerate(tqdm(train_dataloader)):\n","        if step > 10000:\n","            break\n","        batch = tuple(t.to(device) for t in batch)\n","        inputs = {'input_ids':      batch[0],\n","                  'attention_mask': batch[1],\n","                  'token_type_ids': batch[2],\n","                  'labels':         batch[3]}\n","\n","        outputs = model(**inputs) # output = loss, logits, hidden_states, attentions\n","        \n","        loss = outputs[0]\n","        
loss.backward()\n","        \n","        losses.append(loss.item())\n","\n","        logits = outputs[1]\n","        preds = np.argmax(logits.detach().cpu().numpy(), axis=1)\n","        labels = inputs['labels'].detach().cpu().numpy()\n","        accuracies.append(accuracy(preds, labels))\n","\n","        # We process small batches of size BATCH_SIZE, \n","        # until they're accumulated to a batch of size VIRTUAL_BATCH_SIZE.\n","        # Only then we make a real `.step()` and update model weights\n","        if (step + 1) % virtual_batch_rate == 0 or step == len(train_dataloader) - 1:\n","            optimizer.step()\n","        else:\n","            optimizer.virtual_step()\n","\n","        if step > 0 and step % LOGGING_INTERVAL == 0:\n","            train_loss = np.mean(losses)\n","            train_accuracy = np.mean(accuracies)\n","            eps, alpha = optimizer.privacy_engine.get_privacy_spent(DELTA)\n","\n","            # eval_loss, eval_accuracy = evaluate(model)\n","            eval_loss, eval_accuracy, logits_arr, labels_arr = save_logits(model)\n","            train_long_losses.append(train_loss.item())\n","            train_long_accuracy.append(train_accuracy.item())\n","            eval_long_losses.append(eval_loss.item())\n","            eval_long_accuracy.append(eval_accuracy.item())\n","\n","            diff, ECE, MCE = getECEMCE(logits_arr, labels_arr)\n","            diff_long.append(diff)\n","            ECE_long.append(ECE)\n","            MCE_long.append(MCE)\n","            print(\n","                f\"Epoch: {epoch} | \"\n","                f\"Step: {step} | \"\n","                f\"Train loss: {train_loss:.3f} | \"\n","                f\"Train acc: {train_accuracy:.3f} | \"\n","                f\"Eval loss: {eval_loss:.3f} | \"\n","                f\"Eval acc: {eval_accuracy:.3f} | \"\n","                f\"ECE: {ECE:.5f} | \"\n","                f\"MCE: {MCE:.5f} | \"\n","                f\"ɛ: {eps:.2f} (α: {alpha})\"\n","       
     )\n","            \n","            #torch.save([train_long_losses, train_long_accuracy, eval_long_losses, eval_long_accuracy], os.path.join(POST_FOLDER, f\"global_post_loss_acc_at_{step}_Z{config.temp_ct}_0921.pt\"))\n","            torch.save([eval_loss, eval_accuracy, logits_arr, labels_arr, diff, ECE, MCE], os.path.join(POST_FOLDER, f\"global_post_logits_labels_ECE_MCE_at_{step}_Z{config.temp_ct}_0921.pt\"))\n","            #torch.save([diff, ECE, MCE], os.path.join(POST_FOLDER, f\"global_post_diff_ECE_MCE_at_{step}_Z{config.temp_ct}_0921.pt\"))\n","    \n","## Save the final model's prediction logits on the testset, and the labels of the testset:\n","\n","eval_loss, eval_accuracy, logits_arr, labels_arr = save_logits(model)\n","torch.save([train_long_losses, train_long_accuracy, eval_long_losses, eval_long_accuracy], os.path.join(LOSSES_FOLDER, f\"global_post_loss_acc_at_final_Z{config.temp_ct}_0921.pt\"))\n","torch.save([diff_long, ECE_long, MCE_long], os.path.join(LOSSES_FOLDER, f\"global_post_diff_ECE_MCE_at_final_Z{config.temp_ct}_0921.pt\"))\n","## try to save the model itself\n","torch.save(model.state_dict(), os.path.join(LOSSES_FOLDER, f\"global_post_model_Z{config.temp_ct}_0921.pt\"))\n"],"execution_count":null,"outputs":[{"data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9a11148cee22473882227a87f1d1f8bf","version_major":2,"version_minor":0},"text/plain":["  0%|          | 0/68671 [00:00<?, ?it/s]"]},"metadata":{},"output_type":"display_data"},{"metadata":{"tags":null},"name":"stderr","output_type":"stream","text":["/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py:974: UserWarning: Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. 
Please use register_full_backward_hook to get the documented behavior.\n","  warnings.warn(\"Using a non-full backward hook when the forward contains multiple autograd Nodes \"\n"]},{"output_type":"stream","name":"stdout","text":["Epoch: 1 | Step: 200 | Train loss: 1.833 | Train acc: 0.678 | Eval loss: 1.715 | Eval acc: 0.748 | ECE: 0.21335 | MCE: 0.60480 | ɛ: 3.47 (α: 3.7)\n","Epoch: 1 | Step: 400 | Train loss: 1.762 | Train acc: 0.690 | Eval loss: 1.692 | Eval acc: 0.747 | ECE: 0.21379 | MCE: 0.61904 | ɛ: 3.58 (α: 3.6)\n","Epoch: 1 | Step: 600 | Train loss: 1.777 | Train acc: 0.688 | Eval loss: 1.671 | Eval acc: 0.748 | ECE: 0.21152 | MCE: 0.41627 | ɛ: 3.63 (α: 3.6)\n","Epoch: 1 | Step: 800 | Train loss: 1.768 | Train acc: 0.687 | Eval loss: 1.668 | Eval acc: 0.749 | ECE: 0.21177 | MCE: 0.66014 | ɛ: 3.69 (α: 3.6)\n","Epoch: 1 | Step: 1000 | Train loss: 1.778 | Train acc: 0.686 | Eval loss: 1.659 | Eval acc: 0.748 | ECE: 0.21123 | MCE: 0.44004 | ɛ: 3.73 (α: 3.5)\n","Epoch: 1 | Step: 1200 | Train loss: 1.761 | Train acc: 0.688 | Eval loss: 1.647 | Eval acc: 0.745 | ECE: 0.21347 | MCE: 0.41639 | ɛ: 3.75 (α: 3.5)\n","Epoch: 1 | Step: 1400 | Train loss: 1.767 | Train acc: 0.687 | Eval loss: 1.637 | Eval acc: 0.747 | ECE: 0.21174 | MCE: 0.45372 | ɛ: 3.78 (α: 3.5)\n","Epoch: 1 | Step: 1600 | Train loss: 1.758 | Train acc: 0.689 | Eval loss: 1.626 | Eval acc: 0.746 | ECE: 0.21167 | MCE: 0.43862 | ɛ: 3.80 (α: 3.5)\n","Epoch: 1 | Step: 1800 | Train loss: 1.748 | Train acc: 0.692 | Eval loss: 1.603 | Eval acc: 0.746 | ECE: 0.21198 | MCE: 0.45345 | ɛ: 3.82 (α: 3.5)\n","Epoch: 1 | Step: 2000 | Train loss: 1.738 | Train acc: 0.691 | Eval loss: 1.586 | Eval acc: 0.745 | ECE: 0.21161 | MCE: 0.41894 | ɛ: 3.85 (α: 3.5)\n","Epoch: 1 | Step: 2200 | Train loss: 1.736 | Train acc: 0.690 | Eval loss: 1.574 | Eval acc: 0.744 | ECE: 0.21348 | MCE: 0.40751 | ɛ: 3.87 (α: 3.5)\n","Epoch: 1 | Step: 2400 | Train loss: 1.727 | Train acc: 0.691 | Eval loss: 1.565 | Eval acc: 0.744 | ECE: 
0.21231 | MCE: 0.40791 | ɛ: 3.89 (α: 3.5)\n","Epoch: 1 | Step: 2600 | Train loss: 1.714 | Train acc: 0.691 | Eval loss: 1.551 | Eval acc: 0.744 | ECE: 0.21161 | MCE: 0.45174 | ɛ: 3.91 (α: 3.4)\n","Epoch: 1 | Step: 2800 | Train loss: 1.710 | Train acc: 0.691 | Eval loss: 1.537 | Eval acc: 0.744 | ECE: 0.21166 | MCE: 0.42612 | ɛ: 3.92 (α: 3.4)\n","Epoch: 1 | Step: 3000 | Train loss: 1.708 | Train acc: 0.691 | Eval loss: 1.526 | Eval acc: 0.744 | ECE: 0.21010 | MCE: 0.41392 | ɛ: 3.93 (α: 3.4)\n","Epoch: 1 | Step: 3200 | Train loss: 1.698 | Train acc: 0.692 | Eval loss: 1.517 | Eval acc: 0.743 | ECE: 0.21078 | MCE: 0.38889 | ɛ: 3.94 (α: 3.4)\n","Epoch: 1 | Step: 3400 | Train loss: 1.692 | Train acc: 0.692 | Eval loss: 1.510 | Eval acc: 0.743 | ECE: 0.21111 | MCE: 0.41675 | ɛ: 3.95 (α: 3.4)\n","Epoch: 1 | Step: 3600 | Train loss: 1.690 | Train acc: 0.691 | Eval loss: 1.505 | Eval acc: 0.745 | ECE: 0.20932 | MCE: 0.65522 | ɛ: 3.96 (α: 3.4)\n","Epoch: 1 | Step: 3800 | Train loss: 1.690 | Train acc: 0.691 | Eval loss: 1.483 | Eval acc: 0.744 | ECE: 0.20827 | MCE: 0.40423 | ɛ: 3.97 (α: 3.4)\n","Epoch: 1 | Step: 4000 | Train loss: 1.681 | Train acc: 0.691 | Eval loss: 1.481 | Eval acc: 0.743 | ECE: 0.20873 | MCE: 0.65495 | ɛ: 3.98 (α: 3.4)\n","Epoch: 1 | Step: 4200 | Train loss: 1.670 | Train acc: 0.692 | Eval loss: 1.483 | Eval acc: 0.743 | ECE: 0.20809 | MCE: 0.40508 | ɛ: 4.00 (α: 3.4)\n","Epoch: 1 | Step: 4400 | Train loss: 1.666 | Train acc: 0.692 | Eval loss: 1.486 | Eval acc: 0.741 | ECE: 0.21034 | MCE: 0.42172 | ɛ: 4.01 (α: 3.4)\n","Epoch: 1 | Step: 4600 | Train loss: 1.667 | Train acc: 0.692 | Eval loss: 1.486 | Eval acc: 0.741 | ECE: 0.21028 | MCE: 0.65649 | ɛ: 4.02 (α: 3.4)\n","Epoch: 1 | Step: 4800 | Train loss: 1.664 | Train acc: 0.693 | Eval loss: 1.490 | Eval acc: 0.740 | ECE: 0.21119 | MCE: 0.39733 | ɛ: 4.03 (α: 3.4)\n","Epoch: 1 | Step: 5000 | Train loss: 1.662 | Train acc: 0.692 | Eval loss: 1.479 | Eval acc: 0.741 | ECE: 0.20965 | MCE: 0.42197 | ɛ: 4.04 (α: 
3.4)\n","Epoch: 1 | Step: 5200 | Train loss: 1.661 | Train acc: 0.692 | Eval loss: 1.468 | Eval acc: 0.741 | ECE: 0.20899 | MCE: 0.39832 | ɛ: 4.05 (α: 3.4)\n","Epoch: 1 | Step: 5400 | Train loss: 1.658 | Train acc: 0.692 | Eval loss: 1.457 | Eval acc: 0.741 | ECE: 0.20911 | MCE: 0.42623 | ɛ: 4.06 (α: 3.4)\n","Epoch: 1 | Step: 5600 | Train loss: 1.652 | Train acc: 0.692 | Eval loss: 1.451 | Eval acc: 0.740 | ECE: 0.20896 | MCE: 0.41410 | ɛ: 4.07 (α: 3.4)\n","Epoch: 1 | Step: 5800 | Train loss: 1.648 | Train acc: 0.692 | Eval loss: 1.456 | Eval acc: 0.739 | ECE: 0.20931 | MCE: 0.39758 | ɛ: 4.08 (α: 3.4)\n","Epoch: 1 | Step: 6000 | Train loss: 1.646 | Train acc: 0.691 | Eval loss: 1.460 | Eval acc: 0.739 | ECE: 0.20901 | MCE: 0.38588 | ɛ: 4.09 (α: 3.4)\n","Epoch: 1 | Step: 6200 | Train loss: 1.641 | Train acc: 0.691 | Eval loss: 1.460 | Eval acc: 0.740 | ECE: 0.20844 | MCE: 0.39111 | ɛ: 4.10 (α: 3.4)\n","Epoch: 1 | Step: 6400 | Train loss: 1.639 | Train acc: 0.691 | Eval loss: 1.458 | Eval acc: 0.740 | ECE: 0.20828 | MCE: 0.40124 | ɛ: 4.11 (α: 3.4)\n","Epoch: 1 | Step: 6600 | Train loss: 1.636 | Train acc: 0.691 | Eval loss: 1.461 | Eval acc: 0.738 | ECE: 0.21017 | MCE: 0.37582 | ɛ: 4.12 (α: 3.4)\n","Epoch: 1 | Step: 6800 | Train loss: 1.635 | Train acc: 0.690 | Eval loss: 1.455 | Eval acc: 0.738 | ECE: 0.21018 | MCE: 0.39543 | ɛ: 4.13 (α: 3.3)\n","Epoch: 1 | Step: 7000 | Train loss: 1.636 | Train acc: 0.689 | Eval loss: 1.435 | Eval acc: 0.740 | ECE: 0.20769 | MCE: 0.42404 | ɛ: 4.13 (α: 3.3)\n","Epoch: 1 | Step: 7200 | Train loss: 1.637 | Train acc: 0.689 | Eval loss: 1.417 | Eval acc: 0.739 | ECE: 0.20696 | MCE: 0.40122 | ɛ: 4.14 (α: 3.3)\n","Epoch: 1 | Step: 7400 | Train loss: 1.634 | Train acc: 0.689 | Eval loss: 1.404 | Eval acc: 0.738 | ECE: 0.20758 | MCE: 0.36840 | ɛ: 4.14 (α: 3.3)\n","Epoch: 1 | Step: 7600 | Train loss: 1.633 | Train acc: 0.689 | Eval loss: 1.397 | Eval acc: 0.737 | ECE: 0.20765 | MCE: 0.39295 | ɛ: 4.15 (α: 3.3)\n","Epoch: 1 | Step: 7800 | 
Train loss: 1.633 | Train acc: 0.688 | Eval loss: 1.372 | Eval acc: 0.737 | ECE: 0.20564 | MCE: 0.65691 | ɛ: 4.15 (α: 3.3)\n","Epoch: 1 | Step: 8000 | Train loss: 1.632 | Train acc: 0.688 | Eval loss: 1.372 | Eval acc: 0.735 | ECE: 0.20649 | MCE: 0.37223 | ɛ: 4.15 (α: 3.3)\n","Epoch: 1 | Step: 8200 | Train loss: 1.627 | Train acc: 0.688 | Eval loss: 1.379 | Eval acc: 0.734 | ECE: 0.20744 | MCE: 0.38327 | ɛ: 4.16 (α: 3.3)\n","Epoch: 1 | Step: 8400 | Train loss: 1.624 | Train acc: 0.688 | Eval loss: 1.367 | Eval acc: 0.735 | ECE: 0.20628 | MCE: 0.65006 | ɛ: 4.16 (α: 3.3)\n","Epoch: 1 | Step: 8600 | Train loss: 1.624 | Train acc: 0.688 | Eval loss: 1.352 | Eval acc: 0.737 | ECE: 0.20119 | MCE: 0.38074 | ɛ: 4.17 (α: 3.3)\n","Epoch: 1 | Step: 8800 | Train loss: 1.622 | Train acc: 0.687 | Eval loss: 1.334 | Eval acc: 0.736 | ECE: 0.20020 | MCE: 0.35461 | ɛ: 4.17 (α: 3.3)\n","Epoch: 1 | Step: 9000 | Train loss: 1.619 | Train acc: 0.687 | Eval loss: 1.326 | Eval acc: 0.736 | ECE: 0.20059 | MCE: 0.37648 | ɛ: 4.18 (α: 3.3)\n","Epoch: 1 | Step: 9200 | Train loss: 1.613 | Train acc: 0.687 | Eval loss: 1.326 | Eval acc: 0.736 | ECE: 0.20103 | MCE: 0.35519 | ɛ: 4.18 (α: 3.3)\n","Epoch: 1 | Step: 9400 | Train loss: 1.610 | Train acc: 0.687 | Eval loss: 1.333 | Eval acc: 0.735 | ECE: 0.20277 | MCE: 0.36502 | ɛ: 4.19 (α: 3.3)\n","Epoch: 1 | Step: 9600 | Train loss: 1.609 | Train acc: 0.686 | Eval loss: 1.335 | Eval acc: 0.734 | ECE: 0.20324 | MCE: 0.36622 | ɛ: 4.19 (α: 3.3)\n","Epoch: 1 | Step: 9800 | Train loss: 1.608 | Train acc: 0.686 | Eval loss: 1.335 | Eval acc: 0.733 | ECE: 0.20483 | MCE: 0.38245 | ɛ: 4.20 (α: 3.3)\n","Epoch: 1 | Step: 10000 | Train loss: 1.609 | Train acc: 0.685 | Eval loss: 1.323 | Eval acc: 0.733 | ECE: 0.20392 | MCE: 0.35161 | ɛ: 4.20 (α: 
3.3)\n"]}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":345,"referenced_widgets":["9b22d7def15545588615eefbda5e6523","2379b52450744d99bcd16b9177a65e3e","4a1fc41750734a928635974cc9b7ee53","edcc14081d9a46d5b3c7009151835016","973a4a9012f64382a68aec63f3af3ee4","4a8ee41fa079438fa488a5ac15145ba7","8c4938a8cfb944c4aed59f9c54ca41a6","f54faa9bd17a43daaadffc4a5ee21227","d249ca53324e477b99c23bddac2c3e35","8a42a1fdb5b54c3b8c5b885881540380","842e21d0542d4fc9a0b1a894603cb3d0"]},"id":"GD6fpL_XSJ1E","outputId":"e1730668-a531-470c-ae08-0e8988e1d131"},"source":["## 1000\n","model.train()\n","train_long_losses = []\n","train_long_accuracy = []\n","eval_long_losses = []\n","eval_long_accuracy = []\n","diff_long = []\n","ECE_long = []\n","MCE_long = []\n","for epoch in range(1, EPOCHS+1):\n","    losses = []\n","    accuracies = []\n","    \n","    for step, batch in enumerate(tqdm(train_dataloader)):\n","        if step > 10000:\n","            break\n","        batch = tuple(t.to(device) for t in batch)\n","        inputs = {'input_ids':      batch[0],\n","                  'attention_mask': batch[1],\n","                  'token_type_ids': batch[2],\n","                  'labels':         batch[3]}\n","\n","        outputs = model(**inputs) # output = loss, logits, hidden_states, attentions\n","        \n","        loss = outputs[0]\n","        loss.backward()\n","        \n","        losses.append(loss.item())\n","\n","        logits = outputs[1]\n","        preds = np.argmax(logits.detach().cpu().numpy(), axis=1)\n","        labels = inputs['labels'].detach().cpu().numpy()\n","        accuracies.append(accuracy(preds, labels))\n","\n","        # We process small batches of size BATCH_SIZE, \n","        # until they're accumulated to a batch of size VIRTUAL_BATCH_SIZE.\n","        # Only then we make a real `.step()` and update model weights\n","        if (step + 1) % virtual_batch_rate == 0 or step == len(train_dataloader) - 1:\n","      
      optimizer.step()\n","        else:\n","            optimizer.virtual_step()\n","\n","        if step > 0 and step % LOGGING_INTERVAL == 0:\n","            train_loss = np.mean(losses)\n","            train_accuracy = np.mean(accuracies)\n","            eps, alpha = optimizer.privacy_engine.get_privacy_spent(DELTA)\n","\n","            # eval_loss, eval_accuracy = evaluate(model)\n","            eval_loss, eval_accuracy, logits_arr, labels_arr = save_logits(model)\n","            train_long_losses.append(train_loss.item())\n","            train_long_accuracy.append(train_accuracy.item())\n","            eval_long_losses.append(eval_loss.item())\n","            eval_long_accuracy.append(eval_accuracy.item())\n","\n","            diff, ECE, MCE = getECEMCE(logits_arr, labels_arr)\n","            diff_long.append(diff)\n","            ECE_long.append(ECE)\n","            MCE_long.append(MCE)\n","            print(\n","                f\"Epoch: {epoch} | \"\n","                f\"Step: {step} | \"\n","                f\"Train loss: {train_loss:.3f} | \"\n","                f\"Train acc: {train_accuracy:.3f} | \"\n","                f\"Eval loss: {eval_loss:.3f} | \"\n","                f\"Eval acc: {eval_accuracy:.3f} | \"\n","                f\"ECE: {ECE:.5f} | \"\n","                f\"MCE: {MCE:.5f} | \"\n","                f\"ɛ: {eps:.2f} (α: {alpha})\"\n","            )\n","            \n","            #torch.save([train_long_losses, train_long_accuracy, eval_long_losses, eval_long_accuracy], os.path.join(POST_FOLDER, f\"global_post_loss_acc_at_{step}_Z{config.temp_ct}_0921.pt\"))\n","            torch.save([eval_loss, eval_accuracy, logits_arr, labels_arr, diff, ECE, MCE], os.path.join(POST_FOLDER, f\"global_post_logits_labels_ECE_MCE_at_{step}_Z{config.temp_ct}_0921.pt\"))\n","            #torch.save([diff, ECE, MCE], os.path.join(POST_FOLDER, f\"global_post_diff_ECE_MCE_at_{step}_Z{config.temp_ct}_0921.pt\"))\n","    \n","## Save the final model's 
prediction logits on the testset, and the labels of the testset:\n","\n","eval_loss, eval_accuracy, logits_arr, labels_arr = save_logits(model)\n","torch.save([train_long_losses, train_long_accuracy, eval_long_losses, eval_long_accuracy], os.path.join(LOSSES_FOLDER, f\"global_post_loss_acc_at_final_Z{config.temp_ct}_0921.pt\"))\n","torch.save([diff_long, ECE_long, MCE_long], os.path.join(LOSSES_FOLDER, f\"global_post_diff_ECE_MCE_at_final_Z{config.temp_ct}_0921.pt\"))\n","## try to save the model itself\n","torch.save(model.state_dict(), os.path.join(LOSSES_FOLDER, f\"global_post_model_Z{config.temp_ct}_0921.pt\"))\n"],"execution_count":null,"outputs":[{"output_type":"display_data","data":{"application/vnd.jupyter.widget-view+json":{"model_id":"9b22d7def15545588615eefbda5e6523","version_minor":0,"version_major":2},"text/plain":["  0%|          | 0/68671 [00:00<?, ?it/s]"]},"metadata":{}},{"output_type":"stream","name":"stderr","text":["/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py:974: UserWarning: Using a non-full backward hook when the forward contains multiple autograd Nodes is deprecated and will be removed in future versions. This hook will be missing some grad_input. 
Please use register_full_backward_hook to get the documented behavior.\n","  warnings.warn(\"Using a non-full backward hook when the forward contains multiple autograd Nodes \"\n"]},{"output_type":"stream","name":"stdout","text":["Epoch: 1 | Step: 200 | Train loss: 1.739 | Train acc: 0.711 | Eval loss: 1.712 | Eval acc: 0.746 | ECE: 0.21398 | MCE: 0.38496 | ɛ: 3.47 (α: 3.7)\n","Epoch: 1 | Step: 400 | Train loss: 1.692 | Train acc: 0.714 | Eval loss: 1.684 | Eval acc: 0.746 | ECE: 0.21383 | MCE: 0.65252 | ɛ: 3.58 (α: 3.6)\n","Epoch: 1 | Step: 600 | Train loss: 1.673 | Train acc: 0.710 | Eval loss: 1.689 | Eval acc: 0.745 | ECE: 0.21426 | MCE: 0.39753 | ɛ: 3.63 (α: 3.6)\n","Epoch: 1 | Step: 800 | Train loss: 1.682 | Train acc: 0.707 | Eval loss: 1.680 | Eval acc: 0.745 | ECE: 0.21422 | MCE: 0.38265 | ɛ: 3.69 (α: 3.6)\n","Epoch: 1 | Step: 1000 | Train loss: 1.719 | Train acc: 0.704 | Eval loss: 1.657 | Eval acc: 0.743 | ECE: 0.21359 | MCE: 0.41733 | ɛ: 3.73 (α: 3.5)\n","Epoch: 1 | Step: 1200 | Train loss: 1.705 | Train acc: 0.702 | Eval loss: 1.638 | Eval acc: 0.744 | ECE: 0.21222 | MCE: 0.37754 | ɛ: 3.75 (α: 3.5)\n","Epoch: 1 | Step: 1400 | Train loss: 1.708 | Train acc: 0.700 | Eval loss: 1.616 | Eval acc: 0.744 | ECE: 0.21203 | MCE: 0.65043 | ɛ: 3.78 (α: 3.5)\n","Epoch: 1 | Step: 1600 | Train loss: 1.711 | Train acc: 0.698 | Eval loss: 1.578 | Eval acc: 0.748 | ECE: 0.20757 | MCE: 0.65034 | ɛ: 3.80 (α: 3.5)\n","Epoch: 1 | Step: 1800 | Train loss: 1.700 | Train acc: 0.697 | Eval loss: 1.561 | Eval acc: 0.747 | ECE: 0.20791 | MCE: 0.42712 | ɛ: 3.82 (α: 3.5)\n","Epoch: 1 | Step: 2000 | Train loss: 1.691 | Train acc: 0.698 | Eval loss: 1.523 | Eval acc: 0.748 | ECE: 0.20673 | MCE: 0.38518 | ɛ: 3.85 (α: 3.5)\n","Epoch: 1 | Step: 2200 | Train loss: 1.676 | Train acc: 0.698 | Eval loss: 1.487 | Eval acc: 0.747 | ECE: 0.20583 | MCE: 0.36676 | ɛ: 3.87 (α: 3.5)\n","Epoch: 1 | Step: 2400 | Train loss: 1.673 | Train acc: 0.697 | Eval loss: 1.472 | Eval acc: 0.747 | ECE: 
0.20595 | MCE: 0.38530 | ɛ: 3.89 (α: 3.5)\n","Epoch: 1 | Step: 2600 | Train loss: 1.662 | Train acc: 0.697 | Eval loss: 1.457 | Eval acc: 0.747 | ECE: 0.20339 | MCE: 0.40092 | ɛ: 3.91 (α: 3.4)\n","Epoch: 1 | Step: 2800 | Train loss: 1.661 | Train acc: 0.696 | Eval loss: 1.437 | Eval acc: 0.744 | ECE: 0.20593 | MCE: 0.65226 | ɛ: 3.92 (α: 3.4)\n","Epoch: 1 | Step: 3000 | Train loss: 1.654 | Train acc: 0.694 | Eval loss: 1.400 | Eval acc: 0.744 | ECE: 0.20414 | MCE: 0.38221 | ɛ: 3.93 (α: 3.4)\n"]}]},{"cell_type":"markdown","metadata":{"id":"qUSPuzwlSJ1E"},"source":["For the test accuracy, after training for three epochs you should expect something close to the results below.\n","\n","You can see that we can achieve quite a strong privacy guarantee at epsilon=7.5 with a moderate accuracy cost of 11 percentage points compared to a non-private model trained in a similar setting (upper layers only) and 16 points compared to the best results we were able to achieve using the same architecture.\n","\n","*NB: When not specified, DP-SGD is trained with upper layers only*"]},{"cell_type":"markdown","metadata":{"collapsed":true,"id":"ZsFDuxTFSJ1E"},"source":["| Model | Noise multiplier | Batch size | Accuracy | Epsilon |\n","| --- | --- | --- | --- | --- |\n","| no DP, train full model | N/A | 32 | 90.1% | N/A |\n","| no DP, train upper layers only | N/A | 32 | 85.4% | N/A |\n","| DP-SGD | 1.0 | 32 | 70.5% | 0.7 |\n","| **DP-SGD (this tutorial)** | **0.4** | **32** | **74.3%** | **7.5** |\n","| DP-SGD | 0.3 | 32 | 75.8% | 20.7 |\n","| DP-SGD | 0.1 | 32 | 78.3% | 2865 |\n","| DP-SGD | 0.4 | 8 | 67.3% | 5.9 |"]},{"cell_type":"code","metadata":{"id":"q7bAdF81lts1"},"source":[""],"execution_count":null,"outputs":[]}]}