{"cells":[{"cell_type":"markdown","metadata":{"id":"-iELaqNhWLtO"},"source":["# Downloading the ClimateNet dataset"]},{"cell_type":"code","execution_count":6,"metadata":{"id":"lV6WZL8zKxmT"},"outputs":[],"source":["import requests, os, bs4\n","from bs4 import BeautifulSoup\n","\n","### List all links to NetCDF files at a given url\n","\n","def list_nc_datasets(index_url):\n","\n","    # Parse target url\n","    reqs = requests.get(index_url)\n","    soup = BeautifulSoup(reqs.text, 'html.parser')\n","\n","    # Find all link tags in the page and list their target href\n","    urls = [] \n","\n","    for link in soup.find_all('a'):\n","        urls.append(link.get('href'))\n","\n","    # Keep only links to NetCDF file\n","    nc_data_urls = [x for x in urls if x.endswith('.nc')]\n","\n","    return nc_data_urls\n","\n","### Download a file to Google Drive\n","\n","def download_file_gdrive(index_url, file_url, dest_dir):\n","\n","    # Create folder\n","    os.makedirs('./data/'+dest_dir, exist_ok=True)\n","\n","    # Stream GET request\n","    r = requests.get(index_url+file_url, stream = True)\n","    blocks = []  \n","\n","    # Save the image to folder\n","    with open(os.path.join('./data/'+dest_dir, os.path.basename(file_url)), \"wb\") as file:\n","\n","        for block in r.iter_content(chunk_size = None):\n","            if block:\n","                blocks.append(block)\n","\n","        file.write(b''.join(blocks))\n","\n","        # Display file size\n","        file.seek(0, os.SEEK_END)\n","        print (\"Download complete: \"+file_url+\" – Size: \"+str(file.tell())+\" bytes.\")\n","        file.close()\n","    \n","    return\n","\n","### Download all NetCDF files rom a target url ###\n","\n","def download_climate_net(index_url, dest_dir):\n","\n","    nc_data_urls = list_nc_datasets(index_url)\n","\n","    for i, file_url in enumerate(nc_data_urls):\n","        print(str(i+1)+\"/\"+str(len(nc_data_urls)), end=\" \")\n","        download_file_gdrive(index_url, file_url, dest_dir)\n","\n","    return"]},{"cell_type":"markdown","metadata":{"id":"yvmE60ylW1EJ"},"source":["### Downloading test dataset\n","\n"]},{"cell_type":"code","execution_count":7,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":168944,"status":"ok","timestamp":1665699549482,"user":{"displayName":"Romain Lacombe","userId":"12776652903382944332"},"user_tz":-120},"id":"r29L8jO1Kx3-","outputId":"f8496eaa-f10c-453f-8592-bcca15b2cab7"},"outputs":[{"name":"stdout","output_type":"stream","text":["1/61 Download complete: data-2011-06-01-01-1_0.nc – Size: 63744786 bytes.\n","2/61 Download complete: data-2011-06-03-01-1_0.nc – Size: 63744786 bytes.\n","3/61 Download complete: data-2011-06-03-01-1_1.nc – Size: 63744786 bytes.\n","4/61 Download complete: data-2011-06-16-01-1_0.nc – Size: 63744786 bytes.\n","5/61 Download complete: data-2011-07-26-01-1_0.nc – Size: 63744786 bytes.\n","6/61 Download complete: data-2011-07-26-01-1_1.nc – Size: 63744786 bytes.\n","7/61 Download complete: data-2011-07-29-01-1_0.nc – Size: 63744786 bytes.\n","8/61 Download complete: data-2011-08-08-01-1_0.nc – Size: 63744786 bytes.\n","9/61 Download complete: data-2011-08-08-01-1_1.nc – Size: 63744786 bytes.\n","10/61 Download complete: data-2011-09-06-01-1_0.nc – Size: 63744786 bytes.\n","11/61 Download complete: data-2011-09-06-01-1_1.nc – Size: 63744786 bytes.\n","12/61 Download complete: data-2011-09-09-01-1_0.nc – Size: 63744786 bytes.\n","13/61 Download complete: data-2011-09-09-01-1_1.nc – Size: 63744786 bytes.\n","14/61 Download complete: data-2011-09-09-01-1_2.nc – Size: 63744786 bytes.\n","15/61 Download complete: data-2011-09-09-01-1_3.nc – Size: 63744786 bytes.\n","16/61 Download complete: data-2011-09-12-01-1_0.nc – Size: 63744786 bytes.\n","17/61 Download complete: data-2011-09-30-01-1_0.nc – Size: 63744786 bytes.\n","18/61 Download complete: data-2011-09-30-01-1_1.nc – Size: 63744786 bytes.\n","19/61 Download complete: data-2011-09-30-01-1_2.nc – Size: 63744786 bytes.\n","20/61 Download complete: data-2011-09-30-01-1_3.nc – Size: 63744786 bytes.\n","21/61 Download complete: data-2011-10-10-01-1_0.nc – Size: 63744786 bytes.\n","22/61 Download complete: data-2011-10-10-01-1_1.nc – Size: 63744786 bytes.\n","23/61 Download complete: data-2011-10-30-01-1_0.nc – Size: 63744786 bytes.\n","24/61 Download complete: data-2012-06-18-01-1_0.nc – Size: 63744786 bytes.\n","25/61 Download complete: data-2012-06-24-01-1_0.nc – Size: 63744786 bytes.\n","26/61 Download complete: data-2012-06-24-01-1_1.nc – Size: 63744786 bytes.\n","27/61 Download complete: data-2012-06-24-01-1_2.nc – Size: 63744786 bytes.\n","28/61 Download complete: data-2012-06-24-01-1_3.nc – Size: 63744786 bytes.\n","29/61 Download complete: data-2012-06-29-01-1_0.nc – Size: 63744786 bytes.\n","30/61 Download complete: data-2012-07-11-01-1_0.nc – Size: 63744786 bytes.\n","31/61 Download complete: data-2012-07-11-01-1_1.nc – Size: 63744786 bytes.\n","32/61 Download complete: data-2012-07-11-01-1_2.nc – Size: 63744786 bytes.\n","33/61 Download complete: data-2012-07-21-01-1_0.nc – Size: 63744786 bytes.\n","34/61 Download complete: data-2012-08-07-01-1_0.nc – Size: 63744786 bytes.\n","35/61 Download complete: data-2012-08-07-01-1_1.nc – Size: 63744786 bytes.\n","36/61 Download complete: data-2012-08-07-01-1_2.nc – Size: 63744786 bytes.\n","37/61 Download complete: data-2012-08-09-01-1_0.nc – Size: 63744786 bytes.\n","38/61 Download complete: data-2012-08-17-01-1_0.nc – Size: 63744786 bytes.\n","39/61 Download complete: data-2012-08-17-01-1_1.nc – Size: 63744786 bytes.\n","40/61 Download complete: data-2012-10-10-01-1_0.nc – Size: 63744786 bytes.\n","41/61 Download complete: data-2012-10-18-01-1_0.nc – Size: 63744786 bytes.\n","42/61 Download complete: data-2012-10-18-01-1_1.nc – Size: 63744786 bytes.\n","43/61 Download complete: data-2012-10-18-01-1_2.nc – Size: 63744786 bytes.\n","44/61 Download complete: data-2013-06-15-01-1_0.nc – Size: 63744786 bytes.\n","45/61 Download complete: data-2013-06-15-01-1_1.nc – Size: 63744786 bytes.\n","46/61 Download complete: data-2013-06-27-01-1_0.nc – Size: 63744786 bytes.\n","47/61 Download complete: data-2013-06-27-01-1_1.nc – Size: 63744786 bytes.\n","48/61 Download complete: data-2013-06-27-01-1_2.nc – Size: 63744786 bytes.\n","49/61 Download complete: data-2013-07-03-01-1_0.nc – Size: 63744786 bytes.\n","50/61 Download complete: data-2013-07-03-01-1_1.nc – Size: 63744786 bytes.\n","51/61 Download complete: data-2013-08-01-01-1_0.nc – Size: 63744786 bytes.\n","52/61 Download complete: data-2013-08-01-01-1_1.nc – Size: 63744786 bytes.\n","53/61 Download complete: data-2013-08-01-01-1_2.nc – Size: 63744786 bytes.\n","54/61 Download complete: data-2013-08-13-01-1_0.nc – Size: 63744786 bytes.\n","55/61 Download complete: data-2013-08-20-01-1_0.nc – Size: 63744786 bytes.\n","56/61 Download complete: data-2013-08-20-01-1_1.nc – Size: 63744786 bytes.\n","57/61 Download complete: data-2013-09-04-01-1_0.nc – Size: 63744786 bytes.\n","58/61 Download complete: data-2013-09-04-01-1_1.nc – Size: 63744786 bytes.\n","59/61 Download complete: data-2013-09-16-01-1_0.nc – Size: 63744786 bytes.\n","60/61 Download complete: data-2013-09-16-01-1_1.nc – Size: 63744786 bytes.\n","61/61 Download complete: data-2013-09-27-01-1_0.nc – Size: 63744786 bytes.\n"]}],"source":["download_climate_net('https://portal.nersc.gov/project/ClimateNet/climatenet_new/test/', 'test')"]},{"attachments":{},"cell_type":"markdown","metadata":{"id":"Ichi1HEWavPk"},"source":["## Downloading train dataset\n","\n","We do not need it for now"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1129129,"status":"ok","timestamp":1665700707505,"user":{"displayName":"Romain Lacombe","userId":"12776652903382944332"},"user_tz":-120},"id":"h0wE2ZeOaY4w","outputId":"af0abc96-d973-4706-917f-bf1324899a67"},"outputs":[],"source":["#download_climate_net('https://portal.nersc.gov/project/ClimateNet/climatenet_new/train/', 'Train')"]}],"metadata":{"colab":{"collapsed_sections":[],"provenance":[]},"kernelspec":{"display_name":"Python 3.9.13 ('base')","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.9.16"},"vscode":{"interpreter":{"hash":"edba350efdf17bfba555f05b6b4027cadc14ea1d4eb1dc56574b8011323c73bc"}}},"nbformat":4,"nbformat_minor":0}
