#%%
import os
import requests
import zipfile
from tqdm import tqdm

# Archives to fetch from the ml-inat-competition-datasets S3 bucket (2017 files).
# Every entry needs its own trailing comma: adjacent string literals without a
# comma are silently concatenated by Python into a single (invalid) URL.
urls = [
    "https://ml-inat-competition-datasets.s3.amazonaws.com/2017/train_2017_bboxes.zip",
    "https://ml-inat-competition-datasets.s3.amazonaws.com/2017/val_2017_bboxes.zip",
    "https://ml-inat-competition-datasets.s3.amazonaws.com/2017/inat2017_locations.zip",
    "https://ml-inat-competition-datasets.s3.amazonaws.com/2017/train_val_images.tar.gz",
]

# Directory that receives both the downloaded archives and their extracted contents.
output_dir = "data"
os.makedirs(output_dir, exist_ok=True)

# (connect, read) timeouts in seconds; without a timeout a stalled connection
# would block the script indefinitely.
request_timeout = (10, 60)
# Bytes pulled from the response per iteration; 1 MiB keeps the per-chunk
# Python overhead low for multi-gigabyte archives.
block_size = 1024 * 1024

for url in urls:
    # The local file name is the last path component of the URL.
    filename = os.path.join(output_dir, url.split("/")[-1])
    extract_dir = os.path.join(output_dir, os.path.splitext(os.path.basename(filename))[0])

    if not os.path.exists(filename):
        # Stream into a temporary ".part" file and rename it only once the
        # transfer has finished, so an interrupted download is never mistaken
        # for a complete archive on the next run.
        partial_path = filename + ".part"
        with requests.get(url, stream=True, timeout=request_timeout) as response:
            # Fail loudly on 4xx/5xx instead of saving the error body as the archive.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            with open(partial_path, "wb") as f, tqdm(
                desc=f"Downloading {filename}",
                # tqdm expects None (not 0) when the total size is unknown.
                total=total_size or None,
                unit='B',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in response.iter_content(block_size):
                    f.write(data)
                    bar.update(len(data))
        os.replace(partial_path, filename)
    else:
        print(f"Already downloaded: {filename}")

    # Unzip (only .zip archives are extracted here; other formats are left as downloaded)
    if filename.endswith(".zip"):
        print(f"Unzipping {filename} ...")
        with zipfile.ZipFile(filename, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)
        print(f"Extracted to {extract_dir}")

print("All downloads and extractions completed.")

#%%
import tarfile
import os

# Extract the gzip-compressed image tarball into its own directory.
# NOTE(review): the download cell in this file saves archives under "data/",
# while this cell reads from "downloads/" — confirm the tarball actually lives
# at this path before running.
tar_path = "downloads/train_val_images.tar.gz"
output_dir = "downloads/new_train_val_images"
os.makedirs(output_dir, exist_ok=True)

with tarfile.open(tar_path, "r:gz") as tar:
    # Use the "data" extraction filter where the running Python provides it:
    # it rejects members that would escape output_dir (absolute paths, "..",
    # unsafe links) and avoids the DeprecationWarning raised on Python
    # 3.12/3.13 when no filter is given. Older interpreters without filter
    # support fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(output_dir, filter="data")
    else:
        tar.extractall(output_dir)

print("Extraction completed.")
