import os
import tarfile
import urllib.request

# Base URL for the GLDv2 dataset
base_url = 'https://s3.amazonaws.com/google-landmark/'

# Remote archive paths: train/images_000.tar ... train/images_499.tar
train_files = [f'train/images_{i:03d}.tar' for i in range(500)]

# Directory that receives both the downloaded archives and their contents
data_dir = '/net/scratch/user/landmark_backup/'

# Create the directory if it doesn't exist
os.makedirs(data_dir, exist_ok=True)


def _download(url, filepath):
    """Fetch *url* into *filepath* without ever leaving a truncated file there.

    The payload is written to ``filepath + '.part'`` and only renamed to
    *filepath* once ``urlretrieve`` has returned successfully.  The existence
    check in the main loop therefore never mistakes an interrupted transfer
    for a finished archive.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises; the partial file is
        removed (best effort) before the exception propagates.
    """
    partial = filepath + '.part'
    try:
        urllib.request.urlretrieve(url, partial)
        # Same directory, hence same filesystem: the rename is a single step.
        os.replace(partial, filepath)
    except BaseException:
        # Also covers Ctrl-C; always re-raised after cleaning up.
        try:
            os.remove(partial)
        except OSError:
            pass
        raise


def _extract(filepath, dest):
    """Unpack the tar archive at *filepath* into the directory *dest*.

    When the running interpreter supports tarfile extraction filters, the
    ``'data'`` filter is used so that members with absolute paths, ``..``
    components or links pointing outside *dest* are rejected instead of being
    written.  Older interpreters fall back to the unfiltered call.
    """
    with tarfile.open(filepath) as tar:
        if hasattr(tarfile, 'data_filter'):
            tar.extractall(path=dest, filter='data')
        else:
            tar.extractall(path=dest)


# Download and extract the dataset
for file in train_files:
    url = base_url + file
    filename = file.split('/')[-1]
    filepath = os.path.join(data_dir, filename)
    # Archives already present are assumed complete and are not re-fetched.
    if not os.path.exists(filepath):
        print(f'Downloading {filename}...')
        _download(url, filepath)
    if filepath.endswith(".tar"):
        print(f'Extracting {filename}...')
        _extract(filepath, data_dir)
