#%%
import os
import tarfile
import urllib.request

# Source URL of the IIIT5K-Word archive, the local archive name, and the
# directory the archive is unpacked into.
url = "https://cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/IIIT5K-Word_V3.0.tar.gz"
filename = "IIIT5K-Word_V3.0.tar.gz"
extract_dir = "IIIT5K"


def _download(source_url, destination):
    """Download *source_url* to *destination* without leaving a partial file.

    The data is first written to ``destination + ".part"`` and only moved
    into place once ``urlretrieve`` has returned successfully. An interrupted
    or failed download therefore never produces a file at *destination* that
    a later run would mistake for a complete archive.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``os.replace`` raises.
    """
    partial_path = destination + ".part"
    try:
        urllib.request.urlretrieve(source_url, partial_path)
        os.replace(partial_path, destination)
    finally:
        # After a successful os.replace the partial file no longer exists, so
        # this only removes leftovers from a failed or interrupted download.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def _extract(archive_path, destination):
    """Unpack the gzip-compressed tar *archive_path* into *destination*.

    Files are extracted into ``destination + ".partial"`` and the directory is
    renamed to *destination* only after extraction finished, so a failed
    extraction is retried on the next run instead of being skipped.

    Raises:
        RuntimeError: On interpreters without tarfile extraction filters, if
            a member name would resolve outside the extraction directory.
        tarfile.TarError / OSError: If the archive cannot be read or written.
    """
    staging_dir = destination + ".partial"
    with tarfile.open(archive_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # The "data" filter rejects absolute paths, links pointing outside
            # the target directory and special files.
            tar.extractall(staging_dir, filter="data")
        else:
            # Interpreter without extraction filters: best-effort check that
            # no member name escapes the target directory. NOTE: this does not
            # validate symlink/hardlink targets inside the archive.
            root = os.path.realpath(staging_dir)
            for member in tar.getmembers():
                target = os.path.realpath(os.path.join(root, member.name))
                if os.path.commonpath([root, target]) != root:
                    raise RuntimeError(
                        f"Unsafe path in archive: {member.name!r}"
                    )
            tar.extractall(staging_dir)
    os.rename(staging_dir, destination)


if not os.path.exists(filename):
    print("Downloading dataset...")
    _download(url, filename)
    print("Download complete.")
else:
    print("File already exists. Skipping download.")

if not os.path.exists(extract_dir):
    print("Extracting files...")
    _extract(filename, extract_dir)
    print("Extraction complete.")
else:
    print("Directory already exists. Skipping extraction.")


