import os
import huggingface_hub
from huggingface_hub import snapshot_download

# Mirror endpoint used for every Hub request made by this script.
HF_ENDPOINT = "https://hf-mirror.com"
# Exported for child processes and CLI tools. NOTE: huggingface_hub is already
# imported above and resolves its default endpoint at import time, so this
# assignment alone does not redirect snapshot_download(); the endpoint is
# therefore also passed explicitly in _fetch_archive().
os.environ["HF_ENDPOINT"] = HF_ENDPOINT

# Authentication: run `huggingface-cli login` or export HF_TOKEN before starting.
# Never hard-code an access token in this file.

REPO_ID = "JourneyDB/JourneyDB"
LOCAL_DIR = "YOUR_ROOT_PATH/data/MLLM/IC/JourneyDB"
splits = ["train", "valid"]
# Archives are requested as 000.tgz .. 199.tgz for every split.
# NOTE(review): a split may publish fewer archives than this - confirm against the repo.
ARCHIVES_PER_SPLIT = 200
# Upper bound on retry passes so persistent failures cannot loop forever.
MAX_PASSES = 5


def find_missing_archives(imgs_dir, expected):
    """Return the names in *expected* that are not present in *imgs_dir*.

    The order of *expected* is preserved. Unrelated directory entries
    (hidden files, partial downloads, ...) are ignored.
    """
    present = set(os.listdir(imgs_dir))
    return [name for name in expected if name not in present]


def _fetch_archive(pattern, local_dir):
    """Download the single repo file matching *pattern* into *local_dir*."""
    snapshot_download(
        repo_id=REPO_ID,
        repo_type="dataset",
        local_dir=local_dir,
        local_dir_use_symlinks=False,
        resume_download=True,
        force_download=False,
        etag_timeout=120,
        allow_patterns=[pattern],
        max_workers=3,
        # Explicit endpoint: the HF_ENDPOINT variable is set too late (see above).
        endpoint=HF_ENDPOINT,
    )


def download_split(split, local_dir=LOCAL_DIR, archive_count=ARCHIVES_PER_SPLIT,
                   max_passes=MAX_PASSES, fetch=None):
    """Download the image archives of one split, retrying failed ones.

    Args:
        split: Split name, e.g. "train" or "valid".
        local_dir: Root directory the repository layout is mirrored into.
        archive_count: Number of archives to request (000.tgz, 001.tgz, ...).
        max_passes: Maximum number of passes over the still-missing archives.
        fetch: Callable ``fetch(pattern, local_dir)`` that performs one
            download; defaults to the Hub-backed ``_fetch_archive``.

    Returns:
        The archive names that are still missing on disk when the function
        stops (an empty list when everything was downloaded).
    """
    if fetch is None:
        fetch = _fetch_archive

    imgs_dir = os.path.join(local_dir, "data", split, "imgs")
    os.makedirs(imgs_dir, exist_ok=True)
    expected = [f"{index:03d}.tgz" for index in range(archive_count)]

    # Archives whose download call returned cleanly but produced no file:
    # the pattern matched nothing in the repo, so retrying cannot help.
    unavailable = set()

    for _ in range(max_passes):
        pending = [
            name
            for name in find_missing_archives(imgs_dir, expected)
            if name not in unavailable
        ]
        print("#########################")
        print(f"downloading_list: {pending}")
        if not pending:
            break

        for name in pending:
            pattern = f"data/{split}/imgs/{name}"
            print("#########################")
            print(f"downloading_item: {pattern}")
            try:
                fetch(pattern, local_dir)
            except Exception as error:
                # Best effort: report the failure and retry on the next pass.
                print(error)
                continue
            if not os.path.exists(os.path.join(imgs_dir, name)):
                unavailable.add(name)

    return find_missing_archives(imgs_dir, expected)


def main():
    """Download every configured split and report archives left missing."""
    for split in splits:
        leftover = download_split(split)
        if leftover:
            print(f"{split}: {len(leftover)} archive(s) still missing: {leftover}")


if __name__ == "__main__":
    main()

"""
export HF_HUB_ENABLE_HF_TRANSFER=1
export HF_ENDPOINT="https://hf-mirror.com"
# Export HF_TOKEN=<your access token> beforehand; never paste a real token into this file.
huggingface-cli download --token "$HF_TOKEN" --resume-download --repo-type dataset JourneyDB/JourneyDB --local-dir YOUR_ROOT_PATH/data/MLLM/IC/JourneyDB --local-dir-use-symlinks False
"""