import zipfile
import os
import shutil
from ast import literal_eval
from itertools import product
from pathlib import Path
from urllib.parse import urljoin
from urllib.request import urlopen
from urllib.error import URLError

import configargparse
from tqdm import tqdm

def download(url, path, auto_mkdir=True):
    """Stream the resource at ``url`` into the file ``path``, showing a progress bar.

    The payload is first written to a sibling ``<name>.part`` file and only
    renamed to ``path`` once the transfer has finished, so an interrupted or
    failed download never leaves a truncated file at ``path``.

    Args:
        url: Address passed to ``urlopen``.
        path: ``pathlib.Path`` of the destination file.
        auto_mkdir: When true, create ``path.parent`` (and missing ancestors)
            before downloading.

    Returns:
        None. Failures are reported on stdout instead of being raised.
    """
    if auto_mkdir:
        path.parent.mkdir(exist_ok=True, parents=True)

    # Staging file next to the destination so the final rename stays on one filesystem.
    partial_path = path.with_name(path.name + ".part")

    try:
        # The context manager closes the connection on every exit path.
        with urlopen(url) as response:
            total_size = int(response.headers.get('content-length', 0))

            if total_size > shutil.disk_usage(path.parent).free:
                raise IOError("Not enough disk space to download the file.")

            # A missing Content-Length yields 0; hand tqdm None so it shows a
            # plain byte counter instead of a bar with a zero total.
            with tqdm(total=total_size or None, unit='iB', unit_scale=True, desc=f"Downloading {path.name}") as pbar:
                with open(partial_path, 'wb') as file:
                    for data in iter(lambda: response.read(8192), b''):
                        size = file.write(data)
                        pbar.update(size)

        # Publish the file only after the whole body has been written.
        partial_path.replace(path)
        print(f"Download of {path.name} completed successfully.")
    except URLError as e:
        print(f"Failed to download {url}. Error: {e}")
    except IOError as e:
        print(f"IO Error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        # After a successful rename the staging file is gone and this is a no-op;
        # after a failure it discards the incomplete data.
        if partial_path.exists():
            partial_path.unlink()

def extract_zip(zip_path, extract_to):
    """Unpack every member of the archive ``zip_path`` into ``extract_to``.

    Progress is shown per archive member. Problems (an invalid archive or any
    other exception) are printed rather than raised.

    Args:
        zip_path: ``pathlib.Path`` of the zip archive to read.
        extract_to: Directory the members are extracted into.
    """
    try:
        with zipfile.ZipFile(zip_path, 'r') as archive:
            members = archive.infolist()
            progress = tqdm(
                iterable=members,
                total=len(members),
                desc=f"Extracting {zip_path.name}",
            )
            for member in progress:
                archive.extract(member=member, path=extract_to)
        print(f"Extraction of {zip_path.name} completed successfully.")
    except zipfile.BadZipFile:
        print(f"Error: {zip_path} is not a valid zip file.")
    except Exception as e:
        print(f"An error occurred while extracting {zip_path}: {e}")

def download_OfficeHome(args):
    """Fetch and unpack the OfficeHome archive, then rename 'RealWorld' to 'Real'.

    The archive is stored as ``<dataset_dir>/OfficeHome.zip`` and downloaded
    from ``args.url`` only when that file is not present yet. It is extracted
    into ``args.dataset_dir`` on every call.

    Args:
        args: Namespace providing ``dataset_dir`` (a ``Path``) and ``url``.
    """
    archive = args.dataset_dir / "OfficeHome.zip"

    if not archive.exists():
        download(args.url, archive)
    else:
        print(f"File {archive} already exists. Skipping download.")

    extract_zip(archive, args.dataset_dir)

    dataset_root = args.dataset_dir / "OfficeHome"
    old_dir = dataset_root / "RealWorld"
    new_dir = dataset_root / "Real"
    # Rename only when the source folder exists and the target name is still free.
    if not old_dir.exists() or new_dir.exists():
        return
    old_dir.rename(new_dir)
    print("Renamed 'RealWorld' to 'Real'")

def download_DomainNet(args):
    """Fetch and unpack one zip archive per DomainNet domain.

    Each archive is stored as ``<dataset_dir>/<domain>.zip``, downloaded only
    when that file is missing, and extracted into ``<dataset_dir>/DomainNet``.

    Args:
        args: Namespace providing ``dataset_dir`` (a ``Path``), ``url`` and
            ``domains`` (an iterable of domain names).
    """
    for domain in args.domains:
        archive = args.dataset_dir / f"{domain}.zip"

        if not archive.exists():
            # These two domains are fetched from the "groundtruth" sub-path.
            if domain in ("clipart", "painting"):
                url = f"{args.url}/groundtruth/{domain}.zip"
            else:
                url = f"{args.url}/{domain}.zip"
            download(url, archive)
        else:
            print(f"File {archive} already exists. Skipping download.")

        target_dir = args.dataset_dir / "DomainNet"
        target_dir.mkdir(exist_ok=True, parents=True)
        extract_zip(archive, target_dir)

def download_Office31(args):
    """Fetch and unpack the Office31 archive.

    The archive is stored as ``<dataset_dir>/Office31.zip`` and downloaded
    from ``args.url`` only when that file is not present yet. It is extracted
    into ``args.dataset_dir`` on every call.

    Args:
        args: Namespace providing ``dataset_dir`` (a ``Path``) and ``url``.
    """
    archive = args.dataset_dir / "Office31.zip"

    if not archive.exists():
        download(args.url, archive)
    else:
        print(f"File {archive} already exists. Skipping download.")

    extract_zip(archive, args.dataset_dir)

def prepare_text_list(args):
    """Download the image-list text files for every domain of the dataset.

    Files are saved under ``<dataset_dir>/<dataset>/text/<domain>/``. For each
    domain this fetches:

    * ``labeled_source_images_<domain>.txt``       -> ``all.txt``
    * ``validation_target_images_<domain>_3.txt``  -> ``val.txt``
    * ``labeled_target_images_<domain>_<n>.txt``   -> ``train_<n>.txt``
    * ``unlabeled_target_images_<domain>_<n>.txt`` -> ``test_<n>.txt``

    with ``n`` taking the values 1 and 3.

    Args:
        args: Namespace providing ``dataset_dir`` (a ``Path``), ``dataset``,
            ``domains`` and ``text_url``.
    """
    # (remote prefix, remote role, remote suffix, local file name)
    per_domain_lists = [
        ("labeled", "source", "", "all.txt"),
        ("validation", "target", "_3", "val.txt"),
    ]
    for spec, domain in product(per_domain_lists, args.domains):
        remote_prefix, role, suffix, local_name = spec
        destination = args.dataset_dir / args.dataset / "text" / domain / local_name
        remote = f"{args.text_url}/{remote_prefix}_{role}_images_{domain}{suffix}.txt"
        download(remote, destination)

    # (remote prefix, local file prefix)
    target_lists = [("labeled", "train"), ("unlabeled", "test")]
    label_counts = [1, 3]
    for pair, domain, count in product(target_lists, args.domains, label_counts):
        remote_prefix, local_prefix = pair
        destination = args.dataset_dir / args.dataset / "text" / domain / f"{local_prefix}_{count}.txt"
        remote = f"{args.text_url}/{remote_prefix}_target_images_{domain}_{count}.txt"
        download(remote, destination)

def arguments_parsing():
    """Parse command-line / YAML-config options and derive per-dataset settings.

    Options are read from the command line and from the config file named by
    ``--config`` (default ``dataset.yaml``).

    Returns:
        The parsed namespace, extended with:

        * ``url`` and ``domains`` - taken from ``dataset_cfg[dataset]``;
        * ``text_dir`` - ``<dataset_dir>/<dataset>/text``;
        * ``text_url`` - the configured value with a dataset-specific suffix
          appended.

    Raises:
        SystemExit: Via ``parser.error`` when a required setting is missing or
            ``dataset_cfg`` has no usable entry for the selected dataset.
    """
    p = configargparse.ArgumentParser(
        config_file_parser_class=configargparse.YAMLConfigFileParser
    )
    p.add("--config", is_config_file=True, default="dataset.yaml")
    p.add(
        "--dataset",
        type=str,
        default="OfficeHome",
        choices=["DomainNet", "Office31", "OfficeHome"],
    )
    p.add("--text_url", type=str)
    p.add("--dataset_dir", type=Path)
    p.add("--dataset_cfg", type=literal_eval)

    args = p.parse_args()

    # These options have no defaults; without them the derivations below would
    # fail with an opaque TypeError, so report them as a usage error instead.
    missing = [
        f"--{name}"
        for name in ("text_url", "dataset_dir", "dataset_cfg")
        if getattr(args, name) is None
    ]
    if missing:
        p.error(
            f"missing required setting(s): {', '.join(missing)} "
            "(pass them on the command line or in the config file)"
        )

    try:
        selected_cfg = args.dataset_cfg[args.dataset]
        args.url = selected_cfg["url"]
        args.domains = selected_cfg["domains"]
    except (KeyError, TypeError):
        p.error(
            f"--dataset_cfg has no usable 'url'/'domains' entry for {args.dataset!r}"
        )

    args.text_dir = args.dataset_dir / args.dataset / "text"
    # The suffix is appended by plain string concatenation, so the configured
    # base is used exactly as given (no separator is inserted here).
    args.text_url = args.text_url + (
        {
            "DomainNet": "multi",
            "Office31": "office",
            "OfficeHome": "office_home",
        }[args.dataset]
    )
    return args

if __name__ == "__main__":
    # Script entry point: parse options, download and extract the selected
    # dataset, then fetch its image-list text files.
    args = arguments_parsing()
    print(f"Starting download and processing for {args.dataset}")

    # Dispatch by name; `--dataset` is restricted to values that have a
    # matching download_<name> function in this module.
    download_fn = globals()[f"download_{args.dataset}"]
    failed = False
    try:
        download_fn(args)
        prepare_text_list(args)
        print(f"Processing for {args.dataset} completed successfully.")
    except Exception as e:
        print(f"An error occurred during processing: {e}")
        failed = True

    print("Script execution completed.")
    if failed:
        # Signal the failure to the calling shell instead of exiting with status 0.
        raise SystemExit(1)