import os
import shutil
import zipfile

from kaggle.api.kaggle_api_extended import KaggleApi

from custom_dataset_preparation import *


def prepare_all_competitions(datasets_dir_path):
    """Run every competition-specific preparation routine in a fixed order.

    For each competition a progress line is printed, then that competition's
    handler is called with ``<datasets_dir_path>/<competition slug>``.

    The handlers are not defined in this file; presumably they come from the
    ``custom_dataset_preparation`` star import. An exception raised by any
    handler propagates and stops the remaining preparations.

    Args:
        datasets_dir_path: Directory containing one sub-directory per
            competition, named after the competition slug.
    """
    def _prepare(slug, handler):
        # Announce the competition, then hand its directory to the handler.
        print(f"Prepare '{slug}'")
        handler(os.path.join(datasets_dir_path, slug))

    _prepare('aerial-cactus-identification', prepare_aerial_cactus)
    _prepare('denoising-dirty-documents', prepare_denoising_dirty)
    _prepare('dog-breed-identification', prepare_dog_breed_identification)
    _prepare('dogs-vs-cats-redux-kernels-edition', prepare_dogs_vs_cats_kernels)
    _prepare('jigsaw-toxic-comment-classification-challenge', prepare_jigsaw_toxic)
    _prepare('leaf-classification', prepare_leaf_classification)
    _prepare('nomad2018-predict-transparent-conductors', prepare_nomad)
    _prepare('random-acts-of-pizza', process_pizza)
    _prepare('spooky-author-identification', prepare_spooky)
    _prepare('text-normalization-challenge-english-language', prepare_text_normalization_en)
    _prepare('text-normalization-challenge-russian-language', prepare_text_normalization_ru)
    _prepare('make-data-count-finding-data-references', prepare_make_data_count)
    _prepare('wsdm-cup-multilingual-chatbot-arena', prepare_wsdm_cup)
    _prepare('pii-detection-removal-from-educational-data', prepare_pii)
    _prepare('mlsp-2013-birds', prepare_mlsp)
    _prepare('cmi-detect-behavior-with-sensor-data', prepare_cmi_detect)


def download_kaggle_competitions(comp, comp_dir):
    """Download a Kaggle competition's files into ``comp_dir``.

    Authenticates a ``KaggleApi`` client, creates ``comp_dir`` if needed and
    requests the competition files. If the download fails with an error whose
    message indicates that the competition rules have not been accepted, the
    user is shown the rules link and asked to press Enter, after which the
    download is attempted again.

    Args:
        comp: Competition slug, as used in the Kaggle URL.
        comp_dir: Directory the files are downloaded into.

    Raises:
        Exception: Any download error that is not recognised as a
            "rules not accepted" failure is re-raised unchanged.
    """
    api = KaggleApi()
    api.authenticate()

    os.makedirs(comp_dir, exist_ok=True)

    # Retry in a loop rather than by recursion: the client is authenticated
    # once, the stack does not grow with each attempt, and a later unrelated
    # failure is not reported as chained to an earlier "Forbidden" error.
    while True:
        print(f"Downloading: {comp}")
        try:
            api.competition_download_files(comp, path=comp_dir)
        except Exception as e:
            message = str(e)
            rules_not_accepted = (
                "Forbidden for url" in message
                or "You must accept this competition" in message
            )
            if not rules_not_accepted:
                # Bare raise keeps the original traceback intact.
                raise
            print("You must accept the competition rules before downloading the dataset.")
            print("Link: ", f"https://www.kaggle.com/c/{comp}/rules")
            input("Press Enter to continue after you have accepted the rules...")
        else:
            return


def download_and_unzip_kaggle_competitions(txt_path, download_dir):
    """Download and unpack every competition listed in a text file.

    The file at ``txt_path`` is read as UTF-8 with one competition slug per
    line; blank lines are ignored. Each competition is downloaded into
    ``<download_dir>/<slug>``. If ``<slug>.zip`` exists there afterwards it is
    extracted in place and the archive is deleted; otherwise a notice is
    printed. A separator line is printed after each competition.

    Args:
        txt_path: Path to the text file listing competition slugs.
        download_dir: Root directory that receives one folder per competition.
    """
    with open(txt_path, 'r', encoding='utf-8') as listing:
        slugs = [slug for slug in map(str.strip, listing) if slug]

    separator = "-" * 84 + "\n"
    for slug in slugs:
        target_dir = os.path.join(download_dir, slug)
        download_kaggle_competitions(slug, target_dir)

        archive_path = os.path.join(target_dir, f"{slug}.zip")
        if not os.path.exists(archive_path):
            print(f"Zip file not found for {slug}")
        else:
            with zipfile.ZipFile(archive_path, 'r') as archive:
                archive.extractall(target_dir)
            # The archive is no longer needed once its contents are on disk.
            os.remove(archive_path)
            print(f"Unzipped and removed archive: {archive_path}")
        print(separator)


def move_overviews_to_folders(txt_dir, folders_dir):
    """Copy each ``<name>.txt`` overview into its matching dataset folder.

    Every entry of ``txt_dir`` whose name ends in ``.txt`` is copied to
    ``<folders_dir>/<name>/overview.txt``, provided that folder already
    exists; otherwise a "Skipping" notice is printed. Despite the function
    name, the source files are copied, not moved.

    Args:
        txt_dir: Directory holding the ``<name>.txt`` overview files.
        folders_dir: Directory holding one sub-folder per ``<name>``.
    """
    suffix = '.txt'
    for entry in os.listdir(txt_dir):
        if not entry.endswith(suffix):
            continue

        target_dir = os.path.join(folders_dir, entry[:-len(suffix)])
        if not os.path.isdir(target_dir):
            print(f"Folder {target_dir} does not exist. Skipping.")
            continue

        target_file = os.path.join(target_dir, 'overview.txt')
        shutil.copy(os.path.join(txt_dir, entry), target_file)
        print(f"Copied {entry} to {target_file}")


if __name__ == "__main__":
    # Script entry point: download every competition listed in
    # 'competitions.txt', run the preparation routines, then copy the
    # overview texts into the prepared dataset folders.
    data_root = './kaggle_data'
    rule = "~" * 30

    print("1. Download competitions")
    print(rule)
    download_and_unzip_kaggle_competitions('competitions.txt', data_root)

    print("2. Prepare competitions")
    print(rule)
    prepare_all_competitions(data_root)
    move_overviews_to_folders('./overviews', data_root)
    print(rule)
