import re
import subprocess
import os

# Regex for the dataset archive links embedded in the saved page.
# The tail is non-greedy and cannot cross whitespace, quotes or angle
# brackets, so several links on one line come back as separate matches
# instead of one run-on match from the first link to the last "zip".
# The single "." path segment is kept as a one-character wildcard, exactly as
# in the earlier pattern; the dot in the host name is now matched literally.
pattern = r"https://nrvis\.com/./download/data[^\s\"'<>]*?\.zip"

# Saved HTML page that is scanned for download links
file_name = "page.html"

# Directory the downloaded archives are written to (created at start-up)
download_directory = "./network_repo_datasets"
os.makedirs(download_directory, exist_ok=True)


def find_matches(filename, pattern):
    """Return all non-overlapping matches of ``pattern`` in a text file.

    The file is read in one go as UTF-8 and scanned with ``findall``, so the
    result is a list of strings (or a list of tuples when the pattern
    contains more than one capturing group).

    Args:
        filename: Path of the text file to scan.
        pattern: Regular expression to search for.

    Returns:
        The list of matches in the order they occur in the file.
    """
    with open(filename, "r", encoding="utf-8") as handle:
        text = handle.read()

    regex = re.compile(pattern)
    return regex.findall(text)


def download_files(urls, folder):
    """Download every URL in ``urls`` into ``folder`` using ``wget``.

    Each file is saved under the last path segment of its URL, or as
    ``downloaded_file`` when the URL ends with a slash. Downloads are
    best-effort: a URL whose ``wget`` run exits with a non-zero status is
    reported and the remaining URLs are still attempted.

    Args:
        urls: Iterable of URL strings to fetch.
        folder: Directory the files are written to; created if it is missing.
            An empty string means the current working directory.
    """
    # Materialise once so any iterable works and the total is known up front.
    urls = list(urls)
    total = len(urls)

    # Do not rely on the caller having created the target directory.
    if folder:
        os.makedirs(folder, exist_ok=True)

    failed = []
    for index, url in enumerate(urls, start=1):
        print(f"Downloading file {index}/{total}: {url}")

        # Use the last path segment of the URL as the local file name.
        target_name = url.split("/")[-1]
        if not target_name:
            target_name = "downloaded_file"
        save_path = os.path.join(folder, target_name)

        # For curl use: ['curl', '-o', save_path, url]
        command = ["wget", "-O", save_path, url]

        # stdout is discarded (it was never read); the exit status tells us
        # whether the download actually succeeded.
        result = subprocess.run(command, stdout=subprocess.DEVNULL)
        if result.returncode != 0:
            failed.append(url)
            print(f"Failed to download {url} (wget exit code {result.returncode})")

    if failed:
        print(f"{len(failed)} of {total} downloads failed")


# Collect the dataset links found in the saved page
all_matches = find_matches(filename=file_name, pattern=pattern)

# Fetch every collected link into the download directory
download_files(urls=all_matches, folder=download_directory)

print("Downloaded files to:", download_directory)
