
import pandas as pd
import requests
import threading
import os
import time
from urllib.parse import urlparse
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

def download_image(url, download_dir, log_file, delay_between_requests=1):
    '''
    Downloads a single image from the given URL, saves it to the specified directory,
    and logs errors if any occur.

    The request is retried up to 3 times (with backoff) on the status codes
    429, 500, 502, 503 and 504. Any failure -- network error, non-200 status,
    unusable file name, or file-system error -- is printed and appended to
    ``log_file``; nothing is raised to the caller.

    Args:
    url (str): The image URL to download.
    download_dir (str): The directory where the image will be saved.
    log_file (str): The file to log any failed downloads.
    delay_between_requests (float): Delay between requests in seconds (default: 1 second).
    '''
    try:
        retry_options = {
            "total": 3,  # Retry up to 3 times
            "status_forcelist": [429, 500, 502, 503, 504],
            "backoff_factor": 1,  # Exponential backoff
        }
        try:
            # `allowed_methods` is the current keyword (urllib3 >= 1.26).
            retry_strategy = Retry(allowed_methods=["GET"], **retry_options)
        except TypeError:
            # Older urllib3 releases only accept the original keyword name,
            # which was removed in urllib3 2.0.
            retry_strategy = Retry(method_whitelist=["GET"], **retry_options)
        adapter = HTTPAdapter(max_retries=retry_strategy)

        # The context manager closes the session (and its connection pool)
        # even when the request raises.
        with requests.Session() as session:
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            response = session.get(url, timeout=10)

        # Ensure rate limiting
        time.sleep(delay_between_requests)

        if response.status_code != 200:
            raise RuntimeError(f"Failed to download {url}, Status Code: {response.status_code}")

        filename = os.path.basename(urlparse(url).path)
        if not filename:
            # A URL such as "https://host/" has no basename; joining an empty
            # name would make open() target the directory itself.
            raise ValueError(f"Cannot derive a file name from {url}")

        file_path = os.path.join(download_dir, filename)
        with open(file_path, 'wb') as file:
            file.write(response.content)
        print(f"Downloaded {filename}")
    except Exception as e:
        # Best-effort by design: one bad URL must not stop the rest of a batch.
        print(f"Error: {e}")
        # Explicit UTF-8 so a non-ASCII URL cannot raise inside this handler.
        with open(log_file, 'a', encoding='utf-8') as log:
            log.write(f"{url} - {str(e)}\n")

def download_images_from_csv(csv_file, url_column="url", download_dir="images", log_file="download_errors.log", threads=1, delay_between_requests=1):
    '''
    Reads image URLs from one column of a CSV file and downloads them all,
    either sequentially or spread across several worker threads.

    Rows whose URL cell is empty are skipped. With more than one thread the
    URL list is split by striding (worker i takes items i, i + threads, ...),
    and the call returns only after every worker has finished.

    Args:
    csv_file (str): Path to the CSV file with URLs.
    url_column (str): The column in the CSV file containing the URLs (default: "url").
    download_dir (str): Directory to save downloaded images (default: "images").
    log_file (str): File to log any failed downloads (default: "download_errors.log").
    threads (int): Number of threads to use for downloading (default: 1).
    delay_between_requests (float): Delay between requests in seconds (default: 1 second).
    '''
    directory_missing = not os.path.exists(download_dir)
    if directory_missing:
        os.makedirs(download_dir)

    frame = pd.read_csv(csv_file)
    pending = frame[url_column].dropna().tolist()

    # Single-threaded case: process everything in the calling thread.
    if not threads > 1:
        download_batch(pending, download_dir, log_file, delay_between_requests)
        return

    workers = []
    for offset in range(threads):
        worker = threading.Thread(
            target=download_batch,
            args=(pending[offset::threads], download_dir, log_file, delay_between_requests),
        )
        workers.append(worker)
        worker.start()

    # Block until every worker has drained its share of the URLs.
    for worker in workers:
        worker.join()

def download_batch(urls, download_dir, log_file, delay_between_requests):
    '''
    Downloads every URL in the batch, one after another, in the given order.

    Each URL is handed to download_image together with the shared settings.

    Args:
    urls (list): List of image URLs to download.
    download_dir (str): Directory to save downloaded images.
    log_file (str): File to log any failed downloads.
    delay_between_requests (float): Delay between requests in seconds.
    '''
    for image_url in urls:
        download_image(
            image_url,
            download_dir,
            log_file,
            delay_between_requests,
        )

if __name__ == "__main__":
    # Command-line entry point: parse the options and start the download run.
    import argparse

    cli = argparse.ArgumentParser(
        description="Download images from URLs in a CSV file with rate-limiting and multithreading."
    )

    # Required positional arguments.
    cli.add_argument("csv_file", help="Path to the CSV file containing image URLs.")
    cli.add_argument("download_dir", help="Directory where images will be downloaded.")

    # Optional tuning flags.
    cli.add_argument(
        "--url_column",
        default="url",
        help="Column name containing image URLs in the CSV file (default: 'url').",
    )
    cli.add_argument(
        "--log_file",
        default="download_errors.log",
        help="File to log failed downloads (default: 'download_errors.log').",
    )
    cli.add_argument(
        "--threads",
        type=int,
        default=1,
        help="Number of download threads to use (default: 1).",
    )
    cli.add_argument(
        "--delay",
        type=float,
        default=1.0,
        help="Delay between download requests in seconds (default: 1 second).",
    )

    options = cli.parse_args()

    download_images_from_csv(
        csv_file=options.csv_file,
        url_column=options.url_column,
        download_dir=options.download_dir,
        log_file=options.log_file,
        threads=options.threads,
        delay_between_requests=options.delay,
    )
