#!/usr/bin/env python3
"""
Script to download all files from a Google Drive folder
XXXX
"""

import os
import sys
import subprocess
import json
from pathlib import Path
import time

def install_gdown():
    """Ensure the ``gdown`` package is importable, installing it via pip if not.

    Prints a status line for either outcome.

    Raises:
        subprocess.CalledProcessError: If ``pip install gdown`` exits with a
            non-zero status.
    """
    # Local import so this function does not rely on a file-level import.
    import importlib

    try:
        import gdown  # noqa: F401  (imported only to probe availability)
    except ImportError:
        print("Installing gdown...")
        # Use the running interpreter's pip so the package is installed into
        # the same environment this script executes in.
        subprocess.check_call([sys.executable, "-m", "pip", "install", "gdown"])
        # The import system caches directory listings; refresh them so the
        # package installed a moment ago can be imported in this process.
        importlib.invalidate_caches()
        print("✓ gdown installed successfully")
    else:
        print("✓ gdown is already installed")

def get_folder_id_from_url(url):
    """Extract the folder ID from a Google Drive folder URL.

    The ID is the path segment that immediately follows the first
    ``folders/`` in the URL; any query string (``?...``), fragment
    (``#...``) or further path segments are dropped.

    Args:
        url (str): Google Drive folder URL containing ``folders/<id>``.

    Returns:
        str: The non-empty folder ID.

    Raises:
        ValueError: If the URL has no ``folders/`` segment or the segment
            after it is empty.
    """
    _, marker, remainder = url.partition("folders/")
    if not marker:
        raise ValueError("Invalid Google Drive folder URL")

    # Cut the ID at the first query, fragment or path delimiter.
    folder_id = remainder
    for delimiter in ("?", "#", "/"):
        folder_id = folder_id.partition(delimiter)[0]

    # A URL such as ".../folders/?usp=sharing" carries no ID at all; reject
    # it here instead of handing an empty ID to the downloader.
    if not folder_id:
        raise ValueError("Invalid Google Drive folder URL")
    return folder_id

def _check_expected_files(output_dir, filenames, max_size_mb):
    """Check that each expected file exists in output_dir and fits the limit.

    Files larger than ``max_size_mb`` are deleted and counted as failures,
    as are files that are missing.

    Returns:
        tuple[int, int]: ``(downloaded_count, failed_count)``.
    """
    downloaded_count = 0
    failed_count = 0
    for filename in filenames:
        output_path = os.path.join(output_dir, filename)

        if not os.path.exists(output_path):
            print(f"✗ File not found: {filename}")
            failed_count += 1
            continue

        file_size_mb = os.path.getsize(output_path) / (1024 * 1024)
        if file_size_mb <= max_size_mb:
            print(f"✓ Downloaded: {filename} ({file_size_mb:.2f} MB)")
            downloaded_count += 1
        else:
            print(f"✗ File too large: {filename} ({file_size_mb:.2f} MB)")
            os.remove(output_path)
            failed_count += 1
    return downloaded_count, failed_count


def download_folder(folder_url, output_dir="downloaded_files", max_size_mb=100):
    """
    Download a Google Drive folder and verify the expected small files.

    The whole folder is downloaded with ``gdown.download_folder``; afterwards
    a fixed list of expected file names is checked in ``output_dir``. An
    expected file larger than ``max_size_mb`` is deleted and counted as
    failed. Files that are not in the expected list are left untouched,
    whatever their size.

    Args:
        folder_url (str): Google Drive folder URL
        output_dir (str): Directory to save downloaded files (created,
            including missing parent directories, if it does not exist)
        max_size_mb (int): Maximum size in MB an expected file may have

    Returns:
        bool: True if at least one expected file is present within the size
        limit; False if gdown is not installed, the download or the check
        raised an error, or no expected file passed the check.

    Raises:
        ValueError: If get_folder_id_from_url rejects ``folder_url``.
    """
    try:
        import gdown
    except ImportError:
        print("Error: gdown not installed. Please run install_gdown() first.")
        return False

    # Create output directory (parents=True so a nested path also works)
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Get folder ID from URL
    folder_id = get_folder_id_from_url(folder_url)
    print(f"Folder ID: {folder_id}")
    print(f"Maximum file size to download: {max_size_mb} MB")

    # Based on the folder contents, we know the smaller files are the "o4mini" versions
    # These are the files under 100MB that we want to download.
    # The second item is the size listed for the file; it is informational only.
    small_files = [
        ("final_queries_o4mini_x_cnbc.json", "33.6 MB"),
        ("final_queries_o4mini_x_cnn.json", "29.7 MB"),
        ("final_queries_o4mini_x_dw.json", "25.2 MB"),
        ("final_queries_o4mini_x_guardian.json", "37.6 MB"),
    ]
    expected_names = [filename for filename, _listed_size in small_files]

    print(f"Attempting to download {len(small_files)} files under {max_size_mb} MB...")

    # First, download the entire folder, then check the expected files
    print("Downloading folder contents...")
    try:
        gdown.download_folder(
            url=f"XXXX{folder_id}",
            output=output_dir,
            quiet=False,
            use_cookies=False,
            remaining_ok=True
        )

        # Now check which files were downloaded and their sizes
        print("\nChecking downloaded files...")
        downloaded_count, failed_count = _check_expected_files(
            output_dir, expected_names, max_size_mb
        )
    except Exception as e:
        # Deliberately broad: any download or filesystem error is reported
        # and turned into a False result rather than a traceback.
        print(f"Error during folder download: {e}")
        return False

    print("\nDownload Summary:")
    print(f"✓ Downloaded: {downloaded_count} files")
    print(f"✗ Failed: {failed_count} files")

    if downloaded_count > 0:
        print("✓ Download completed successfully!")
        return True

    print("No files were downloaded.")
    return False

def list_downloaded_files(output_dir="downloaded_files"):
    """Print every file under output_dir with its size, then the total size.

    The directory is walked recursively. Files are listed in sorted order by
    their path relative to ``output_dir``, so files with the same name in
    different subfolders stay distinguishable and the output is stable
    between runs. Sizes are printed in MB (1 MB = 1024 * 1024 bytes).

    Args:
        output_dir (str): Directory whose files are listed. If it is not an
            existing directory, a message is printed and nothing is listed.
    """
    root = Path(output_dir)
    if not root.is_dir():
        print(f"Directory {output_dir} does not exist")
        return

    print(f"\nDownloaded files in {output_dir}:")
    print("-" * 50)

    # rglob yields entries in filesystem order; sort for a stable listing.
    files = sorted(
        (path for path in root.rglob("*") if path.is_file()),
        key=lambda path: path.relative_to(root).as_posix(),
    )

    total_size = 0
    for file_path in files:
        size = file_path.stat().st_size
        total_size += size
        size_mb = size / (1024 * 1024)
        print(f"{file_path.relative_to(root)}: {size_mb:.2f} MB")

    total_mb = total_size / (1024 * 1024)
    print("-" * 50)
    print(f"Total size: {total_mb:.2f} MB")

def main():
    """Run the downloader: print the settings, ensure gdown, download, report."""
    folder_url = "XXXX"
    # folder_url = "XXXX"
    output_dir = "downloaded_files"
    max_size_mb = 100  # Only download files smaller than 100MB

    # Banner with the settings used for this run, followed by a blank line.
    banner = (
        "Google Drive File Downloader",
        "=" * 40,
        f"Folder URL: {folder_url}",
        f"Output directory: {output_dir}",
        f"Maximum file size: {max_size_mb} MB",
        "",
    )
    for line in banner:
        print(line)

    # Make sure gdown is available before attempting the download.
    install_gdown()
    print()

    # Download, then either report the failure or list what was fetched.
    if not download_folder(folder_url, output_dir, max_size_mb):
        print("Download failed. Please check the error messages above.")
        return

    print()
    list_downloaded_files(output_dir)

# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main() 