import contextlib
import os
import shutil
import tempfile
import urllib.request
import zipfile

def _extract_member(zip_ref, member_name, save_path):
    """Stream one archive member to *save_path* without leaving partial files.

    The data is first written to ``save_path + ".part"`` and only renamed onto
    ``save_path`` once the copy has finished, so an interrupted extraction can
    never be mistaken for a complete file by a later run.
    """
    partial_path = save_path + ".part"
    try:
        with zip_ref.open(member_name) as source, open(partial_path, "wb") as target:
            # Chunked copy: avoids holding a whole (possibly large) file in memory.
            shutil.copyfileobj(source, target)
        os.replace(partial_path, save_path)
    except BaseException:
        # Best-effort removal of the half-written file, then let the error propagate.
        with contextlib.suppress(OSError):
            os.unlink(partial_path)
        raise


def download_sci2pol_tasks(url=None, save_dir="../sci2pol_data"):
    """Download the Sci2Pol archive and extract its task ``.jsonl`` files.

    The archive is fetched into a temporary zip file, every member whose path
    ends in ``.jsonl`` and contains ``task`` is written (flattened to its base
    name) into *save_dir*, and the temporary zip is removed afterwards whether
    or not the run succeeded. Files that already exist in *save_dir* are left
    untouched.

    Download and extraction errors are not raised: they are reported on stdout
    together with manual download instructions.

    Args:
        url: Location of the zip archive. Defaults to the Sci2Pol Dropbox
            share link, which already carries ``dl=1`` to request a direct
            download.
        save_dir: Directory that receives the extracted files; created if
            missing. A relative path is resolved against the current working
            directory.

    Raises:
        OSError: If *save_dir* cannot be created (this happens before the
            error-reporting ``try`` block).
    """
    if url is None:
        # Dropbox share link; the trailing "dl=1" asks for the file itself
        # rather than the HTML preview page.
        url = "https://www.dropbox.com/scl/fo/rcnl89pat1oy4zdx9mno9/ANjoCRV89yYM4E7AzDOFXIk?rlkey=u19ss3oe20tm7oak18eh6eaex&st=c8nj3tfc&dl=1"
    os.makedirs(save_dir, exist_ok=True)

    print("Downloading Sci2Pol dataset from Dropbox...")

    temp_zip_path = None
    try:
        # mkstemp + close: reserve a unique path without keeping a handle open
        # while urlretrieve writes to the same file by name.
        fd, temp_zip_path = tempfile.mkstemp(suffix=".zip")
        os.close(fd)

        print("Downloading zip file...")
        urllib.request.urlretrieve(url, temp_zip_path)

        print("Extracting files...")
        with zipfile.ZipFile(temp_zip_path, "r") as zip_ref:
            for file_name in zip_ref.namelist():
                # Only task data files are wanted; everything else is ignored.
                if not (file_name.endswith(".jsonl") and "task" in file_name):
                    continue

                # Drop any folder structure inside the archive; this also keeps
                # every output inside save_dir regardless of the member path.
                base_name = os.path.basename(file_name)
                save_path = os.path.join(save_dir, base_name)

                if os.path.exists(save_path):
                    print(f"✔️ {base_name} already exists, skipping.")
                    continue

                print(f"Extracting {base_name}...")
                _extract_member(zip_ref, file_name, save_path)
                print(f"✅ Saved to {save_path}")

        print("🎉 All task files processed successfully!")

    except Exception as e:
        print(f"❌ Failed to download from Dropbox: {e}")
        print("Please check the Dropbox link or try downloading manually.")
        print("Manual download instructions:")
        print("1. Visit the Dropbox link in your browser")
        print("2. Click 'Download' to get the zip file")
        print(f"3. Extract the task*.jsonl files to {os.path.join(save_dir, '')}")
    finally:
        # Always remove the temporary archive, including after a failure.
        if temp_zip_path is not None:
            with contextlib.suppress(OSError):
                os.unlink(temp_zip_path)

# Kick off the download as soon as this file is executed.
# NOTE(review): the call is not wrapped in `if __name__ == "__main__":`, so it
# also runs (and hits the network) whenever this file is imported — confirm
# that is intended before adding a guard.
download_sci2pol_tasks()
