
# import subprocess
import os
from datetime import datetime
import json
import base64
import time
import uuid
import requests

# API key for ScraperAPI, read once at import time from the environment.
SCRAPER_KEY = os.environ.get('SCRAPER_KEY')
# Explicit check rather than `assert`: assert statements are stripped when
# Python runs with -O, which would let a missing key through unnoticed.
# AssertionError is raised on purpose so the import-time failure type and
# message stay exactly what they were.
if SCRAPER_KEY is None:
    raise AssertionError("Please set SCRAPER_KEY environment variable")


# Module-wide counters reported by download_pdf after every call.
# In the code visible here, proxy_num is only printed (never incremented),
# scriper_num counts successful ScraperAPI downloads, and jina_num counts
# calls where the ScraperAPI download failed.
proxy_num = 0
scriper_num = 0
jina_num = 0

def download_pdf(url, timeout=300):
    """Fetch ``url`` through ScraperAPI and save the response body to disk.

    The response body is written verbatim to a uniquely named ``.pdf`` file
    inside the ``PDF_PATH`` directory (a relative path, so it is resolved
    against the current working directory). The content is not inspected,
    so the saved file is whatever ScraperAPI returned.

    Args:
        url: Address of the document to fetch; forwarded to ScraperAPI as
            the ``url`` query parameter.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The path of the saved file on success, or ``None`` if the request
        or the file write failed.

    Side effects:
        Creates the output directory if needed, increments the module-level
        ``scriper_num`` counter on success or ``jina_num`` on failure, and
        prints the current counter values.
    """
    global proxy_num, scriper_num, jina_num

    # Create the output directory.
    output_dir = "PDF_PATH"
    os.makedirs(output_dir, exist_ok=True)

    # Build a unique file name: a second-resolution timestamp plus a random
    # UUID so that calls within the same second cannot collide.
    current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"document_{current_time}_{uuid.uuid4()}.pdf"
    file_path = os.path.join(output_dir, filename)

    max_retries = 1

    def try_scraper_download():
        """Attempt the download up to ``max_retries`` times; return path or None."""
        scraper_payload = {'api_key': SCRAPER_KEY, 'url': url}

        for attempt in range(max_retries):
            try:
                response = requests.get(
                    'https://api.scraperapi.com/',
                    params=scraper_payload,
                    timeout=timeout,
                )
                response.raise_for_status()

                with open(file_path, 'wb') as f:
                    f.write(response.content)

                return file_path

            except Exception as e:
                # Best-effort by design: any failure is reported as None.
                # A write that failed partway can leave a truncated file
                # behind, so remove it rather than leaving a corrupt ".pdf"
                # that the caller was never told about. The name is unique
                # to this call, so nothing else can be deleted by mistake.
                try:
                    os.remove(file_path)
                except OSError:
                    # Nothing was created (or it cannot be removed); the
                    # download failure itself is what gets reported.
                    pass

                # Only the final failed attempt is reported.
                if attempt == max_retries - 1:
                    print(f"ScraperAPI download failed: {str(e)}")
                    return None

        # Reached only when max_retries <= 0 (no attempt was made).
        return None

    result = try_scraper_download()
    if result:
        scriper_num += 1
    else:
        jina_num += 1
    print(f"proxy_num | scriper_num | jina_num: {proxy_num} | {scriper_num} | {jina_num}")
    return result


