from playwright.sync_api import sync_playwright
import logging
from typing import Optional
import time

# Configure logging: INFO level, each record formatted as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Extra HTTP request headers; expand_volume() hands this dict to
# page.set_extra_http_headers().
# NOTE(review): the Cookie value embeds what look like captured browser-session
# tokens (e.g. JSESSIONID, cf_clearance). They are presumably short-lived and
# arguably should not live in source control -- confirm, and consider loading
# them from configuration instead.
# NOTE(review): the User-Agent below (Chrome/93) differs from the user_agent
# that expand_volume() sets on the browser context (Chrome/120) -- confirm
# which one is intended.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36",
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language': 'en-US,en;q=0.5',
    'Connection': 'keep-alive',
    'Cookie': 'OptanonAlertBoxClosed=2024-12-02T05:43:25.876Z; MAID=7SztV6y3n2LkV/lHzOiE7A==; at_check=true; AMCVS_4D6368F454EC41940A4C98A6%40AdobeOrg=1; MACHINE_LAST_SEEN=2024-12-19T18%3A39%3A13.105-08%3A00; JSESSIONID=aaadJ4s6vmRN7a364Fypz; cf_clearance=jrsR.Lt3e4UwkSzoTuiikVsXb0dX.Fx4kyI5gRKLxME-1734663979-1.2.1.1-J_ZXvzYVlcnKoNM.bU1mOYTPMdiNPZgifmULkfUQObI.bFOA2haCW9UIwd1L9VW5dPfTVmBXcS5BDgnytt.UNRRwKv7I46td7WYvs8iyTgRSmB1DC.AqZrOjof560ahX3FrxIN_DxRvlPbJ8EtU1g2c18lgAi1qlZoWoW24cjgmk.FWiHsqKaE1UeIAmGslp7qe8XtRL1J3m0YvgZRJ0InyogMFNDr.yo405QqonbDaA6GhHA341imAEH0fSWjys4oaLeCRagyCpnEXTzDfvennRsb21peKoEk5btedrVp8ldB8eIcwp8GPsnUIaig5G2n6rBR4FCxNgF9pN2geg.WJv3oLQ3qdhXYPhsFB28i2UIXO9zN0iTwcyZDuxMptNoaXEH52s82uw18xBFd.6Gg; AMCV_4D6368F454EC41940A4C98A6%40AdobeOrg=179643557%7CMCIDTS%7C20078%7CMCMID%7C82070259187192443663734658279114766445%7CMCAID%7CNONE%7CMCOPTOUT-1734671189s%7CNONE%7CvVersion%7C5.5.0; OptanonConsent=isGpcEnabled=0&datestamp=Fri+Dec+20+2024+11%3A06%3A29+GMT%2B0800+(%E4%B8%AD%E5%9B%BD%E6%A0%87%E5%87%86%E6%97%B6%E9%97%B4)&version=202402.1.0&browserGpcFlag=0&isIABGlobal=false&hosts=&consentId=a7b9ace1-acfe-4d1b-933f-36af08d827f1&interactionCount=1&isAnonUser=1&landingPath=NotLandingPage&groups=1%3A1%2C3%3A1%2C2%3A1%2C4%3A1&geolocation=%3B&AwaitingReconsent=false'
}

def expand_volume(volume_number: int) -> None:
    """
    Open the journal's issue archive and expand the issue list of one volume.

    Launches a headless Chromium instance, loads the Developmental Cell
    "issues" page with the module-level ``headers`` applied, clicks the link
    whose text contains ``Volume <volume_number>``, waits for that volume's
    issue list to become visible, and logs how many ``<li>`` entries it holds.

    Side effects (all paths are relative to the current working directory):
        * ``page_content.html`` / ``page.png``: HTML and full-page screenshot
          taken right after the initial load.
        * ``page2.png``: full-page screenshot taken right after the click.
        * ``volume_<volume_number>.html``: HTML after the issue list is shown.

    Args:
        volume_number: Number of the volume to expand.

    Raises:
        RuntimeError: If the issue list is not visible after the wait.
        Exception: Any Playwright or I/O error is logged and re-raised.
    """
    with sync_playwright() as p:
        # Explicit sentinel so the cleanup below can tell whether the launch
        # succeeded, without inspecting locals().
        browser = None
        try:
            # Launch headless Chromium (runs in the background, no window).
            browser = p.chromium.launch(
                headless=True,
                args=[
                    # Disable the Blink "AutomationControlled" feature flag.
                    '--disable-blink-features=AutomationControlled',
                ],
            )
            context = browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                viewport={'width': 1920, 'height': 1080},
            )
            page = context.new_page()

            # The headers dict is only read here, so no `global` is needed.
            page.set_extra_http_headers(headers)

            # Navigate to the issue archive.
            url = "https://www.cell.com/developmental-cell/issues"
            logger.info(f"Accessing URL: {url}")
            page.goto(url)

            # Wait until network activity settles.
            page.wait_for_load_state("networkidle")

            # Dump the initial page (HTML + screenshot) for debugging.
            with open("page_content.html", "w", encoding="utf-8") as file:
                file.write(page.content())
            page.screenshot(path="page.png", full_page=True)
            logger.info("Page loaded")

            # Locate the link for the requested volume.
            # NOTE(review): `:has-text` is a substring match, so a short
            # number (e.g. 5) could also match "Volume 57" -- confirm the
            # archive page never presents such collisions.
            volume_selector = f"a:has-text('Volume {volume_number}')"
            volume_element = page.locator(volume_selector)

            # Make sure the link is scrolled into the viewport.
            volume_element.scroll_into_view_if_needed()

            # Click to expand the volume.
            logger.info(f"Clicking Volume {volume_number}")
            volume_element.click()

            # Capture the state right after the click, then wait for content.
            logger.info("Waiting for content to load...")
            page.screenshot(path="page2.png", full_page=True)

            # Match the group by its ".v<number>" suffix instead of a
            # hard-coded "d2020" prefix, so volumes filed under a different
            # prefix are found as well.
            # NOTE(review): assumes data-groupid always has the form
            # "<prefix>.v<number>" -- verify against the live markup.
            issues_selector = (
                f"div[data-groupid$='.v{volume_number}'] .list-of-issues__list"
            )
            page.wait_for_selector(issues_selector, state="visible", timeout=30000)

            # Double-check that the list is really visible.
            issues_element = page.locator(issues_selector)
            if not issues_element.is_visible():
                raise RuntimeError("Issues list not visible after clicking")

            # Count the individual issue entries as a sanity check.
            issues_count = issues_element.locator("li").count()
            logger.info(f"Found {issues_count} issues in Volume {volume_number}")

            # Save the expanded page.
            output_path = f"volume_{volume_number}.html"
            with open(output_path, "w", encoding="utf-8") as file:
                file.write(page.content())
            logger.info(f"Saved HTML content to {output_path}")

            # Keep the page open briefly before shutting down.
            # NOTE(review): with headless=True there is no window to inspect,
            # so this only delays shutdown -- confirm it is still wanted.
            time.sleep(5)

        except Exception as e:
            logger.error(f"Failed to expand volume: {str(e)}")
            raise
        finally:
            # Close the browser only if it was actually launched.
            if browser is not None:
                browser.close()
                logger.info("Browser closed")

def main():
    """Expand one hard-coded volume; log any failure instead of propagating it."""
    target_volume = 57  # volume number passed to expand_volume()
    try:
        expand_volume(target_volume)
    except Exception as exc:
        logger.error(f"Main execution failed: {str(exc)}")

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()