#!/usr/bin/env python3
"""
arXiv MCP Server
Provides academic paper search and metadata lookup via the arXiv API.
"""

import json
import xml.etree.ElementTree as ET
from datetime import datetime, timezone

import requests
from fastmcp import FastMCP

# Create the MCP server instance; the tool functions in this module register
# themselves on it through the @mcp.tool() decorator.
mcp = FastMCP("arXiv Academic Papers")

# Base URL of the arXiv query API.
# NOTE(review): this is plain HTTP — confirm whether the endpoint should be
# called over HTTPS instead.
ARXIV_API_URL = "http://export.arxiv.org/api/query"

# XML namespaces used when looking up elements of the arXiv Atom feed.
_ARXIV_NAMESPACES = {
    'atom': 'http://www.w3.org/2005/Atom',
    'arxiv': 'http://arxiv.org/schemas/atom',
}


def _child_text(parent, path):
    """Return the text of the first child matching *path*, or "" if absent or empty."""
    node = parent.find(path, _ARXIV_NAMESPACES)
    # An element that is present but empty (e.g. <title/>) has text None.
    if node is None or node.text is None:
        return ""
    return node.text


def _collapse_whitespace(text):
    """Replace every run of whitespace (newlines included) with a single space."""
    return " ".join(text.split())


def parse_arxiv_response(xml_content):
    """Parse an arXiv API Atom feed into a list of paper dictionaries.

    Args:
        xml_content: Raw Atom XML from the arXiv query API (``str`` or
            ``bytes``).

    Returns:
        A list with one dict per ``<entry>``. Each dict has the keys ``id``,
        ``title``, ``authors``, ``summary``, ``published``, ``updated``,
        ``categories``, ``primary_category``, ``abs_url`` and ``pdf_url``.
        Missing or empty elements produce ``""`` (or an empty list) rather
        than ``None``. Title and summary have all whitespace runs collapsed
        to single spaces, so hard-wrapped text reads as one line.

        If the payload cannot be parsed, a dict ``{"error": message}`` is
        returned instead of raising; callers detect failure by checking for
        the ``"error"`` key.
    """
    ns = _ARXIV_NAMESPACES
    try:
        root = ET.fromstring(xml_content)

        papers = []
        for entry in root.findall('atom:entry', ns):
            title_text = _collapse_whitespace(_child_text(entry, 'atom:title'))
            summary_text = _collapse_whitespace(_child_text(entry, 'atom:summary'))

            # Authors: skip <author> elements that have no usable <name>.
            authors = []
            for author in entry.findall('atom:author', ns):
                author_name = _child_text(author, 'atom:name')
                if author_name:
                    authors.append(author_name)

            # Links: match on the "/abs/" and "/pdf/" path segments rather
            # than bare substrings, so unrelated links (e.g. a DOI URL that
            # happens to contain "abs") are not mistaken for the abstract.
            arxiv_id = ""
            pdf_url = ""
            abs_url = ""
            for link in entry.findall('atom:link', ns):
                href = link.get('href', '')
                if '/abs/' in href:
                    abs_url = href
                    # Keep everything after "/abs/" so old-style identifiers
                    # such as "hep-th/9901001v1" retain their archive prefix.
                    arxiv_id = href.split('/abs/', 1)[1]
                elif link.get('title', '') == 'pdf' or '/pdf/' in href:
                    pdf_url = href

            # All category terms, dropping empty ones.
            terms = (cat.get('term', '') for cat in entry.findall('atom:category', ns))
            categories = [term for term in terms if term]

            primary_category = entry.find('arxiv:primary_category', ns)
            primary_cat = primary_category.get('term', '') if primary_category is not None else ""

            papers.append({
                "id": arxiv_id,
                "title": title_text,
                "authors": authors,
                "summary": summary_text,
                "published": _child_text(entry, 'atom:published'),
                "updated": _child_text(entry, 'atom:updated'),
                "categories": categories,
                "primary_category": primary_cat,
                "abs_url": abs_url,
                "pdf_url": pdf_url,
            })

        return papers
    except Exception as e:
        # Deliberately broad: callers rely on an error dict, never an exception.
        return {"error": f"解析XMLFailed: {str(e)}"}

@mcp.tool()
def search_papers(query: str, max_results: int = 10, sort_by: str = "relevance"):
    """Search arXiv and return the matching papers.

    Args:
        query: Search expression sent unchanged as the ``search_query``
            parameter (for example ``"au:name"`` or ``"cat:cs.AI"``).
        max_results: Maximum number of papers to request. Values above 100
            are capped at 100; negative values are rejected.
        sort_by: Sort key: ``"relevance"``, ``"lastUpdatedDate"`` or
            ``"submittedDate"``. Results are always requested in descending
            order.

    Returns:
        On success, a dict with ``query``, ``total_results`` (the number of
        papers contained in this response), ``sort_by`` and ``papers``.
        On any failure, a dict with a single ``"error"`` key; this function
        does not raise.
    """
    # Validate locally so a bad argument yields a precise message without a
    # network round trip that would only report an HTTP error status.
    allowed_sorts = ("relevance", "lastUpdatedDate", "submittedDate")
    if sort_by not in allowed_sorts:
        return {
            "error": f"Invalid sort_by {sort_by!r}; expected one of: {', '.join(allowed_sorts)}"
        }
    if max_results < 0:
        return {"error": f"max_results must be non-negative, got {max_results}"}

    max_results = min(max_results, 100)

    params = {
        "search_query": query,
        "start": 0,
        "max_results": max_results,
        "sortBy": sort_by,
        "sortOrder": "descending",
    }

    try:
        response = requests.get(ARXIV_API_URL, params=params, timeout=30)

        if response.status_code != 200:
            return {"error": f"API调用Failed: HTTP {response.status_code}"}

        papers = parse_arxiv_response(response.content)

        # The parser reports failures as an error dict; pass it through.
        if isinstance(papers, dict) and "error" in papers:
            return papers

        return {
            "query": query,
            "total_results": len(papers),
            "sort_by": sort_by,
            "papers": papers,
        }
    except Exception as e:
        # Tool boundary: report network/unexpected failures as an error dict.
        return {"error": f"SearchFailed: {str(e)}"}

@mcp.tool()
def search_by_author(author_name: str, max_results: int = 10):
    """Search papers by author, most recently submitted first.

    Args:
        author_name: Author name, placed after the ``au:`` field prefix.
        max_results: Maximum number of papers to request.

    Returns:
        Whatever ``search_papers`` returns for the query ``au:<author_name>``
        sorted by ``submittedDate``.
    """
    # NOTE(review): relies on the decorated search_papers still being directly
    # callable — confirm for the installed fastmcp version.
    return search_papers(f"au:{author_name}", max_results, "submittedDate")

@mcp.tool()
def search_by_category(category: str, max_results: int = 10):
    """Search papers in a category, most recently submitted first.

    Args:
        category: Category identifier, placed after the ``cat:`` field prefix
            (for example ``cs.AI``).
        max_results: Maximum number of papers to request.

    Returns:
        Whatever ``search_papers`` returns for the query ``cat:<category>``
        sorted by ``submittedDate``.
    """
    # NOTE(review): relies on the decorated search_papers still being directly
    # callable — confirm for the installed fastmcp version.
    return search_papers(f"cat:{category}", max_results, "submittedDate")

@mcp.tool()
def search_by_title(title: str, max_results: int = 10):
    """Search papers by title, ordered by relevance.

    Args:
        title: Title text, placed after the ``ti:`` field prefix.
        max_results: Maximum number of papers to request.

    Returns:
        Whatever ``search_papers`` returns for the query ``ti:<title>``
        sorted by ``relevance``.
    """
    # NOTE(review): relies on the decorated search_papers still being directly
    # callable — confirm for the installed fastmcp version.
    return search_papers(f"ti:{title}", max_results, "relevance")

@mcp.tool()
def get_paper_by_id(arxiv_id: str):
    """Fetch the details of a single paper by its arXiv identifier.

    Args:
        arxiv_id: Identifier sent as the ``id_list`` query parameter.

    Returns:
        ``{"arxiv_id": arxiv_id, "paper": <paper dict>}`` when the API
        returns at least one entry; otherwise a dict with a single
        ``"error"`` key (non-200 response, parse failure, no entry found,
        or any exception raised while making the request).
    """
    try:
        resp = requests.get(
            ARXIV_API_URL,
            params={"id_list": arxiv_id, "max_results": 1},
            timeout=30,
        )

        if resp.status_code != 200:
            return {"error": f"API调用Failed: HTTP {resp.status_code}"}

        parsed = parse_arxiv_response(resp.content)

        # The parser signals failure with an error dict; hand it back as is.
        if isinstance(parsed, dict) and "error" in parsed:
            return parsed

        if not parsed:
            return {"error": f"Not foundID为 {arxiv_id} 的论文"}

        return {"arxiv_id": arxiv_id, "paper": parsed[0]}
    except Exception as exc:
        return {"error": f"Get/Fetch论文Failed: {str(exc)}"}

@mcp.tool()
def get_recent_papers(category: str = None, max_results: int = 10):
    """Fetch the most recently submitted papers, optionally within a category.

    Args:
        category: Optional category identifier; when given, the query is
            ``cat:<category>``. When omitted or empty, the literal query
            ``all`` is used.
        max_results: Maximum number of papers to request.

    Returns:
        Whatever ``search_papers`` returns for that query sorted by
        ``submittedDate``.
    """
    # NOTE(review): the bare query "all" is presumably meant to match every
    # paper — confirm how the arXiv API interprets it.
    query = f"cat:{category}" if category else "all"

    # NOTE(review): relies on the decorated search_papers still being directly
    # callable — confirm for the installed fastmcp version.
    return search_papers(query, max_results, "submittedDate")

@mcp.tool()
def advanced_search(title: str = None, author: str = None, abstract: str = None, 
                   category: str = None, max_results: int = 10):
    """Search with any combination of title, author, abstract and category.

    Each non-empty criterion becomes a ``<prefix>:<value>`` clause (``ti``,
    ``au``, ``abs``, ``cat``, in that order) and the clauses are joined with
    ``" AND "``.

    Args:
        title: Optional title text.
        author: Optional author name.
        abstract: Optional abstract text.
        category: Optional category identifier.
        max_results: Maximum number of papers to request.

    Returns:
        Whatever ``search_papers`` returns for the combined query sorted by
        ``relevance``, or an error dict when no criterion was supplied.
    """
    criteria = (
        ("ti", title),
        ("au", author),
        ("abs", abstract),
        ("cat", category),
    )
    clauses = [f"{prefix}:{value}" for prefix, value in criteria if value]

    if not clauses:
        return {"error": "至少Need/RequireProvide一个Search条件"}

    # NOTE(review): relies on the decorated search_papers still being directly
    # callable — confirm for the installed fastmcp version.
    return search_papers(" AND ".join(clauses), max_results, "relevance")

@mcp.tool()
def get_categories():
    """Return a static list of arXiv categories.

    Returns:
        A dict with three keys: ``main_categories`` (top-level archive code
        to name), ``computer_science_subcategories`` (``cs.*`` code to name)
        and ``note`` (a short remark for the user). The data is hard-coded
        in this function; no request is made.
    """
    # The original literal listed "physics" twice with the same value; the
    # duplicate key is dropped here, which leaves the resulting dict unchanged.
    categories = {
        "cs": "Computer Science",
        "math": "Mathematics",
        "physics": "Physics",
        "astro-ph": "Astrophysics",
        "cond-mat": "Condensed Matter",
        "gr-qc": "General Relativity and Quantum Cosmology",
        "hep-ex": "High Energy Physics - Experiment",
        "hep-lat": "High Energy Physics - Lattice",
        "hep-ph": "High Energy Physics - Phenomenology",
        "hep-th": "High Energy Physics - Theory",
        "math-ph": "Mathematical Physics",
        "nlin": "Nonlinear Sciences",
        "nucl-ex": "Nuclear Experiment",
        "nucl-th": "Nuclear Theory",
        "quant-ph": "Quantum Physics",
        "q-bio": "Quantitative Biology",
        "q-fin": "Quantitative Finance",
        "stat": "Statistics",
        "eess": "Electrical Engineering and Systems Science",
        "econ": "Economics",
    }

    detailed_cs = {
        "cs.AI": "Artificial Intelligence",
        "cs.CL": "Computation and Language",
        "cs.CC": "Computational Complexity",
        "cs.CE": "Computational Engineering, Finance, and Science",
        "cs.CG": "Computational Geometry",
        "cs.GT": "Computer Science and Game Theory",
        "cs.CV": "Computer Vision and Pattern Recognition",
        "cs.CY": "Computers and Society",
        "cs.CR": "Cryptography and Security",
        "cs.DS": "Data Structures and Algorithms",
        "cs.DB": "Databases",
        "cs.DL": "Digital Libraries",
        "cs.DM": "Discrete Mathematics",
        "cs.DC": "Distributed, Parallel, and Cluster Computing",
        "cs.ET": "Emerging Technologies",
        "cs.FL": "Formal Languages and Automata Theory",
        "cs.GL": "General Literature",
        "cs.GR": "Graphics",
        "cs.AR": "Hardware Architecture",
        "cs.HC": "Human-Computer Interaction",
        "cs.IR": "Information Retrieval",
        "cs.IT": "Information Theory",
        "cs.LG": "Machine Learning",
        "cs.LO": "Logic in Computer Science",
        "cs.MS": "Mathematical Software",
        "cs.MA": "Multiagent Systems",
        "cs.MM": "Multimedia",
        "cs.NI": "Networking and Internet Architecture",
        "cs.NE": "Neural and Evolutionary Computing",
        "cs.NA": "Numerical Analysis",
        "cs.OS": "Operating Systems",
        "cs.OH": "Other Computer Science",
        "cs.PF": "Performance",
        "cs.PL": "Programming Languages",
        "cs.RO": "Robotics",
        "cs.SI": "Social and Information Networks",
        "cs.SE": "Software Engineering",
        "cs.SD": "Sound",
        "cs.SC": "Symbolic Computation",
        "cs.SY": "Systems and Control",
    }

    return {
        "main_categories": categories,
        "computer_science_subcategories": detailed_cs,
        "note": "这是主要分类，每个分类下还有更细的子分类",
    }

@mcp.tool()
def get_paper_stats(arxiv_id: str):
    """Return basic statistics about a single paper.

    Args:
        arxiv_id: Identifier passed to ``get_paper_by_id``.

    Returns:
        The error dict from ``get_paper_by_id`` if the lookup failed.
        Otherwise a dict with ``arxiv_id``, ``title_length``,
        ``abstract_length``, ``author_count``, ``category_count``,
        ``published_date``, ``last_updated``, ``primary_category``,
        ``all_categories`` and ``days_since_publication``. The last value
        is an integer number of days, or a fallback string when the
        publication date cannot be parsed.
    """
    # NOTE(review): relies on the decorated get_paper_by_id still being
    # directly callable — confirm for the installed fastmcp version.
    paper = get_paper_by_id(arxiv_id)

    if "error" in paper:
        return paper

    paper_data = paper["paper"]

    stats = {
        "arxiv_id": arxiv_id,
        "title_length": len(paper_data["title"]),
        "abstract_length": len(paper_data["summary"]),
        "author_count": len(paper_data["authors"]),
        "category_count": len(paper_data["categories"]),
        "published_date": paper_data["published"],
        "last_updated": paper_data["updated"],
        "primary_category": paper_data["primary_category"],
        "all_categories": paper_data["categories"],
    }

    # Days elapsed since publication. Only the leading YYYY-MM-DD part of the
    # timestamp is used.
    try:
        published_day = datetime.strptime(paper_data["published"][:10], "%Y-%m-%d").date()
    except (TypeError, ValueError):
        # Missing or malformed date: keep the best-effort fallback value.
        stats["days_since_publication"] = "无法计算"
    else:
        # Compare calendar dates in UTC so the result does not depend on the
        # server's local time zone.
        # NOTE(review): assumes the feed's timestamps are UTC — confirm.
        today_utc = datetime.now(timezone.utc).date()
        stats["days_since_publication"] = (today_utc - published_day).days

    return stats

@mcp.tool()
def get_api_info():
    """Return a static description of the arXiv API as exposed by this server.

    Returns:
        A dict of hard-coded descriptive fields: service name, description,
        coverage, feature list, supported search-field prefixes, sort
        options, rate-limit note, documentation URL and data format.
    """
    features = [
        "论文Search",
        "作者Search",
        "分类浏览",
        "高级Search",
        "论文详情",
        "最新论文",
    ]
    search_fields = [
        "ti: 标题",
        "au: 作者",
        "abs: 摘要",
        "cat: 分类",
        "all: 全文",
    ]
    sort_options = ["relevance", "lastUpdatedDate", "submittedDate"]

    info = {}
    info["service"] = "arXiv API"
    info["description"] = "Free的学术论文预印本Data库"
    info["coverage"] = "物理学、数学、计算机科学、定量生物学、定量金融学、统计学等"
    info["features"] = features
    info["search_fields"] = search_fields
    info["sort_options"] = sort_options
    info["rate_limit"] = "每3秒At most1次Request"
    info["note"] = "Free服务，无需API密钥"
    info["documentation"] = "https://arxiv.org/help/api/"
    info["data_format"] = "XML (已转换为JSON)"
    return info

# Start the MCP server only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    mcp.run()