import os
import arxiv
from pypdf import PdfReader
import warnings


def _extract_text(pdf_path) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; passed straight to ``PdfReader``.

    Returns:
        The page texts joined with no separator between pages.
    """
    reader = PdfReader(pdf_path)
    # ``or ""`` keeps the join safe should a page yield None instead of a
    # string; joining once avoids rebuilding the string for every page.
    return "".join(page.extract_text() or "" for page in reader.pages)


def parse_arxiv(rag_agent, n: int = 1) -> list[str]:
    """Collect the extracted text of up to ``n`` distinct arXiv PDFs.

    Queries are obtained from ``rag_agent.get_arxiv_queries(n=n)``; for each
    query at most two search results are downloaded, converted to text and
    the downloaded file is deleted again. Duplicate texts are skipped.

    Args:
        rag_agent: Object exposing ``get_arxiv_queries(n=...)``. Each item it
            returns is passed as the ``query`` of an ``arxiv.Search``.
        n: Maximum number of texts to return. Values above 5 are clamped
            to 5 with a ``UserWarning``.

    Returns:
        ``['']`` when ``n`` is 0; otherwise a list of at most ``n`` unique
        texts in the order they were first retrieved (possibly fewer, or
        empty, if the searches yield too few results).

    Raises:
        ValueError: If ``n`` is not an ``int`` (``bool`` is rejected too)
            or is negative.
    """
    # Check the type before comparing: ``n < 0`` on a non-numeric value would
    # raise TypeError, and ``False == 0`` would slip through the zero shortcut.
    if isinstance(n, bool) or not isinstance(n, int) or n < 0:
        raise ValueError("parameter 'n' must be a non-negative integer")
    if n == 0:
        return ['']
    if n > 5:
        warnings.warn(
            "Parameter 'n' must be less than or equal to 5. It was automatically changed to 5.",
            UserWarning,
            stacklevel=2,  # attribute the warning to the caller's line
        )
        n = 5
    queries = rag_agent.get_arxiv_queries(n=n)

    texts: list[str] = []
    seen: set[str] = set()  # O(1) duplicate check that preserves list order
    client = arxiv.Client()
    for query in queries:
        search = arxiv.Search(query=query, max_results=2)
        for result in client.results(search):
            pdf_path = result.download_pdf()
            try:
                text = _extract_text(pdf_path)
            finally:
                # Always delete the download, even if extraction fails.
                os.remove(pdf_path)
            if text in seen:
                continue
            seen.add(text)
            texts.append(text)
            if len(texts) >= n:
                return texts
    return texts
