import requests
from markdownify import markdownify as md
from ..openai import openai_call
from ..logger import get_logger
from ..constants import OUTPUT_PATH # save extracted abstract for debugging

# Module-level logger, obtained from the project's get_logger helper using this module's name.
logger = get_logger(__name__)

# System message for the model: reply with the abstract only, or with the
# sentinel 'FAILED' when the input has no abstract or is not markdown.
# extract_abstract() compares the model's reply against this sentinel.
SYSTEM_PROMPT = '''
You are a helpful assistant to clean the markdown to get the 'abstract' part of it.
If this part is missing or the user does not provide a markdown file, you should only reply with 'FAILED'.
If you successfully find the 'abstract' part, reply ONLY with its content.
'''
# User message template; {md_content} is filled in with str.format() using the
# markdown rendering of the webpage.
USER_PROMPT = '''
Read the following markdown file and get me the abstract part.
{md_content}
'''
def extract_abstract(url: str, paper_title: str, *, timeout: float = 30.0) -> bool | str:
    '''Fetch a paper's webpage and ask the LLM to extract its abstract.

    The page at ``url`` is downloaded, converted to markdown with markdownify
    and sent to ``openai_call`` together with SYSTEM_PROMPT, which tells the
    model to answer with the abstract only, or with 'FAILED' if it finds none.

    Args:
        url: Address of the webpage to download.
        paper_title: Title of the paper; only used in log messages.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The abstract text (surrounding whitespace stripped) on success, or
        ``False`` when the page cannot be fetched, converts to empty markdown,
        or the model's reply is empty, not a string, or the 'FAILED' sentinel.
    '''
    try:
        # Without a timeout requests may block indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Connection errors are reported like any other fetch failure so that
        # callers only ever have to handle the False return value.
        logger.info(f"failed to request the webpage of '{paper_title}': {url} ({err})")
        return False

    if response.status_code != 200:
        logger.info(f"failed to request the webpage of '{paper_title}': {url}")
        return False

    md_content = str(md(response.content))
    if not md_content.strip():
        logger.info(f"failed to convert the webpage of '{paper_title}' to markdown: {url}")
        return False

    messages = [
        {
            'role': 'system',
            'content': SYSTEM_PROMPT,
        },
        {
            'role': 'user',
            'content': USER_PROMPT.format(md_content=md_content),
        },
    ]
    reply = openai_call(messages)

    # Anything that is not a non-empty string cannot be an abstract.
    abstract = reply.strip() if isinstance(reply, str) else ''
    # The prompt spells the sentinel with quotes, so accept a quoted reply too.
    if not abstract or abstract.strip('\'"') == 'FAILED':
        logger.info(f"failed to extract the abstract of '{paper_title}' on this webpage: {url}")
        return False

    return abstract