# Instruction prompt with one worked example: given a passage and a single
# citation taken from it, the model is asked to answer with keywords or a short
# phrase explaining why the passage mentions that citation.
# The example's "text:" / "citation:" layout mirrors the EXTRACT_USER template.
# NOTE(review): the _SYS suffix suggests this is sent as the system message;
# confirm against the caller.
EXTRACT_DES_SYS = '''Read the text user provided and use keywords or a phrase to explain why the text mentions the specified citation, for example, what topic it belongs to in the text, etc.
Here is an example for you:
    user input:
        text: Retrieval Augmented Generation (RAG) leverages an extensive database of documents to extract task-related information that aids in response generation. Many efforts investigate various levels of retrieval granularity, including tokens (Khandelwal et al., 2019), entities (Févry et al., 2020; De Jong et al., 2021), and chunks (Liu, 2024; LangChain-team, 2024).
        citation: Khandelwal et al., 2019
    result:
        token level of RAG retrieval granularity to extract task-related information that aids in response generation.
'''

# Template for the passage-plus-citation input. {context} and {citation} are
# placeholders; this file performs no interpolation itself, so they are
# presumably filled in by the consuming code (verify against the caller).
EXTRACT_USER = '''text: {context}
citation: {citation}'''

# Instruction prompt with one worked example: given a passage and a single
# citation, the model is asked for the citation's logical hierarchical position
# in the passage. The example answer is a " - "-separated path running from the
# broad subject down to the specific item the citation is attached to.
# The example's "text:" / "citation:" layout mirrors the EXTRACT_USER template.
EXTRACT_LOC_SYS = '''Read the text provided by the user and extract the logical hierarchical position of the specified reference within the text.
Here is an example for you:
    user input:
        text: Retrieval Augmented Generation (RAG) leverages an extensive database of documents to extract task-related information that aids in response generation. Many efforts investigate various levels of retrieval granularity, including tokens (Khandelwal et al., 2019), entities (Févry et al., 2020; De Jong et al., 2021), and chunks (Liu, 2024; LangChain-team, 2024).
        citation: Khandelwal et al., 2019
    result:
        Retrieval Augmented Generation (RAG) - extract task-related information that aids in response generation - retrieval granularity - tokens
'''

# Instruction prompt with one worked example: given a topic and a passage, the
# model is asked to return entities (typed "topic" / "method" / "paper" /
# "problem") and relations between them as JSON made of two named lists,
# "entity list" and "relation list".
#
# The example output must itself be strictly valid JSON with one consistent set
# of keys ("entity name", "entity type", "entity1", "relation", "entity2",
# "description"): the model imitates the example, so a trailing comma or a
# misspelt key here tends to reappear in replies and break parsing or key
# lookups downstream.
# NOTE(review): the instruction says "JSON object" while the example's top
# level is an array; confirm which shape the caller expects.
EXTRACT_FRAMEWORK_SYS = '''Based on the topic and text content provided by the user, extract entities and relationships from the text according to its logical structure.
Please return a valid JSON object without adding any extra tags or explanations.
The selectable entity types include: "topic", "method", "paper", "problem"
The selectable relation include: "use method", "research on the method", "have problem", "address problem"
Here is an example for you:
user input: 
    topic: Retrieval
    content: Retrieval Augmented Generation (RAG) leverages an extensive database of documents to extract task-related information that aids in response generation. Many efforts investigate various levels of retrieval granularity, including tokens (Khandelwal et al., 2019), entities (Févry et al., 2020; De Jong et al., 2021), and chunks (Liu, 2024; LangChain-team, 2024). Other approaches have explored diverse retrieval methods, such as BM25 (Rasooli and Tetreault, 2015) and learning-based strategies (Khattab and Zaharia, 2020; Sachan et al., 2023; Sun et al., 2021). Despite its capabilities, RAG faces challenges in addressing complex questions due to difficulties in developing robust decision-making mechanisms. In contrast, we employ agents that use planning and reflection to gather essential information, effectively tackling complex problems.
result: 
    [
        {
            "list name": "entity list",
            "content": [
                {
                    "entity name": "retrieval",
                    "entity type": "topic"
                },
                {
                    "entity name": "Retrieval Augmented Generation (RAG)",
                    "entity type": "method"
                },
                {
                    "entity name": "Khandelwal et al., 2019",
                    "entity type": "paper"
                },
                {
                    "entity name": "Févry et al., 2020",
                    "entity type": "paper"
                },
                {
                    "entity name": "De Jong et al., 2021",
                    "entity type": "paper"
                },
                {
                    "entity name": "Liu, 2024",
                    "entity type": "paper"
                },
                {
                    "entity name": "LangChain-team, 2024",
                    "entity type": "paper"
                },
                {
                    "entity name": "BM25",
                    "entity type": "method"
                },
                {
                    "entity name": "Rasooli and Tetreault, 2015",
                    "entity type": "paper"
                },
                {
                    "entity name": "learning-based strategies",
                    "entity type": "method"
                },
                {
                    "entity name": "Khattab and Zaharia, 2020",
                    "entity type": "paper"
                },
                {
                    "entity name": "Sachan et al., 2023",
                    "entity type": "paper"
                },
                {
                    "entity name": "Sun et al., 2021",
                    "entity type": "paper"
                },
                {
                    "entity name": "address complex questions",
                    "entity type": "problem"
                },
                {
                    "entity name": "agents that use planning and reflection to gather essential information",
                    "entity type": "method"
                }
            ]
        },
        {
            "list name": "relation list",
            "content": [
                {
                    "entity1": "retrieval",
                    "relation": "use method",
                    "entity2": "Retrieval Augmented Generation (RAG)",
                    "description": "Retrieval Augmented Generation (RAG) is a retrieval method"
                },
                {
                    "entity1": "Khandelwal et al., 2019",
                    "relation": "research on the method",
                    "entity2": "Retrieval Augmented Generation (RAG)",
                    "description": "Khandelwal et al., 2019 researches on token level retrieval granularity"
                },
                {
                    "entity1": "Févry et al., 2020",
                    "relation": "research on the method",
                    "entity2": "Retrieval Augmented Generation (RAG)",
                    "description": "Févry et al., 2020 researches on entity level retrieval granularity"
                },
                {
                    "entity1": "De Jong et al., 2021",
                    "relation": "research on the method",
                    "entity2": "Retrieval Augmented Generation (RAG)",
                    "description": "De Jong et al., 2021 researches on entity level retrieval granularity"
                },
                {
                    "entity1": "Liu, 2024",
                    "relation": "research on the method",
                    "entity2": "Retrieval Augmented Generation (RAG)",
                    "description": "Liu, 2024 researches on chunk level retrieval granularity"
                },
                {
                    "entity1": "LangChain-team, 2024",
                    "relation": "research on the method",
                    "entity2": "Retrieval Augmented Generation (RAG)",
                    "description": "LangChain-team, 2024 researches on chunk level retrieval granularity"
                },
                {
                    "entity1": "Retrieval Augmented Generation (RAG)",
                    "relation": "use method",
                    "entity2": "BM25",
                    "description": "BM25 is a retrieval method"
                },
                {
                    "entity1": "Rasooli and Tetreault, 2015",
                    "relation": "research on the method",
                    "entity2": "BM25",
                    "description": "Rasooli and Tetreault, 2015 researches on BM25"
                },
                {
                    "entity1": "Retrieval Augmented Generation (RAG)",
                    "relation": "use method",
                    "entity2": "learning-based strategies",
                    "description": "learning-based strategies are used in retrieval method"
                },
                {
                    "entity1": "Khattab and Zaharia, 2020",
                    "relation": "research on the method",
                    "entity2": "learning-based strategies",
                    "description": "Khattab and Zaharia, 2020 researches on learning-based strategies"
                },
                {
                    "entity1": "Sachan et al., 2023",
                    "relation": "research on the method",
                    "entity2": "learning-based strategies",
                    "description": "Sachan et al., 2023 researches on learning-based strategies"
                },
                {
                    "entity1": "Sun et al., 2021",
                    "relation": "research on the method",
                    "entity2": "learning-based strategies",
                    "description": "Sun et al., 2021 researches on learning-based strategies"
                },
                {
                    "entity1": "Retrieval Augmented Generation (RAG)",
                    "relation": "have problem",
                    "entity2": "address complex questions",
                    "description": "RAG has problem addressing complex questions due to difficulties in developing robust decision-making mechanisms"
                },
                {
                    "entity1": "retrieval",
                    "relation": "use method",
                    "entity2": "agents that use planning and reflection to gather essential information",
                    "description": "agents that use planning and reflection to gather essential information can be used for retrieval"
                },
                {
                    "entity1": "agents that use planning and reflection to gather essential information",
                    "relation": "address problem",
                    "entity2": "address complex questions",
                    "description": "agents that use planning and reflection to gather essential information can effectively address complex questions"
                }
            ]
        }
    ]'''
    
# Template for the topic-plus-passage input used with the entity/relation
# extraction prompt; its "topic:" / "content:" labels match the worked example
# in EXTRACT_FRAMEWORK_SYS. {topic} and {content} are placeholders, presumably
# filled in by the consuming code (verify against the caller).
EXTRACT_FRAMEWORK_USER = '''Extract entities and relations based on the information below and return a JSON object.
topic: {topic}
content: {content}'''

# Single-message prompt asking the model to classify a paper excerpt as either
# a markdown title or body text, and to reply with a JSON object holding
# "type" and "content".
#
# The body-text sample reply uses JSON `null` for the empty content: the prompt
# itself demands valid JSON, and the Python-style `None` is not a JSON token.
# NOTE(review): if replies are parsed with a Python-literal evaluator instead
# of a JSON parser, confirm it accepts `null`.
# NOTE(review): the literal { } of the sample replies sit next to the {content}
# placeholder, so a brace-driven formatter (e.g. Python str.format) would trip
# over them; confirm how the placeholder is substituted.
PARA_CLASSIFER = '''The following content is an excerpt extracted from a paper. Your task is to judge whether this excerpt is a title in markdown format or text content.
If the excerpt is a title and the content of the title is title_content, reply with 
{
"type": "title", 
"content": title_content
}
If the excerpt is a content text, reply with
{
"type": "content", 
"content": null
}
Please return a valid JSON object without adding any extra tags or explanations.
excerpt: {content}'''

# Instruction prompt with one worked example: parse a bibliography entry into
# JSON with "authors", "title" and "date". Per the instruction, the date keeps
# only the year plus any letter suffix that directly follows it (the example
# yields "2022a"); venue and URL are dropped from the example output.
# The example's "reference entry:" label matches the PARSE_REF_USER template.
PARSE_REF_SYS = '''Extract the list of authors, article title, and date from the reference entry provided by the user, and return it in JSON format.
Please note that only the year and any immediately following letters (if present) should be retained from the extracted time.
Here is an example:
reference entry: Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou. Chain of thought prompting elicits reasoning in large language models. Conference on Neural Information Processing Systems (NeurIPS), 2022a. URL https://arxiv. org/pdf/2201.11903.
parse result:
    {
        "authors": "Jason Wei, Xuezhi Wang, Dale Schuurmans, Maarten Bosma, Brian Ichter, Fei Xia, Ed Chi, Quoc Le, and Denny Zhou",
        "title": "Chain of thought prompting elicits reasoning in large language models",
        "date": "2022a"
    }''' 

# Template for a single bibliography entry to parse; the "reference entry:"
# label matches the worked example in PARSE_REF_SYS. {ref} is a placeholder,
# presumably filled in by the consuming code (verify against the caller).
PARSE_REF_USER = "reference entry: {ref}"

# Instruction prompt with one worked example: from a paper's title, abstract
# and the subtitles of its related-work section, the model is asked to return
# typed entities and the relations between them as two named lists,
# "entity list" and "relation list".
#
# "problem" is listed among the selectable entity types because the worked
# example itself emits a "problem" entity and "have problem" /
# "address problem" relations that point at it.
# Each relation appears once in the example; a repeated entry would teach the
# model to emit duplicate edges.
# NOTE(review): "agent for retrieval" is in the example's entity list but no
# relation mentions it; the duplicate entry removed from the end of the
# relation list may have been meant for it. Confirm and add one if so.
TITLE_ABS_EXTRACTION_SYS = '''Extract entities and relationships based on the paper title and abstract content.
The selectable entity types include: "topic", "method", "paper", "problem"
The selectable relation include: "use method", "research on the method", "have problem", "address problem", "subtopic"
Here is an example for you:
paper title: GraphReader: Building Graph-based Agent to Enhance Long-Context Abilities of Large Language Models
abstract: Long-context capabilities are essential for large language models (LLMs) to tackle complex and long-input tasks. Despite numerous efforts made to optimize LLMs for long contexts, challenges persist in robustly processing long inputs. In this paper, we introduce GraphReader, a graph-based agent system designed to handle long texts by structuring them into a graph and employing an agent to explore this graph autonomously. Upon receiving a question, the agent first undertakes a step-by-step analysis and devises a rational plan. It then invokes a set of predefined functions to read node content and neighbors, facilitating a coarse-to-fine exploration of the graph. Throughout the exploration, the agent continuously records new insights and reflects on current circumstances to optimize the process until it has gathered sufficient information to generate an answer. Experimental results on the LV-Eval dataset reveal that GraphReader, using a 4k context window, consistently outperforms GPT-4-128k across context lengths from 16k to 256k by a large margin. Additionally, our approach demonstrates superior performance on four challenging single-hop and multi-hop benchmarks.
subtitles of related work: Long-Context LLMs, Retrieval, Agent for Retrieval
result:
    [
        {
            "list name": "entity list",
            "content": [
                {
                    "entity name": "GraphReader: Building Graph-based Agent to Enhance Long-Context Abilities of Large Language Models",
                    "entity type": "paper"
                },
                {
                    "entity name": "long-context capabilities of LLM",
                    "entity type": "topic"
                },
                {
                    "entity name": "robustly process long inputs",
                    "entity type": "problem"
                },
                {
                    "entity name": "GraphReader",
                    "entity type": "method"
                },
                {
                    "entity name": "long-context LLMs",
                    "entity type": "topic"
                },
                {
                    "entity name": "retrieval",
                    "entity type": "topic"
                },
                {
                    "entity name": "agent for retrieval",
                    "entity type": "topic"
                }
            ]
        },
        {
            "list name": "relation list",
            "content": [
                {
                    "entity1": "long-context capabilities of LLM",
                    "relation": "have problem",
                    "entity2": "robustly process long inputs",
                    "description": "robustly processing long inputs is a problem in improving long-context capabilities of LLM"
                },
                {
                    "entity1": "GraphReader: Building Graph-based Agent to Enhance Long-Context Abilities of Large Language Models",
                    "relation": "use method",
                    "entity2": "GraphReader",
                    "description": "the paper GraphReader: Building Graph-based Agent to Enhance Long-Context Abilities of Large Language Models proposes the GraphReader" 
                },
                {
                    "entity1": "GraphReader",
                    "relation": "address problem",
                    "entity2": "robustly process long inputs",
                    "description": "GraphReader handles long texts by structuring them into a graph and employing an agent to explore this graph autonomously"
                },
                {
                    "entity1": "long-context LLMs",
                    "relation": "subtopic",
                    "entity2": "long-context capabilities of LLM",
                    "description": "long-context LLMs are LLMs capable of processing long-context"
                },
                {
                    "entity1": "retrieval",
                    "relation": "subtopic",
                    "entity2": "long-context capabilities of LLM",
                    "description": "retrieval method can be used to strengthen long-context capabilities of LLM"
                },
                {
                    "entity1": "GraphReader",
                    "relation": "use method",
                    "entity2": "retrieval",
                    "description": "GraphReader needs to use methods for retrieval on graph structures."
                }
            ]
        }
    ]'''