import os
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI


class FeedbackRAGAgent:
    """Generate textbook-grounded feedback on an erroneous LLM solution step.

    The agent splits a PDF into chunks, indexes them in a Chroma vector store,
    and uses an OpenAI-compatible chat endpoint twice per cycle: once to turn
    the reported error into a retrieval query, and once to write feedback
    from the retrieved passages.
    """

    def __init__(
        self,
        pdf_path,
        chroma_path,
        embedding_model_name,
        llm_model_name,
        llm_api_key,
        llm_api_base,
        chunk_size=500,
        chunk_overlap=50,
    ):
        """Load the PDF, set up embeddings, and open or build the vector store.

        Args:
            pdf_path: Path of the PDF to index.
            chroma_path: Directory where Chroma persists its data. A falsy
                value is passed through to Chroma unchanged.
            embedding_model_name: Model name given to ``HuggingFaceEmbeddings``.
            llm_model_name: Model name sent with every chat completion request.
            llm_api_key: API key for the OpenAI-compatible endpoint.
            llm_api_base: Base URL of the OpenAI-compatible endpoint.
            chunk_size: Maximum characters per chunk (default 500).
            chunk_overlap: Characters shared by adjacent chunks (default 50).
        """
        self.pdf_path = pdf_path
        self.chroma_path = chroma_path
        self.embedding_model_name = embedding_model_name
        self.llm_model_name = llm_model_name
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

        # Initialize Nebius or OpenAI client
        self.llm_client = OpenAI(api_key=llm_api_key, base_url=llm_api_base)

        # Load chunks and DB
        self.chunks = self._load_chunks()
        self.embeddings = HuggingFaceEmbeddings(model_name=self.embedding_model_name)
        self.db = self._create_or_load_chroma_db()

    def _load_chunks(self):
        """Read the PDF at ``self.pdf_path`` and split its pages into chunks."""
        pages = PyPDFLoader(self.pdf_path).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
        return splitter.split_documents(pages)

    def _has_persisted_db(self):
        """Return True when ``self.chroma_path`` is an existing, non-empty directory."""
        path = self.chroma_path
        return bool(path) and os.path.isdir(path) and bool(os.listdir(path))

    def _create_or_load_chroma_db(self):
        """Open the persisted Chroma store if one exists, otherwise build it.

        Calling ``Chroma.from_documents`` against an already populated persist
        directory inserts the same chunks again, so repeated start-ups would
        accumulate duplicates and retrieval would return repeated passages.
        An existing store is therefore reopened as-is.

        NOTE: a reopened store is not refreshed when the PDF or the chunking
        parameters change; delete the persist directory to force a rebuild.
        """
        if self._has_persisted_db():
            return Chroma(
                persist_directory=self.chroma_path,
                embedding_function=self.embeddings,
            )
        return Chroma.from_documents(
            documents=self.chunks,
            embedding=self.embeddings,
            persist_directory=self.chroma_path,
        )

    def _complete(self, system_prompt, user_prompt, temperature):
        """Send one system+user exchange to the LLM and return the stripped reply.

        The reply content is treated as optional: a ``None`` content yields an
        empty string instead of raising ``AttributeError`` on ``.strip()``.
        """
        response = self.llm_client.chat.completions.create(
            model=self.llm_model_name,
            temperature=temperature,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        content = response.choices[0].message.content
        return (content or "").strip()

    def generate_retrieval_query(self, question, error_step, error_explanation):
        """Ask the LLM for a short retrieval query targeting the misunderstood concept.

        Args:
            question: The physics question being solved.
            error_step: The solution step that contains the conceptual error.
            error_explanation: Short explanation of the misunderstanding.

        Returns:
            The LLM reply with surrounding whitespace removed ("" if the reply
            had no content).
        """
        prompt = f"""
                You are a helpful assistant generating a focused retrieval query for a RAG (retrieval-augmented generation) system in physics education.

                You are given:
                - A physics question
                - A specific step of a LLM generated solution containing conceptual error
                - A short explanation of the conceptual misunderstanding

                Your goal is to generate a short, precise query that helps retrieve the correct concept(s) or principles from a physics textbook to fix LLM generated solution misunderstanding.

                Question:
                {question}

                Error Step:
                {error_step}

                Error Explanation:
                {error_explanation}

                Output only the retrieval query. Be specific and concise.
                """

        return self._complete(
            "You generate retrieval queries to find the correct physics concepts.",
            prompt,
            temperature=0,
        )

    def retrieve_context(self, query, k=3):
        """Return the text of the ``k`` best-matching chunks, joined by blank lines.

        Similarity scores returned by the store are discarded.
        """
        results = self.db.similarity_search_with_score(query, k=k)
        return "\n\n".join(doc.page_content for doc, _ in results)

    def generate_structured_feedback(self, question, error_step, error_explanation, retrieved_context):
        """Ask the LLM to write correction feedback using the retrieved context.

        Args:
            question: The physics question being solved.
            error_step: The solution step that contains the conceptual error.
            error_explanation: Short explanation of the misunderstanding.
            retrieved_context: Passages to ground the feedback in.

        Returns:
            The LLM reply with surrounding whitespace removed ("" if the reply
            had no content).
        """
        prompt = f"""
                You are an expert physics assistant.

                You are given the following:
                - A physics question
                - A description of a conceptual error in a llm generated step (including the step and explanation)
                - A relevant excerpt from the textbook or notes

                Your task is to write a clear and structured feedback that helps the LLM understand what was wrong and how to fix it, using the retrieved context.

                Question:
                {question}

                Error Step:
                {error_step}

                Error Explanation:
                {error_explanation}

                Retrieved Context:
                {retrieved_context}

                Now generate structured correction feedback.
                """

        return self._complete(
            "You help LLM understand and correct conceptual physics mistakes.",
            prompt,
            temperature=0.2,
        )

    def run_feedback_cycle(self, question, error_step, error_explanation):
        """Run query generation, retrieval, and feedback generation in sequence.

        Returns:
            A dict with the three inputs plus ``retrieval_query``,
            ``context_used`` and ``structured_feedback``.
        """
        retrieval_query = self.generate_retrieval_query(
            question, error_step, error_explanation
        )
        context = self.retrieve_context(retrieval_query)
        structured_feedback = self.generate_structured_feedback(
            question, error_step, error_explanation, context
        )
        return {
            "question": question,
            "error_step": error_step,
            "error_explanation": error_explanation,
            "retrieval_query": retrieval_query,
            "context_used": context,
            "structured_feedback": structured_feedback,
        }

