"""
Prompts Used in Offline Educational Chatbot System Research
==========================================================

This file documents all prompts used during the AI-assisted research workflow.
Each prompt is timestamped and categorized by research phase.

Author: [Anonymous for Review]
Project: Bridging the AI Accessibility Gap
Course: AGI Assignment 1
"""

import inspect
from datetime import datetime
from typing import Dict, List, Any

class ResearchPrompts:
    """Catalog of the prompts used in the research workflow.

    Prompts are grouped under category keys (for example
    ``"dataset_generation"``). Every entry is a dict with four string
    fields, in this order: ``date`` (``YYYY-MM-DD``), ``phase`` (a
    human-readable label), ``prompt`` (the prompt text) and ``purpose``.

    The prompt texts are written below as indented triple-quoted literals
    for readability. Each one is passed through :meth:`_clean_prompt`, so
    the stored text contains neither the source-code indentation nor
    trailing whitespace.
    """

    def __init__(self):
        """Build the category-key -> entries mapping in ``self.prompts``."""
        self.prompts = {
            "problem_identification": self._get_problem_identification_prompts(),
            "architecture_design": self._get_architecture_design_prompts(),
            "dataset_generation": self._get_dataset_generation_prompts(),
            "model_development": self._get_model_development_prompts(),
            "evaluation": self._get_evaluation_prompts(),
            "visualization": self._get_visualization_prompts(),
            "documentation": self._get_documentation_prompts(),
        }

    @staticmethod
    def _clean_prompt(text: str) -> str:
        """Return *text* without source indentation or trailing whitespace.

        ``inspect.cleandoc`` removes the indentation shared by the
        continuation lines of a triple-quoted literal; the first line is
        handled separately, so it may start right after the opening quotes.
        Trailing whitespace is then stripped from every line.
        """
        dedented = inspect.cleandoc(text)
        return "\n".join(line.rstrip() for line in dedented.split("\n"))

    @classmethod
    def _normalize_entries(cls, entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return copies of *entries* whose ``"prompt"`` text is cleaned.

        New dicts are built (key order is preserved) so the literal data
        defined in the getters is never mutated in place.
        """
        return [
            {**entry, "prompt": cls._clean_prompt(entry["prompt"])}
            for entry in entries
        ]

    def _get_problem_identification_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for problem identification and research scoping."""
        entries = [
            {
                "date": "2024-11-15",
                "phase": "Problem Identification",
                "prompt": """Analyze the current state of educational AI accessibility in underserved regions.
                Focus on:
                1. Existing gaps in offline educational technology
                2. Infrastructure constraints (power, internet, hardware)
                3. Current solutions like Kolibri and their limitations
                4. Potential for AI-powered educational assistance without internet connectivity

                Provide a comprehensive analysis with specific focus on technical feasibility.""",
                "purpose": "Initial problem scoping and gap analysis"
            },
            {
                "date": "2024-11-18",
                "phase": "Literature Review",
                "prompt": """Generate a structured literature review covering:
                1. Edge AI and model compression techniques for educational applications
                2. Offline content delivery systems in education
                3. Multi-task learning for educational AI systems
                4. Performance evaluation metrics for educational chatbots

                Include recent papers from 2020-2024 and focus on practical deployment considerations.""",
                "purpose": "Comprehensive literature review for background research"
            }
        ]
        return self._normalize_entries(entries)

    def _get_architecture_design_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for system architecture design."""
        entries = [
            {
                "date": "2024-11-22",
                "phase": "Architecture Design",
                "prompt": """Design a lightweight AI architecture for offline educational assistance with these constraints:
                - Maximum 4GB RAM usage
                - Response time under 500ms
                - Must work without internet connectivity
                - Support for grades 6-12 mathematics and science
                - Deployment on basic hardware (2-core CPU minimum)

                Recommend specific model architectures, training approaches, and optimization strategies.""",
                "purpose": "Core system architecture specification"
            },
            {
                "date": "2024-11-24",
                "phase": "Multi-task Learning Design",
                "prompt": """Design a multi-task learning framework for educational AI that combines:
                1. Language generation for explanations
                2. Curriculum alignment for grade-appropriate responses
                3. Safety filtering for educational content

                Specify loss function weighting, training procedures, and evaluation metrics for each task.""",
                "purpose": "Multi-task learning framework design"
            }
        ]
        return self._normalize_entries(entries)

    def _get_dataset_generation_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for synthetic dataset creation."""
        entries = [
            {
                "date": "2024-11-28",
                "phase": "Dataset Generation",
                "prompt": """Generate 1000 synthetic educational examples for grade 8 mathematics covering:
                - Linear equations and inequalities
                - Basic geometry (area, perimeter, volume)
                - Introduction to algebra
                - Word problems with real-world context

                Each example should include: problem statement, step-by-step solution, final answer, and difficulty level.
                Ensure curriculum alignment with common core standards.""",
                "purpose": "Synthetic educational content generation for mathematics"
            },
            {
                "date": "2024-12-01",
                "phase": "Dataset Generation",
                "prompt": """Create 800 science education examples for grades 6-10 covering:
                - Basic physics (motion, forces, energy)
                - Chemistry fundamentals (atoms, molecules, reactions)
                - Earth science and environmental concepts
                - Scientific method and inquiry

                Include conceptual explanations, practical examples, and age-appropriate language.""",
                "purpose": "Synthetic educational content generation for science"
            },
            {
                "date": "2024-12-03",
                "phase": "Negative Sampling",
                "prompt": """Generate 500 non-educational examples that should NOT be handled by an educational chatbot:
                - Personal advice and counseling
                - Medical or health advice
                - Financial investment guidance
                - Political opinions or controversial topics
                - Entertainment recommendations

                These will be used for safety training to ensure the model stays focused on educational content.""",
                "purpose": "Negative examples for safety training"
            }
        ]
        return self._normalize_entries(entries)

    def _get_model_development_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for model development and training."""
        entries = [
            {
                "date": "2024-12-08",
                "phase": "Model Implementation",
                "prompt": """Implement a PyTorch model class for the educational chatbot based on DistilBERT with:
                1. Base DistilBERT encoder (66M parameters)
                2. Educational classification head (256→128 neurons)
                3. Language modeling head for generation
                4. Multi-task loss computation

                Include proper initialization, forward pass, and training utilities.""",
                "purpose": "Core model implementation in PyTorch"
            },
            {
                "date": "2024-12-12",
                "phase": "Training Pipeline",
                "prompt": """Create a training pipeline that:
                1. Handles multi-task learning with weighted losses
                2. Implements early stopping based on validation performance
                3. Monitors memory usage and response time during training
                4. Saves checkpoints and generates training curves

                Use AdamW optimizer with learning rate 2e-5 and batch size 8.""",
                "purpose": "Complete training pipeline implementation"
            }
        ]
        return self._normalize_entries(entries)

    def _get_evaluation_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for model evaluation and analysis."""
        entries = [
            {
                "date": "2024-12-20",
                "phase": "Evaluation Framework",
                "prompt": """Design a comprehensive evaluation framework for the educational chatbot including:
                1. Educational accuracy: curriculum alignment + factual correctness
                2. Response quality: fluency + relevance + safety
                3. Technical performance: latency, memory usage, throughput
                4. User experience: simulated satisfaction metrics

                Define specific metrics, measurement procedures, and success thresholds.""",
                "purpose": "Evaluation methodology design"
            },
            {
                "date": "2024-12-28",
                "phase": "Baseline Comparisons",
                "prompt": """Implement baseline comparison experiments against:
                1. Offline textbooks (static content retrieval)
                2. Kolibri vanilla (existing offline education platform)
                3. ChatGPT-3.5 (online AI assistant - simulated)
                4. Generic DistilBERT (without educational training)

                Use consistent evaluation metrics and statistical significance testing.""",
                "purpose": "Baseline comparison implementation"
            }
        ]
        return self._normalize_entries(entries)

    def _get_visualization_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for results visualization."""
        entries = [
            {
                "date": "2025-01-12",
                "phase": "Visualization",
                "prompt": """Create publication-quality visualizations for the research paper:
                1. Training curves (loss and accuracy over epochs)
                2. Performance comparison bar charts across baselines
                3. Ablation study results heatmap
                4. Deployment analysis by institution type
                5. Subject and grade-level performance breakdown

                Use consistent styling, clear labels, and colorblind-friendly palettes.""",
                "purpose": "Research paper visualization generation"
            },
            {
                "date": "2025-01-14",
                "phase": "Resource Analysis",
                "prompt": """Generate visualizations showing:
                1. Memory usage over time during inference
                2. Response time distribution across query types
                3. Model size vs. performance trade-offs
                4. Deployment success rates by hardware configuration

                Focus on demonstrating resource efficiency for offline deployment.""",
                "purpose": "Resource efficiency visualization"
            }
        ]
        return self._normalize_entries(entries)

    def _get_documentation_prompts(self) -> List[Dict[str, Any]]:
        """Prompts used for documentation and reporting."""
        entries = [
            {
                "date": "2025-01-18",
                "phase": "Documentation",
                "prompt": """Generate comprehensive documentation including:
                1. README with reproduction instructions
                2. Code comments and docstrings
                3. API documentation for main functions
                4. Troubleshooting guide for common issues

                Ensure all documentation supports full reproducibility of results.""",
                "purpose": "Comprehensive project documentation"
            },
            {
                "date": "2025-01-20",
                "phase": "Paper Writing",
                "prompt": """Assist with LaTeX formatting and structure for academic paper:
                1. Proper citation formatting
                2. Table and figure placement
                3. Mathematical notation consistency
                4. Conference template compliance

                Focus on technical accuracy and academic writing standards.""",
                "purpose": "Academic paper formatting assistance"
            }
        ]
        return self._normalize_entries(entries)

    def get_all_prompts(self) -> Dict[str, List[Dict[str, Any]]]:
        """Return all prompts organized by category key.

        The stored mapping itself is returned (not a copy), so changes made
        by the caller are visible through this instance.
        """
        return self.prompts

    def get_prompts_by_phase(self, phase: str) -> List[Dict[str, Any]]:
        """Get the prompts stored under one category key.

        Args:
            phase: A key of ``self.prompts`` such as ``"dataset_generation"``.
                This is the category key, not the human-readable ``"phase"``
                label found inside each entry.

        Returns:
            The list stored for that key, or an empty list when the key is
            unknown.
        """
        return self.prompts.get(phase, [])

    def get_prompt_summary(self) -> Dict[str, int]:
        """Return the number of prompts recorded under each category key."""
        return {
            phase: len(prompts)
            for phase, prompts in self.prompts.items()
        }

# Usage Example
if __name__ == "__main__":
    # Build the catalog once; everything below only reads from it.
    catalog = ResearchPrompts()

    print("=== Offline Educational Chatbot Research Prompts ===")
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"Generated on: {timestamp}")
    print()

    # Per-category counts, followed by the grand total.
    counts_by_phase = catalog.get_prompt_summary()
    print("Prompt Summary by Phase:")
    for phase_key in counts_by_phase:
        print(f"  {phase_key}: {counts_by_phase[phase_key]} prompts")

    total_prompts = sum(counts_by_phase.values())
    print(f"\nTotal prompts used: {total_prompts}")

    # Example: list the date and purpose of every dataset generation prompt.
    dataset_entries = catalog.get_prompts_by_phase("dataset_generation")
    print(f"\nDataset Generation Prompts ({len(dataset_entries)}):")
    for position, entry in enumerate(dataset_entries, start=1):
        print(f"{position}. [{entry['date']}] {entry['purpose']}")