import os
import json
import re
import sys
from pathlib import Path
import argparse
from datetime import datetime
import glob
from functools import cached_property
from abc import ABC, abstractmethod

from typing import Any


class MarkdownConverterBase(ABC):
    """
    Base class for converting JSON data to Markdown format.
    """

    def _extract_number(self, filename: str, prefix: str = 'problem') -> int:
        """
        Extract a number from the filename based on a prefix.
        Format supported: '{prefix}_<number>'.
        returns -1 if the number cannot be extracted.
        Args:
            filename (str): The name of the file to extract the number from.
            prefix (str): The prefix to look for in the filename.
        Returns:
            int: The extracted number, or -1 if not found.
        """
        match = re.search(rf'{prefix}_(\d+)', filename)
        if match:
            return int(match.group(1))
        print(
            f"Warning: Cannot extract {prefix} number from '{filename}', using -1 instead",
            flush=True)
        return -1

    @property
    @abstractmethod
    def _title(self) -> str:
        """
        The title of the Markdown document.
        Returns:
            str: The title string.
        """
        pass

    @property
    def _markdown_header(self) -> str:
        """
        The default Markdown header.
        Returns:
            str: A Markdown formatted header string.
        """
        return f"# {self._title}\nData extracted on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\nData Source: {self.save_dir}\n\n"

    def __init__(self,
                 save_dir: str,
                 index_names: list[str] = ['problem', 'proof', 'generate'],
                 output_dir: str | None = None):
        """
        Initialize the MarkdownConverterBase with a directory to read JSON files and index names.
        Args:
            save_dir (str): The directory where JSON files are stored.
            index_names (list[str]): A list of index names to extract from the filenames. Also serves as the subheader names in the Markdown.
            output_dir (str): The directory where the Markdown output will be saved.
        """
        self.save_dir = save_dir
        self.index_names = index_names
        self.output_dir = output_dir if output_dir else os.path.abspath(os.path.join(
            save_dir, os.pardir, 'markdown_output'))

    def _read_json_files(self, lines: int|None = None) -> list[dict[str, Any]]:
        """
        Read all JSON files in the specified directory and extract data based on index names.
        Args:
            lines (int | None): The number of solutions to read. If None, read all solutions.
        Returns:
            list[dict[str,Any]]: A list of dictionaries containing the extracted data.
        """
        if not os.path.exists(self.save_dir):
            print(f"Error: Directory '{self.save_dir}' does not exist",
                  flush=True)
            sys.exit(1)

        json_files = glob.glob(os.path.join(self.save_dir, '*.json'))
        if not json_files:
            print(f"Warning: No JSON files found in '{self.save_dir}'",
                  flush=True)
            return []

        print(f"Found {len(json_files)} JSON files", flush=True)

        data_list = []

        for file_path in json_files:
            file_name = os.path.basename(file_path)
            data = {}
            for index_name in self.index_names:
                data[index_name + '_id'] = self._extract_number(
                    file_name, index_name)

            print(f"Processing file: {file_path}", flush=True)

            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    json_data = json.load(f)
                    for key in json_data:
                        if key not in data:
                            data[key] = json_data[key]
            except Exception as e:
                print(f"Error reading JSON from file '{file_path}': {e}",
                      flush=True)

            data_list.append(data)
            if lines is not None and len(data_list) >= lines:
                print(f"Read {lines} solutions, stopping early", flush=True)
                break
        return data_list

    def _get_subheader_content(self, data: dict[str, Any], index: str) -> str:
        """
        Get the content for the subheader
        Args:
            data (dict[str, Any]): The dictionary containing the extracted data.
            index (str): The index name to use for the subheader.
        Returns:
            str: The subheader string.
        """
        subheader = f"{'#' * (self.index_names.index(index) + 1)} {index.capitalize()} {data.get(index + '_id', 'unknown')}"
        subheader += "\n\n"
        return subheader

    @abstractmethod
    def _get_subheader_description(self, data: dict[str, Any],
                                   index: str) -> str:
        """
        Generate the description info attached beneath the subheader.
        Args:
            data (dict[str, Any]): The dictionary containing the extracted data.
            index (str): The index name to use for the subheader.
        Returns:
            str: The description string.
        """
        description = ""
        return description

    def _hide_detail(self,
                     content: str,
                     summary: str = "Click to Expand") -> str:
        """
        Enclose the given content string in < details > HTML tags with a summary.
        Args:
            content (str): The content to be enclosed.
            summary (str): The summary text for the details tag.
        Returns:
            str: The content enclosed in < details > tags.
        """
        return f"<details>\n<summary>{summary}</summary>\n\n{content}\n\n</details>\n\n"

    def _convert_to_markdown(self, data_list: list[dict[str, Any]]) -> str:
        """
        Convert the extracted data to Markdown format.
        Args:
            data_list (list[dict[str, Any]]): The list of dictionaries containing the extracted data.
        Returns:
            str: The Markdown formatted string.
        """
        markdown = self._markdown_header
        sub_headers = {index: [] for index in self.index_names}
        clean = False
        for data in data_list:
            clean = False
            for index in self.index_names:
                if clean:
                    sub_headers[index] = []
                if data.get(index + '_id') not in sub_headers[index]:
                    sub_headers[index].append(data.get(index + '_id'))
                    clean = True
                    markdown += self._get_subheader_content(data, index)
                    markdown += self._get_subheader_description(data, index)
        return markdown

    def _post_process(self, markdown: str) -> str:
        """
        Post-process the Markdown content.
        Args:
            markdown (str): The Markdown content to be processed.
        Returns:
            str: The processed Markdown content.
        """
        # Remove extra newlines after $$ (math block end)
        markdown = re.sub(r'\n+\$\$\n+', r'\n$$\n', markdown)
        markdown = re.sub(r'[ \n]+\}', '}',
                          markdown)  # Remove trailing spaces before }
        markdown = re.sub(r'[ \n]+\]', ']',
                          markdown)  # Remove trailing spaces before ]
        return markdown

    def convert(self, lines: int | None = None) -> None:
        """
        Convert the JSON data to Markdown format.
        Returns:
            str: The final Markdown formatted string.
        """
        data_list = self._read_json_files(lines)

        markdown = self._convert_to_markdown(data_list)
        markdown = self._post_process(markdown)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir, exist_ok=True)
        output_path = os.path.join(self.output_dir, 'output.md')
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(markdown)


def get_parser():
    """
    Return a common argument parser for the Markdown converter.

    Options:
        --save_dir (required): directory containing the JSON files.
        --output_dir: where the Markdown file is written (default: None).
        --lines: how many solutions to read (default: None, meaning all).
    Returns:
        argparse.ArgumentParser: The configured parser.
    """
    parser = argparse.ArgumentParser(
        description="Convert JSON data to Markdown format")
    parser.add_argument('--save_dir',
                        type=str,
                        required=True,
                        help="Directory containing JSON files to convert.")
    # The help text states the fallback location as a sibling of save_dir,
    # i.e. <save_dir>/../markdown_output, not a directory inside save_dir.
    parser.add_argument(
        '--output_dir',
        type=str,
        default=None,
        help=
        "Directory to save the Markdown output, default is a directory 'markdown_output' next to save_dir (in its parent directory)"
    )
    parser.add_argument(
        '--lines',
        type=int,
        default=None,
        help=
        "Number of solutions to read from the JSON files. If None, read all solutions."
    )
    return parser
