from typing import List
import json
import ast
import re

def parse_json(text):
    """Parse *text* as JSON, tolerating several common kinds of malformed input.

    The following strategies are tried in order and the first success wins:

    1. ``json.loads`` on the stripped text, so strict JSON (including
       ``true``/``false``/``null`` and non-ASCII characters) is decoded properly.
    2. ``ast.literal_eval`` on the stripped text, which accepts Python-literal
       syntax such as single-quoted strings, ``True`` and ``None``.
    3. ``json.loads`` again after removing trailing backslashes and replacing
       curly double quotes with straight ones.
    4. A heuristic scan for ``"key":`` markers that cuts the text into
       key/value segments. Every value produced by this step is a string.

    Args:
        text: The string to parse.

    Returns:
        The decoded object. Strategies 1-3 return whatever the text encodes;
        strategy 4 always returns a flat ``dict`` mapping str keys to str values.

    Raises:
        ValueError: If no strategy succeeds and no ``"key":`` marker is found.
    """
    stripped = text.strip()

    # Strict JSON first: ast.literal_eval alone rejects true/false/null.
    try:
        return json.loads(stripped)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        pass

    # Python-literal syntax (single quotes, True/False/None, tuples, ...).
    try:
        return ast.literal_eval(stripped)
    except (ValueError, SyntaxError):
        pass

    # Repair common damage, then give the real JSON parser one more chance
    # so that typed values survive when the repair was sufficient.
    normalized = stripped.rstrip('\\')
    normalized = normalized.replace('“', '"').replace('”', '"')
    try:
        return json.loads(normalized)
    except ValueError:
        pass

    # Last resort: locate every `"key":` marker and slice the text between them.
    key_matches = list(re.finditer(r'"(\w+)"\s*:', normalized))
    if not key_matches:
        raise ValueError(f"无法解析 JSON: {normalized}")

    # Each value runs from the end of its key marker to the start of the next
    # marker (or to the end of the text for the final key).
    value_ends = [m.start() for m in key_matches[1:]] + [len(normalized)]
    result = {}
    for match, value_end in zip(key_matches, value_ends):
        # group(1) is the bare key, unaffected by whitespace before the colon.
        raw_value = normalized[match.end():value_end]
        result[match.group(1)] = _clean_fallback_value(raw_value)
    return result


def _clean_fallback_value(raw):
    """Return the raw text of one heuristic value segment without its delimiters."""
    value = raw.strip()

    if value.startswith('"'):
        # Keep everything up to the LAST quote so inner unescaped quotes survive;
        # this also discards a trailing comma or closing brace.
        closing = value.rfind('"')
        return value[1:closing] if closing > 0 else value[1:]

    if value.endswith('"'):
        # Opening quote is missing or misplaced: keep what lies between the
        # first quote and the final one.
        opening = value.find('"')
        if opening < len(value) - 1:
            return value[opening + 1:-1]
        return value[:-1]

    # Unquoted value: drop the separator that follows it and any closing
    # braces that belong to an enclosing object rather than to the value.
    trailing = ", \t\r\n"
    value = value.rstrip(trailing)
    while value.endswith("}") and value.count("}") > value.count("{"):
        value = value[:-1].rstrip(trailing)
    return value
    
def extract_json(messages: List) -> List[List[dict]]:
    """Extract the JSON payloads carried by each message.

    For every message, all fenced ```json blocks are parsed with
    ``parse_json``. When a message contains no such block, its whole text is
    parsed as a single JSON document instead.

    Parameters:
        messages (List): Items to scan. Each item is either a plain ``str`` or
            an object whose ``content`` attribute holds the text.

    Returns:
        List[List[dict]]: One inner list per message, holding the objects
        parsed from that message in order of appearance.

    Raises:
        ValueError: If a fenced block, or the plain text of a message without
            fenced blocks, cannot be parsed.
    """
    # Non-greedy match of everything between ```json and the next ``` fence.
    pattern = r"```json\s*(.*?)\s*```"

    results = []

    for message in messages:
        # Accept raw strings as well as message objects exposing `.content`.
        text = message if isinstance(message, str) else message.content

        matches = re.findall(pattern, text, re.DOTALL)

        if matches:
            # Catch the same exception types as the plain-text branch below,
            # so a bad fenced block is reported with this message.
            try:
                results.append([parse_json(match.strip()) for match in matches])
            except (ValueError, SyntaxError) as err:
                raise ValueError(
                    f"Failed to parse JSON from the content wrapped with ```json: {text}"
                ) from err
            continue

        # No fenced block: treat the entire text as one JSON document.
        try:
            results.append([parse_json(text.strip())])
        except (ValueError, SyntaxError) as err:
            raise ValueError(f"无法解析纯 JSON: {text}") from err

    return results
