from typing import Any, Dict, List, Set

from pydantic import JsonValue


def _is_bool(v: Any) -> bool:
    """Return True only when ``v`` is a ``bool`` instance (a JSON boolean)."""
    is_boolean: bool = isinstance(v, bool)
    return is_boolean


def _is_int(v: Any) -> bool:
    """Return True when ``v`` is an ``int`` instance that is not a ``bool``."""
    # bool is a subclass of int in Python, so it has to be ruled out first;
    # otherwise True/False would be classified as integers.
    if isinstance(v, bool):
        return False
    return isinstance(v, int)


def _is_float(v: Any) -> bool:
    """Return True when ``v`` is a ``float`` instance; ints and bools are not."""
    is_floating: bool = isinstance(v, float)
    return is_floating


def _infer_for_values(values: List[JsonValue]) -> Dict[str, Any]:
    """
    Infer a JSON Schema fragment that validates every value in ``values``.

    Strategy:
    - Determine which JSON types occur among the provided values.
    - object: ``properties`` holds a recursively inferred subschema for each key
      seen in any object, ``required`` lists the keys present in every object
      (the keyword is omitted when there are none), and
      ``additionalProperties`` is set to false.
    - array: ``items`` is inferred from the elements of all arrays pooled
      together; it is ``{}`` when every observed array is empty.
    - number/integer: ``integer`` if no float was observed, otherwise ``number``.
    - string/boolean/null: type only.
    - ``type`` is a single string when exactly one type is present, otherwise a
      list ordered object, array, string, integer/number, boolean, null.

    No length or range keywords (minItems/maxItems, minLength/maxLength,
    minimum/maximum) are emitted. Values that match none of the JSON kinds
    above contribute nothing to the result.

    Args:
        values: Sample JSON values the resulting schema must accept.

    Returns:
        The inferred schema as a dict; ``{}`` (accept anything) when ``values``
        is empty.
    """

    if not values:
        # No examples -> the empty schema, which allows anything.
        return {}

    # Only objects and arrays need their members later; for the scalar kinds
    # it is enough to know whether at least one such value occurred.
    objs = [v for v in values if isinstance(v, dict)]
    arrs = [v for v in values if isinstance(v, list)]
    has_string = any(isinstance(v, str) for v in values)
    has_int = any(_is_int(v) for v in values)
    has_float = any(_is_float(v) for v in values)
    has_bool = any(_is_bool(v) for v in values)
    has_null = any(v is None for v in values)

    present_types: List[str] = []
    if objs:
        present_types.append("object")
    if arrs:
        present_types.append("array")
    if has_string:
        present_types.append("string")
    if has_float:
        # Any float widens the numeric type; "number" also accepts integers.
        present_types.append("number")
    elif has_int:
        present_types.append("integer")
    if has_bool:
        present_types.append("boolean")
    if has_null:
        present_types.append("null")

    schema: Dict[str, Any] = {}

    # Object-specific
    if objs:
        # Group the observed values per key in one pass over the objects
        # instead of rescanning every object for every key.
        values_by_key: Dict[str, List[JsonValue]] = {}
        for obj in objs:
            for key, val in obj.items():
                values_by_key.setdefault(key, []).append(val)

        # Only keys present in ALL objects are required.
        shared_keys: Set[str] = set.intersection(*(set(obj) for obj in objs))

        schema["properties"] = {
            key: _infer_for_values(values_by_key[key])
            for key in sorted(values_by_key)
        }
        if shared_keys:
            schema["required"] = sorted(shared_keys)
        # Be precise to observed structure; change to True if you want to
        # allow unknown keys.
        schema["additionalProperties"] = False

    # Array-specific
    if arrs:
        pooled_items: List[JsonValue] = [item for arr in arrs for item in arr]
        # With no items at all the recursive call returns {}, i.e. the
        # element schema is left unconstrained.
        schema["items"] = _infer_for_values(pooled_items)

    # Assign type
    if present_types:
        schema["type"] = present_types[0] if len(present_types) == 1 else present_types

    return schema


def generate_schema(objects: list[JsonValue]) -> JsonValue:
    """Generate a JSON Schema that validates all given JSON values.

    The produced schema is restricted to the observed structure: each value's
    JSON type is recorded, object keys present in every sample become
    ``required``, keys never seen are rejected via
    ``additionalProperties: false``, and array ``items`` are inferred from all
    observed elements. No length or numeric-range keywords (minItems/maxItems,
    minLength/maxLength, minimum/maximum) are emitted. It does not include
    $schema to keep it embeddable; add it at the call site if desired.

    Args:
        objects: Sample JSON values the schema must accept.

    Returns:
        The inferred schema; ``{}`` (accept anything) when ``objects`` is empty.
    """

    # Hand the inference a list of its own, built from whatever was passed in.
    return _infer_for_values(list(objects))


if __name__ == "__main__":
    # Small manual demo: print each sample set followed by its inferred schema.
    sample_sets = [
        [{"name": "Alice", "age": 30}, {"name": "Bob", "age": 25}],
    ]

    for samples in sample_sets:
        # Infer before printing so a failure produces no partial output.
        inferred = generate_schema(samples)
        print(samples)
        print(inferred)
