"""Main settings for Flowgen code.

# Usage

## Standard usage
    from flowgen.configuration import cfg
    myval = cfg.logging.name

## Customize how settings are loaded etc.
    from flowgen.configuration import Settings
    custom_cfg = Settings(...)

# Development

- Keep the Settings class clean and put settings in namespaced models.
- You must add your model to the Settings class for it to be registered.
- Use Pydantic's built-in types where relevant for improved parsing and validation.
    https://docs.pydantic.dev/latest/api/types/
- Set sane defaults for local development wherever possible.

See https://docs.pydantic.dev/latest/concepts/pydantic_settings/ for documentation.
Also see https://github.com/makukha/pydantic-file-secrets for the secrets loader.

# Configuration directory

By default, configuration files and secrets are located in the directory above this one.
To change where configuration files are loaded, set the environment variable (case-insensitive)
    export FLOWGEN_CONFIG_DIR=/path/to/configs
or use the command-line flag
    $ python flowgen/mymodule.py --config_dir /path/to/configs

# Setting configuration values

How to override default configuration values (in resolution priority order):
    1. Pass to init:
        cfg = Settings(azure_oai={'api_version': 'foobar'})

    2. Set environment variable with prefix and __ nested separator (case-insensitive)
        export FLOWGEN_AZURE_OAI__API_VERSION=foobar
        export flowgen_azure_oai__api_version=foobar

    3. Set environment variables in .env file using naming scheme shown above.

    4. Store secret variables in the runtime-secrets directory. Filenames are
       the variable name without the FLOWGEN_ prefix

        $ cat runtime-secrets/azure_oai__api_key
        asdf1324asdf1234asdf1234
        $ cat runtime-secrets/azure_oai/api_key  # Alternative file structure
        asdf1324asdf1234asdf1234

       Secrets will also be loaded from other sources if present.

    5. (Preferred method) Set the variable in the YAML file config.yaml
        azure_oai:
          api_version: foobar
        logging:
          name: mylogs
"""

import logging
import os
import socket
import tempfile
import typing as T
from pathlib import Path

from pydantic import BaseModel, Field, HttpUrl, SecretStr, field_validator
from pydantic_file_secrets import FileSecretsSettingsSource
from pydantic_settings import (
    BaseSettings,
    PydanticBaseSettingsSource,
    SecretsSettingsSource,
    SettingsConfigDict,
    YamlConfigSettingsSource,
)
from sqlalchemy import Engine, create_engine
from typing_extensions import Annotated

# Repository root: the REPO_ROOT environment variable wins when it is set;
# otherwise fall back to the directory two levels above this file.
REPO_ROOT = Path(os.environ.get("REPO_ROOT", Path(__file__).parent.parent))
# NOTE(review): this goes through the root logger at import time. Per the stdlib
# logging docs, module-level logging.info() calls basicConfig() when the root
# logger has no handlers yet -- confirm that import-time side effect is intended.
logging.info(f"Repository root is: {REPO_ROOT}")

# Host name of the current machine, captured once at import.
HOSTNAME = socket.gethostname()

# Evaluation switches (consumers live outside this file).
EVAL__RAISE_ON_EXCEPTION = False
EVAL__ENABLE_RETRY = True

# NOTE(review): presumably seconds (3600 == one hour) -- confirm against callers.
S3_TIMEOUT = 3600

# NOTE(review): presumably a scaling factor applied to the context window of
# non-OpenAI models -- confirm against callers.
NON_OPENAI_CONTEXT_WINDOW_FACTOR = 0.85

"""
Namespaced configuration classes.

These must be attached to the Settings model below to be
loaded as configuration.
"""


class Paths(BaseModel):
    """Filesystem locations used by Flowgen.

    Every default is derived from ``REPO_ROOT`` or the system temp directory.
    The derived defaults (``data_dir``, ``templates_dir``, ``huggingface_cache``)
    are computed once, while this class body executes, from the *default* values
    of the fields declared above them. Overriding e.g. ``flowgen_dir`` on an
    instance therefore does not recompute ``data_dir``.

    ``data_dir`` and ``huggingface_cache`` are declared with
    ``validate_default=True`` so that the ``path_exists`` validator also runs
    for their default values, creating those directories when missing.
    """

    root_dir: Path = REPO_ROOT
    flowgen_dir: Path = REPO_ROOT.joinpath("minimal")
    data_dir: Annotated[Path, Field(validate_default=True)] = flowgen_dir.joinpath(
        "data"
    )
    templates_dir: Path = data_dir.joinpath("templates")
    tmp_dir: Path = Path(tempfile.gettempdir())
    huggingface_cache: Annotated[Path, Field(validate_default=True)] = tmp_dir.joinpath(
        "huggingface"
    )

    @field_validator("data_dir", "huggingface_cache", mode="after")
    @classmethod
    def path_exists(cls, path: Path) -> Path:
        """Create ``path`` (and any missing parents) if absent, then return it."""
        path.mkdir(parents=True, exist_ok=True)
        return path

    @property
    def templates_without_context(self) -> Path:
        """Path of ``templates_without_context.json`` inside ``templates_dir``."""
        return self.templates_dir.joinpath("templates_without_context.json")

    @property
    def templates_with_context(self) -> Path:
        """Path of ``templates_with_context.json`` inside ``templates_dir``."""
        return self.templates_dir.joinpath("templates_with_context.json")

    @property
    def agentic_templates(self) -> Path:
        """Path of ``agentic_templates.json`` inside ``templates_dir``."""
        return self.templates_dir.joinpath("agentic_templates.json")


class Logging(BaseModel):
    """Logging settings namespace, attached to ``Settings`` as ``logging``."""

    # Defaults for the logger name and log file name (as the field names
    # suggest; the code that consumes them is outside this file).
    name: str = "flowgen"
    filename: str = "flowgen.log"
    # Numeric level from the stdlib ``logging`` module; INFO by default.
    level: int = logging.INFO
    # Colored output is disabled by default.
    use_colors: bool = False
    # Format string using %(log_color)s / %(reset)s placeholders, which are not
    # standard LogRecord attributes -- presumably meant for a color-aware
    # formatter when use_colors is on; TODO confirm against the logging setup.
    color_format: str = (
        "%(log_color)s[%(levelname)1.1s %(asctime)s]%(reset)s %(message)s"
    )
    # Same layout as color_format but without the color placeholders.
    normal_format: str = "[%(levelname)1.1s %(asctime)s] %(message)s"
    # NOTE(review): consumer not visible here; confirm what this toggles.
    show_progress: bool = False


class AzureInferenceLlama(BaseModel):
    """Settings namespace attached to ``Settings`` as ``azure_inference_llama``."""

    # api_key is a SecretStr, so it is masked when printed.
    # Use cfg.azure_inference_llama.api_key.get_secret_value() to get the raw value.
    # "NOT SET" is the placeholder default.
    api_key: SecretStr = SecretStr("NOT SET")
    # Both are optional and default to None.
    region_name: str | None = None
    default_deployment: str | None = None


class AzureInferenceLlama33(BaseModel):
    """Settings namespace attached to ``Settings`` as ``azure_inference_llama33``."""

    # api_key is a SecretStr, so it is masked when printed.
    # Use cfg.azure_inference_llama33.api_key.get_secret_value() to get the raw value.
    # "NOT SET" is the placeholder default.
    api_key: SecretStr = SecretStr("NOT SET")
    # Both are optional and default to None.
    region_name: str | None = None
    default_deployment: str | None = None


class AzureInferenceMistral(BaseModel):
    """Settings namespace attached to ``Settings`` as ``azure_inference_mistral``."""

    # api_key is a SecretStr, so it is masked when printed.
    # Use cfg.azure_inference_mistral.api_key.get_secret_value() to get the raw value.
    # "NOT SET" is the placeholder default.
    api_key: SecretStr = SecretStr("NOT SET")
    # Both are optional and default to None.
    region_name: str | None = None
    default_deployment: str | None = None


class AzureInferencePhi4(BaseModel):
    """Settings namespace attached to ``Settings`` as ``azure_inference_phi4``."""

    # api_key is a SecretStr, so it is masked when printed.
    # Use cfg.azure_inference_phi4.api_key.get_secret_value() to get the raw value.
    # "NOT SET" is the placeholder default.
    api_key: SecretStr = SecretStr("NOT SET")
    # Both are optional and default to None.
    region_name: str | None = None
    default_deployment: str | None = None


class AzureOAI(BaseModel):
    """Settings namespace attached to ``Settings`` as ``azure_oai``."""

    # api_key is a SecretStr, so it is masked when printed.
    # Use cfg.azure_oai.api_key.get_secret_value() to get the raw value.
    # "NOT SET" is the placeholder default.
    api_key: SecretStr = SecretStr("NOT SET")
    default_deployment: str = "gpt-4o-mini"
    # Placeholder URL; validated as an HTTP(S) URL by the HttpUrl type.
    api_url: HttpUrl = HttpUrl("https://your-resource-group.openai.azure.com/")

    api_version: str = "2024-07-18"
    api_type: str = "azure"


class GCPVertex(BaseModel):
    """Settings namespace attached to ``Settings`` as ``gcp_vertex``."""

    # Use cfg.gcp_vertex.credentials.get_secret_value() to get the raw value.
    # Note that credentials are stored as a string; callers will typically need
    # to json.loads() the value.
    # project_id and region are optional and default to None.
    project_id: str | None = None
    region: str | None = None
    # "NOT SET" is the placeholder default.
    credentials: SecretStr = SecretStr("NOT SET")


class Instrumentation(BaseModel):
    """Settings namespace attached to ``Settings`` as ``instrumentation``."""

    # Tracing is off by default; see the field description for how to enable it.
    tracing_enabled: bool = Field(
        default=False,
        description=(
            "Enable Arize Phoenix tracing for debug. "
            "Requires running phoenix endpoint. "
            "Start up a local instance with `phoenix serve`"
        ),
    )
    # Defaults to a traces endpoint on the local machine (127.0.0.1, port 6006).
    arize_endpoint: str = "http://127.0.0.1:6006/v1/traces"


class LlamaIndexGeneral(BaseModel):
    """Settings namespace attached to ``Settings`` as ``llama_index``."""

    # NOTE(review): presumably passed through as the LlamaIndex tool-choice
    # option -- the consumer is not visible in this file; confirm.
    default_tool_choice: str = "auto"


class Evaluation(BaseModel):
    """Settings namespace attached to ``Settings`` as ``evaluation``."""

    # "random" is currently the only value the Literal type accepts.
    mode: T.Literal["random"] = "random"
    # NOTE(review): thresholds/counts below are consumed outside this file;
    # the rates are presumably fractions in [0, 1] -- confirm against callers.
    min_reporting_success_rate: float = 0.5
    num_eval_batch: int = 5
    max_eval_failure_rate: float = 0.5
    # Must be either "cost" or "latency"; defaults to "cost".
    objective_2_name: T.Literal["cost", "latency"] = "cost"


# Values accepted for an embedding device setting; None is also permitted.
# NOTE(review): None presumably means "no explicit device / auto-select" -- confirm.
EmbeddingDeviceType = T.Literal["cpu", "mps", "cuda", None]


class Resources(BaseModel):
    """Settings namespace attached to ``Settings`` as ``resources``."""

    # One of "cpu", "mps", "cuda", or None (the default).
    embedding_device: EmbeddingDeviceType = None


"""
Build the main Settings class
"""


class Settings(BaseSettings):
    """Top-level Flowgen settings, aggregating every namespaced model.

    See the module docstring for how values are supplied and overridden.
    """

    # Namespaced groups; each one defaults to its model's own defaults.
    azure_inference_llama33: AzureInferenceLlama33 = AzureInferenceLlama33()
    azure_inference_llama: AzureInferenceLlama = AzureInferenceLlama()
    azure_inference_mistral: AzureInferenceMistral = AzureInferenceMistral()
    azure_inference_phi4: AzureInferencePhi4 = AzureInferencePhi4()
    azure_oai: AzureOAI = AzureOAI()
    evaluation: Evaluation = Evaluation()
    gcp_vertex: GCPVertex = GCPVertex()
    instrumentation: Instrumentation = Instrumentation()
    llama_index: LlamaIndexGeneral = LlamaIndexGeneral()
    logging: Logging = Logging()
    paths: Paths = Paths()
    resources: Resources = Resources()
    study_config_file: Path | None = None

    # Meta-configuration: where and how setting values are looked up.
    model_config = SettingsConfigDict(
        env_prefix="FLOWGEN_",
        env_nested_delimiter="__",
        env_file=".env",
        secrets_dir="runtime-secrets",
        yaml_file="config.yaml",
        extra="ignore",
    )

    @classmethod
    def settings_customise_sources(
        cls,
        settings_cls: type[BaseSettings],
        init_settings: PydanticBaseSettingsSource,
        env_settings: PydanticBaseSettingsSource,
        dotenv_settings: PydanticBaseSettingsSource,
        file_secret_settings: PydanticBaseSettingsSource,
    ) -> tuple[PydanticBaseSettingsSource, ...]:
        """Build the ordered tuple of settings sources (earlier entries win).

        To add a new source, insert it into the returned tuple at the position
        matching its intended priority.

        Why this order:
        - init arguments come first; they are mostly used by unit tests.
        - environment variables are the usual way to override at runtime.
        - the dotenv file is treated as nearly equivalent to real env vars.
        - secrets files beat defaults and YAML but yield to env vars.
        - the YAML config file is the default customization mechanism and
          has the lowest priority of the explicit sources.
        """
        # Narrows the type for mypy and fails fast on an unexpected source.
        assert isinstance(file_secret_settings, SecretsSettingsSource)

        # Third-party wrapper that improves secrets loading; a missing secrets
        # directory is tolerated and no filename prefix is expected.
        secrets_source = FileSecretsSettingsSource(
            file_secret_settings, secrets_dir_missing="ok", secrets_prefix=""
        )
        yaml_source = YamlConfigSettingsSource(settings_cls)

        return (
            init_settings,
            env_settings,
            dotenv_settings,
            secrets_source,
            yaml_source,
        )


# Shared module-level settings instance, built once when this module is imported;
# values are resolved from the sources returned by
# Settings.settings_customise_sources. Import it with
# `from flowgen.configuration import cfg`.
cfg = Settings()
