import logging
import pandas as pd
from typing import Dict

# Statistical modeling
try:
    import statsmodels.api as sm
    import statsmodels.formula.api as smf
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    HAS_STATSMODELS = True
except ImportError:
    HAS_STATSMODELS = False
    logging.warning("statsmodels not available - statistical modeling will be limited")

logger = logging.getLogger(__name__)


def run_statistical_models(
    cell_aggregates: pd.DataFrame, step_level_data: pd.DataFrame
) -> Dict:
    """Fit a discrete-time hazard model on step-level data.

    Rows of ``step_level_data`` with ``at_risk_t == 1`` and
    ``is_epilogue_step == 0`` are modelled with a binomial GLM of
    ``event_at_t`` on the step (as a categorical ``step_factor``) plus every
    candidate design factor that is present and takes more than one value.
    Standard errors are cluster-robust, grouped by ``trial_id``.

    Args:
        cell_aggregates: Not used by this function; kept in the signature
            for existing callers.
        step_level_data: Step-level frame. The code reads the columns
            ``at_risk_t``, ``is_epilogue_step``, ``step``, ``event_at_t``
            and ``trial_id``, plus any of the candidate factor columns that
            exist.

    Returns:
        A dict with a single ``"hazard_model"`` entry. On success it holds
        ``"summary"`` (text), ``"params"`` and ``"pvalues"`` (dicts keyed by
        term name). On failure, or when no usable rows remain, it holds
        ``{"error": <message>}``. Exceptions raised while preparing or
        fitting the model are logged and reported this way rather than
        propagated.
    """
    logger.info("Running statistical models...")
    model_results = {}
    candidate_factors = [
        "communication_type",
        "tool_use_policy",
        "visible_question_budget",
        "hedonic",
        "persona_age",
        "distraction",
        "reward_visibility",
    ]
    try:
        at_risk = step_level_data[
            (step_level_data["at_risk_t"] == 1)
            & (step_level_data["is_epilogue_step"] == 0)
        ].copy()
        if at_risk.empty:
            logger.warning("No data available for hazard model.")
            model_results["hazard_model"] = {"error": "No data available"}
            return model_results

        if not HAS_STATSMODELS:
            # Without this guard the failure surfaces as an opaque
            # NameError on `smf` further down.
            message = "statsmodels is not available; cannot fit hazard model"
            logger.error(message)
            model_results["hazard_model"] = {"error": message}
            return model_results

        # String-typed so the formula treats the step as categorical.
        at_risk["step_factor"] = at_risk["step"].astype(str)

        # A factor with a single observed value cannot be estimated.
        varying_factors = [
            factor
            for factor in candidate_factors
            if factor in at_risk.columns and at_risk[factor].nunique() > 1
        ]
        hazard_factors = ["step_factor", *varying_factors]

        # The formula interface drops rows with missing values on its own,
        # but the cluster labels passed via cov_kwds are not dropped with
        # them. Restrict to complete cases first so the design matrix and
        # the groups are built from exactly the same rows.
        complete_cases = at_risk.dropna(
            subset=["event_at_t", "trial_id", *varying_factors]
        )
        dropped = len(at_risk) - len(complete_cases)
        if dropped:
            logger.warning(
                "Dropped %d of %d rows with missing values before fitting "
                "the hazard model",
                dropped,
                len(at_risk),
            )
        if complete_cases.empty:
            logger.warning("No data available for hazard model.")
            model_results["hazard_model"] = {"error": "No data available"}
            return model_results

        hazard_formula = f"event_at_t ~ {' + '.join(hazard_factors)}"
        logger.info("Hazard model formula: %s", hazard_formula)
        hazard_model = smf.glm(
            hazard_formula, data=complete_cases, family=sm.families.Binomial()
        ).fit(cov_type="cluster", cov_kwds={"groups": complete_cases["trial_id"]})
        model_results["hazard_model"] = {
            "summary": str(hazard_model.summary()),
            "params": hazard_model.params.to_dict(),
            "pvalues": hazard_model.pvalues.to_dict(),
        }
        logger.info("✓ Completed discrete-time hazard model")
    except Exception as e:
        # Deliberate best-effort: report the failure in the result instead
        # of aborting the caller's pipeline.
        logger.error(f"Discrete-time hazard model failed: {e}", exc_info=True)
        model_results["hazard_model"] = {"error": str(e)}
    return model_results
