


class DeepTBMRegressor(nn.Module):
    """Fully connected regression network.

    Every hidden stage is ``Linear -> BatchNorm1d -> ReLU -> Dropout``; a final
    ``Linear`` layer maps the last hidden width to ``output_dim``.

    Args:
        input_dim: Number of input features.
        output_dim: Number of regression outputs.
        hidden_dims: Iterable with the width of each hidden stage, in order.
            ``None`` (the default) selects ``DEFAULT_HIDDEN_DIMS``.
        dropout: Dropout probability used after every hidden stage.
    """

    # Immutable default so that no list object is shared between instances.
    DEFAULT_HIDDEN_DIMS = (512, 256, 128, 64)

    def __init__(self, input_dim, output_dim, hidden_dims=None, dropout=0.3):
        super().__init__()
        if hidden_dims is None:
            hidden_dims = self.DEFAULT_HIDDEN_DIMS

        layers = []
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            ])
            prev_dim = hidden_dim
        # Output head: no activation, the network is used for regression.
        layers.append(nn.Linear(prev_dim, output_dim))
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the raw outputs."""
        return self.network(x)


class OptimizedTBMModel:
    def __init__(self, data_path, use_gpu=True):
        """Record the configuration and create empty data/model containers.

        Args:
            data_path: Path handed to ``pd.read_excel`` by
                ``load_and_preprocess_data``.
            use_gpu: Request GPU usage; the stored flag is only truthy when
                ``TORCH_AVAILABLE`` is truthy as well.
        """
        self.data_path = data_path
        self.use_gpu = use_gpu and TORCH_AVAILABLE

        # Raw sheet and its column groups; filled by load_and_preprocess_data().
        self.data = self.active_params = self.passive_params = None
        self.geological_params = None
        # Cluster label per row; filled by intelligent_geological_clustering().
        self.geological_situations = None

        # Candidate scalers, addressed by name throughout the class.
        self.scalers = dict(
            standard=StandardScaler(),
            robust=RobustScaler(),
            quantile=QuantileTransformer(n_quantiles=1000, random_state=42),
        )

        # Model registries.
        self.base_models = {}
        self.ensemble_models = {}
        self.deep_models = {}

    def load_and_preprocess_data(self):
        """Read the spreadsheet at ``self.data_path`` and prepare the parameter frames.

        The sheet is split by column position: columns 1-11 become
        ``active_params``, 12-28 ``passive_params`` and 29-32
        ``geological_params`` (column 0 is not used). The frames are then
        passed through ``_advanced_data_cleaning``.

        Returns:
            bool: ``True`` when loading and cleaning succeeded, ``False`` when
            any step raised (the error is printed rather than propagated).
        """
        try:
            self.data = pd.read_excel(self.data_path)
            # Independent copies: cleaning assigns columns in place, which must
            # not write into (or warn about) slices of the raw frame.
            self.active_params = self.data.iloc[:, 1:12].copy()
            self.passive_params = self.data.iloc[:, 12:29].copy()
            self.geological_params = self.data.iloc[:, 29:33].copy()
            self._advanced_data_cleaning()
        except Exception as e:
            # Keep the best-effort contract (return False) but say why.
            print(f"⚠️ Data loading/preprocessing failed: {e}")
            return False
        return True

    def _advanced_data_cleaning(self):
        """Fill missing numeric values, then run outlier removal and feature engineering.

        For every parameter frame that contains at least one null, the numeric
        columns are interpolated (cubic, both directions), any remaining gaps
        are filled with the column median, and the result is written back into
        the frame column by column.
        """
        frames = (self.active_params, self.passive_params, self.geological_params)
        for frame in frames:
            has_nulls = frame.isnull().sum().sum() > 0
            if not has_nulls:
                continue
            numeric_part = frame.select_dtypes(include=[np.number])
            numeric_part = numeric_part.interpolate(method='cubic', limit_direction='both')
            # Median fill covers whatever interpolation could not reach.
            numeric_part = numeric_part.fillna(numeric_part.median())
            for column in numeric_part.columns:
                frame[column] = numeric_part[column]

        self._robust_outlier_detection()
        self._advanced_feature_engineering()

    def _robust_outlier_detection(self):
        def detect_outliers_modified_zscore(df, threshold=3.5):
            median = df.median()
            mad = np.median(np.abs(df - median))
            modified_z_scores = 0.6745 * (df - median) / mad
            return np.abs(modified_z_scores) > threshold

        all_outliers = pd.Series(False, index=self.active_params.index)
        for df in [self.active_params, self.passive_params, self.geological_params]:
            outliers = detect_outliers_modified_zscore(df).any(axis=1)
            all_outliers |= outliers

        if all_outliers.sum() > 0:
            outlier_threshold = max(1, int(all_outliers.sum() * 0.05))
            outlier_scores = []
            for idx in self.active_params.index:
                score = 0
                for df in [self.active_params, self.passive_params, self.geological_params]:
                    if idx in df.index:
                        try:
                            from scipy.spatial.distance import mahalanobis
                            mean = df.mean()
                            cov = df.cov()
                            if np.linalg.det(cov) != 0:
                                inv_cov = np.linalg.inv(cov)
                                score += mahalanobis(df.loc[idx], mean, inv_cov)
                        except:
                            score += np.linalg.norm(df.loc[idx] - df.mean())
                outlier_scores.append((idx, score))
            outlier_scores.sort(key=lambda x: x[1], reverse=True)
            indices_to_remove = [idx for idx, _ in outlier_scores[:outlier_threshold]]
            self.active_params = self.active_params.drop(indices_to_remove).reset_index(drop=True)
            self.passive_params = self.passive_params.drop(indices_to_remove).reset_index(drop=True)
            self.geological_params = self.geological_params.drop(indices_to_remove).reset_index(drop=True)

    def _advanced_feature_engineering(self):
        """Append engineered columns to the passive and geological frames.

        Passive frame:
            * pairwise products of the first eight columns, named ``a*b``;
            * row-wise ``mean``, ``std``, ``skew`` and ``kurt`` computed over
              the passive columns including the new products.
        Geological frame:
            * ``geo_sum`` and ``geo_product`` over all columns;
            * ``geo_ratio_1`` (col 0 / col 1) and ``geo_ratio_2``
              (col 2 / col 3) when enough columns exist.

        Every new frame is built on the index of the frame it is appended to,
        so the column-wise concatenation cannot misalign rows.
        """
        from sklearn.preprocessing import PolynomialFeatures

        poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
        important_passive = self.passive_params.iloc[:, :8]
        poly_features = poly.fit_transform(important_passive)

        # powers_[i, j] is the exponent of input j in output i, so a row sum
        # above 1 identifies a product term. (scikit-learn names such terms
        # "a b", which is why searching the names for '*' never matched.)
        interaction_mask = poly.powers_.sum(axis=1) > 1
        source_columns = np.asarray(important_passive.columns, dtype=object)
        interaction_names = [
            '*'.join(str(col) for col in source_columns[row > 0])
            for row in poly.powers_[interaction_mask]
        ]
        poly_df = pd.DataFrame(
            poly_features[:, interaction_mask],
            columns=interaction_names,
            index=self.passive_params.index,
        )
        self.passive_params = pd.concat([self.passive_params, poly_df], axis=1)

        passive_array = self.passive_params.values
        row_view = pd.DataFrame(passive_array)
        stat_df = pd.DataFrame(
            {
                'mean': np.mean(passive_array, axis=1),
                'std': np.std(passive_array, axis=1),
                # Row-wise pandas estimators replace a Python loop over rows.
                'skew': row_view.skew(axis=1).to_numpy(dtype=float),
                'kurt': row_view.kurtosis(axis=1).to_numpy(dtype=float),
            },
            index=self.passive_params.index,
        )
        self.passive_params = pd.concat([self.passive_params, stat_df], axis=1)

        geo_array = self.geological_params.values
        geo_combinations = {
            'geo_sum': np.sum(geo_array, axis=1),
            'geo_product': np.prod(geo_array, axis=1),
        }
        # The small epsilon keeps the ratios finite when the denominator is 0.
        if geo_array.shape[1] >= 2:
            geo_combinations['geo_ratio_1'] = geo_array[:, 0] / (geo_array[:, 1] + 1e-8)
        if geo_array.shape[1] >= 4:
            geo_combinations['geo_ratio_2'] = geo_array[:, 2] / (geo_array[:, 3] + 1e-8)
        geo_comb_df = pd.DataFrame(geo_combinations, index=self.geological_params.index)
        self.geological_params = pd.concat([self.geological_params, geo_comb_df], axis=1)

    def intelligent_geological_clustering(self):
        """Cluster the geological frame with KMeans, DBSCAN and a Gaussian mixture.

        All scalers in ``self.scalers`` are fitted on the geological frame, but
        only the robust-scaled data is clustered. The three label vectors are
        merged by ``_ensemble_clustering``; the merged labels are stored in
        ``self.geological_situations`` and returned.

        DBSCAN falls back to an all-zero label vector and the Gaussian mixture
        to a copy of the KMeans labels when they raise.
        """
        scaled_by_method = {
            name: scaler.fit_transform(self.geological_params)
            for name, scaler in self.scalers.items()
        }
        geo_robust = scaled_by_method['robust']

        optimal_clusters = self._find_optimal_clusters(geo_robust)
        clustering_results = {}
        clustering_results['kmeans'] = KMeans(
            n_clusters=optimal_clusters, random_state=42, n_init=20
        ).fit_predict(geo_robust)

        try:
            from sklearn.neighbors import NearestNeighbors
            finder = NearestNeighbors(n_neighbors=min(5, len(geo_robust))).fit(geo_robust)
            neighbor_dist, _ = finder.kneighbors(geo_robust)
            # Column 1 after sorting: distance to the closest non-self neighbour.
            nearest = np.sort(neighbor_dist, axis=0)[:, 1]
            eps = max(0.1, np.percentile(nearest, 90))
            if eps <= 1e-10:
                eps = 0.5
            min_samples = max(2, min(5, len(geo_robust) // 10))
            clustering_results['dbscan'] = DBSCAN(
                eps=eps, min_samples=min_samples
            ).fit_predict(geo_robust)
        except Exception:
            clustering_results['dbscan'] = np.zeros(len(geo_robust))

        try:
            from sklearn.mixture import GaussianMixture
            mixture = GaussianMixture(
                n_components=min(optimal_clusters, len(geo_robust) - 1),
                random_state=42,
                max_iter=100,
            )
            clustering_results['gmm'] = mixture.fit_predict(geo_robust)
        except Exception:
            clustering_results['gmm'] = clustering_results['kmeans'].copy()

        self.geological_situations = self._ensemble_clustering(clustering_results)
        return self.geological_situations

    def _find_optimal_clusters(self, data):
        from sklearn.metrics import silhouette_score, calinski_harabasz_score
        scores = []
        k_range = range(2, min(12, len(data) // 10))
        for k in tqdm(k_range, desc="Searching optimal clusters"):
            kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
            labels = kmeans.fit_predict(data)
            silhouette = silhouette_score(data, labels)
            calinski = calinski_harabasz_score(data, labels)
            score = 0.6 * silhouette + 0.4 * (calinski / 1000)
            scores.append(score)
        optimal_k = k_range[np.argmax(scores)]
        return optimal_k

    def _ensemble_clustering(self, clustering_results):
        from scipy.stats import mode
        if not clustering_results:
            return np.zeros(len(self.geological_params))
        standardized_results = []
        for labels in clustering_results.values():
            if labels is None or len(labels) == 0:
                continue
            if -1 in labels:
                try:
                    mask = labels != -1
                    if np.sum(mask) > 3:
                        knn = KNeighborsClassifier(n_neighbors=min(3, np.sum(mask)))
                        knn.fit(np.arange(len(labels))[mask].reshape(-1, 1), labels[mask])
                        noise_indices = labels == -1
                        labels[noise_indices] = knn.predict(np.arange(len(labels))[noise_indices].reshape(-1, 1))
                    else:
                        labels[labels == -1] = 0
                except Exception:
                    labels[labels == -1] = 0
            standardized_results.append(labels)
        if not standardized_results:
            return np.zeros(len(self.geological_params))
        ensemble_labels = []
        for i in range(len(standardized_results[0])):
            votes = [result[i] for result in standardized_results if i < len(result)]
            if votes:
                try:
                    final_label = mode(votes, keepdims=True)[0][0]
                except:
                    final_label = votes[0]
            else:
                final_label = 0
            ensemble_labels.append(final_label)
        return np.array(ensemble_labels)

    def build_optimized_models(self):
        """Train one set of models per geological situation.

        Features are prepared once via ``_optimize_features``. Situations with
        fewer than 20 samples are skipped; for the others the models returned
        by ``_build_situation_models`` are stored in ``self.ensemble_models``
        under the situation label (only when at least one model was built).
        """
        self._optimize_features()
        for situation in tqdm(np.unique(self.geological_situations),
                              desc="Building models for geological situations"):
            print(f"\n⛰️ Building model for geological situation {situation}...")
            in_situation = self.geological_situations == situation
            if np.sum(in_situation) < 20:
                print(f"  ⚠️ Situation {situation} has insufficient data, skipped")
                continue
            situation_models = self._build_situation_models(
                self.passive_params_processed[in_situation],
                self.active_params_processed[in_situation],
                situation,
            )
            if situation_models:
                self.ensemble_models[situation] = situation_models

    def _optimize_features(self):
        """Scale passive and active parameters and select the passive features.

        Sets ``self.passive_params_processed`` (scaled with the scaler chosen
        by ``_select_best_scaler``, then reduced by ``_select_best_features``)
        and ``self.active_params_processed`` (robust-scaled targets).
        """
        passive_scaler = self._select_best_scaler()
        scaled_passive = passive_scaler.fit_transform(self.passive_params)
        self.passive_params_processed = scaled_passive
        # NOTE(review): when the chosen scaler is the 'robust' entry, this call
        # refits that same object on the targets - confirm nothing downstream
        # relies on its passive-feature fit.
        scaled_active = self.scalers['robust'].fit_transform(self.active_params)
        self.active_params_processed = scaled_active
        self.passive_params_processed = self._select_best_features(
            scaled_passive, scaled_active
        )

    def _select_best_scaler(self):
        """Return the scaler whose output gives the best cross-validated R².

        Each scaler in ``self.scalers`` is fitted on the passive frame; a
        50-tree random forest is then scored with 3-fold CV against the first
        active-parameter column. The scaler with the highest mean R² wins
        (``None`` if no score exceeds negative infinity).
        """
        best_score, best_scaler = -np.inf, None
        for name, candidate in self.scalers.items():
            scaled = candidate.fit_transform(self.passive_params)
            forest = RandomForestRegressor(n_estimators=50, random_state=42)
            fold_scores = cross_val_score(
                forest, scaled, self.active_params.iloc[:, 0], cv=3, scoring='r2'
            )
            mean_r2 = np.mean(fold_scores)
            print(f"    {name}: R² = {mean_r2:.4f}")
            if mean_r2 > best_score:
                best_score, best_scaler = mean_r2, candidate
        print(f"  ✅ Best scaler selected, R² = {best_score:.4f}")
        return best_scaler

    def _select_best_features(self, X, y):
        """Reduce ``X`` with a three-stage feature selection.

        Stages, all scored against the first target column ``y[:, 0]``:
            1. drop features with variance below 0.01;
            2. keep the top ``min(50, n_features // 2)`` (at least one) by
               mutual information;
            3. recursive feature elimination with extra-trees down to at most
               30 features.

        Args:
            X: 2-D feature matrix.
            y: 2-D target matrix; only column 0 is used.

        Returns:
            The reduced feature matrix.
        """
        from functools import partial

        print("  🎯 Selecting best features...")
        target = y[:, 0]
        # At least one feature, otherwise SelectKBest would be asked for k=0.
        n_features_to_select = max(1, min(50, X.shape[1] // 2))

        var_selector = VarianceThreshold(threshold=0.01)
        X_var = var_selector.fit_transform(X)

        # mutual_info_regression adds random noise internally; seed it like
        # every other estimator here so the selection is reproducible.
        mi_selector = SelectKBest(
            score_func=partial(mutual_info_regression, random_state=42),
            k=min(n_features_to_select, X_var.shape[1]),
        )
        X_mi = mi_selector.fit_transform(X_var, target)

        rfe_estimator = ExtraTreesRegressor(n_estimators=50, random_state=42)
        rfe = RFE(estimator=rfe_estimator,
                  n_features_to_select=min(30, X_mi.shape[1]))
        X_final = rfe.fit_transform(X_mi, target)

        print(f"    Original features: {X.shape[1]}")
        print(f"    After variance filter: {X_var.shape[1]}")
        print(f"    After mutual information: {X_mi.shape[1]}")
        print(f"    After RFE: {X_final.shape[1]}")
        return X_final

    def _build_situation_models(self, X, y, situation):
        """Build one model per active parameter for a single geological situation.

        The data is split 80/20 once (``random_state=42``). For every target
        column the base models are trained on the training part, extended by
        the semi-supervised variants and, when torch is available and more
        than 100 training rows exist, a deep model. The candidate that scores
        best on the validation part is kept.

        Args:
            X: Processed passive features of the situation.
            y: Processed active parameters; column ``i`` matches
                ``self.active_params.columns[i]``.
            situation: Situation label, used for progress output only.

        Returns:
            dict: Parameter name -> selected model. Parameters for which no
            candidate was produced are absent.
        """
        situation_models = {}
        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        for i, param_name in enumerate(tqdm(self.active_params.columns,
                                          desc=f"  Modeling parameters for situation {situation}")):
            y_param_train = y_train[:, i]
            y_param_val = y_val[:, i]
            base_models = self._create_base_models(X_train, y_param_train, X_val, y_param_val)
            # Training rows only: the validation rows rank the candidates
            # below, so no candidate may be fitted on them.
            enhanced_models = self._apply_semi_supervised_learning(
                X_train, y_param_train, base_models
            )
            if TORCH_AVAILABLE and len(X_train) > 100:
                deep_model = self._build_deep_model(X_train, y_param_train, X_val, y_param_val)
                if deep_model:
                    enhanced_models['deep'] = deep_model
            if len(enhanced_models) > 1:
                ensemble_model = self._create_ensemble(enhanced_models, X_val, y_param_val)
                situation_models[param_name] = ensemble_model
            elif len(enhanced_models) == 1:
                situation_models[param_name] = list(enhanced_models.values())[0]
        return situation_models
    def _create_base_models(self, X_train, y_train, X_val, y_val):
        """Fit the candidate regressors and keep those with validation R² above 0.1.

        Candidates: random forest, extra trees, gradient boosting, XGBoost,
        LightGBM and - when ``CatBoostRegressor`` is importable - CatBoost.
        CatBoost is asked to use the GPU when ``self.use_gpu`` is set; if that
        fit fails it is retried once on the CPU.

        Args:
            X_train, y_train: Training features and single-column target.
            X_val, y_val: Validation features and target used for the metrics.

        Returns:
            dict: Model name -> fitted model, only for models whose validation
            R² exceeds 0.1. A model that fails to train is reported and skipped.
        """
        forest_params = {
            'n_estimators': 500,
            'max_depth': 20,
            'min_samples_split': 5,
            'min_samples_leaf': 2,
            'random_state': 42,
            'n_jobs': -1,
        }
        boosting_params = {
            'n_estimators': 500,
            'learning_rate': 0.05,
            'max_depth': 8,
            'subsample': 0.8,
            'random_state': 42,
        }
        tree_boosting_params = {**boosting_params, 'colsample_bytree': 0.8, 'n_jobs': -1}

        model_configs = {
            'rf': {'model': RandomForestRegressor, 'params': dict(forest_params)},
            'et': {'model': ExtraTreesRegressor, 'params': dict(forest_params)},
            'gbr': {'model': GradientBoostingRegressor, 'params': dict(boosting_params)},
            'xgb': {'model': xgb.XGBRegressor, 'params': dict(tree_boosting_params)},
            'lgb': {'model': lgb.LGBMRegressor,
                    'params': {**tree_boosting_params, 'verbose': -1}},
        }

        # CatBoost is optional: skip it when the class is not available.
        try:
            catboost_cls = CatBoostRegressor
        except NameError:
            catboost_cls = None
        if catboost_cls is not None:
            model_configs['catboost'] = {
                'model': catboost_cls,
                'params': {
                    'iterations': 500,
                    'learning_rate': 0.05,
                    'depth': 8,
                    'random_state': 42,
                    'verbose': False,
                    'task_type': 'GPU' if self.use_gpu else 'CPU',
                },
            }

        base_models = {}
        pbar = tqdm(model_configs.items(), desc="Training base models", leave=False)
        for model_name, config in pbar:
            try:
                params = config['params']
                try:
                    model = config['model'](**params)
                    model.fit(X_train, y_train)
                except Exception as gpu_error:
                    if params.get('task_type') != 'GPU':
                        raise
                    # A GPU was requested but could not be used: fall back.
                    print(f"      ⚠️ {model_name} GPU training failed, retrying on CPU: {gpu_error}")
                    model = config['model'](**{**params, 'task_type': 'CPU'})
                    model.fit(X_train, y_train)

                val_pred = model.predict(X_val)
                val_r2 = r2_score(y_val, val_pred)
                val_rmse = np.sqrt(mean_squared_error(y_val, val_pred))
                val_mae = mean_absolute_error(y_val, val_pred)
                pbar.set_postfix({
                    'Model': model_name,
                    'R²': f'{val_r2:.3f}',
                    'RMSE': f'{val_rmse:.3f}',
                    'MAE': f'{val_mae:.3f}'
                })
                if val_r2 > 0.1:
                    base_models[model_name] = model
                    print(f"      ✅ {model_name}: R²={val_r2:.4f}, RMSE={val_rmse:.4f}")
                else:
                    print(f"      ❌ {model_name}: R²={val_r2:.4f} (weak performance)")
            except Exception as e:
                pbar.set_postfix({'Model': model_name, 'Status': 'Failed'})
                print(f"      ⚠️ {model_name} training failed: {e}")
                continue
        return base_models

    def _apply_semi_supervised_learning(self, X, y, base_models):
        enhanced_models = base_models.copy()
        if len(X) < 30:
            return enhanced_models
        try:
            if len(base_models) >= 2:
                n_labeled = max(5, len(X) // 4)
                labeled_indices = np.random.choice(len(X), n_labeled, replace=False)
                unlabeled_indices = np.setdiff1d(np.arange(len(X)), labeled_indices)
                X_labeled = X[labeled_indices]
                y_labeled = y[labeled_indices]
                X_unlabeled = X[unlabeled_indices]
                enhanced_model = self._co_training_regression(
                    X_labeled, y_labeled, X_unlabeled, list(base_models.values())[:2]
                )
                if enhanced_model:
                    enhanced_models['co_training'] = enhanced_model

            enhanced_model = self._data_augmentation_training(X, y, base_models)
            if enhanced_model:
                enhanced_models['augmented'] = enhanced_model

            enhanced_model = self._iterative_training(X, y, base_models)
            if enhanced_model:
                enhanced_models['iterative'] = enhanced_model
        except Exception as e:
            print(f"    ⚠️ Semi-supervised learning failed: {e}")
        return enhanced_models

    def _co_training_regression(self, X_labeled, y_labeled, X_unlabeled, models):
        try:
            if len(models) < 2:
                return None
            model1, model2 = models[0], models[1]
            model1.fit(X_labeled, y_labeled)
            model2.fit(X_labeled, y_labeled)
            for _ in range(5):
                if len(X_unlabeled) == 0:
                    break
                pred1 = model1.predict(X_unlabeled)
                pred2 = model2.predict(X_unlabeled)
                consistency = np.abs(pred1 - pred2)
                threshold = np.percentile(consistency, 20)
                consistent_mask = consistency <= threshold
                if np.sum(consistent_mask) == 0:
                    break
                new_X = X_unlabeled[consistent_mask]
                new_y = (pred1[consistent_mask] + pred2[consistent_mask]) / 2
                X_labeled = np.vstack([X_labeled, new_X])
                y_labeled = np.hstack([y_labeled, new_y])
                X_unlabeled = X_unlabeled[~consistent_mask]
                model1.fit(X_labeled, y_labeled)
                model2.fit(X_labeled, y_labeled)
            ensemble = VotingRegressor([('model1', model1), ('model2', model2)])
            ensemble.fit(X_labeled, y_labeled)
            return ensemble
        except Exception as e:
            print(f"      Co-training failed: {e}")
            return None

    def _data_augmentation_training(self, X, y, base_models):
        try:
            if not base_models:
                return None
            noise_level = 0.01
            n_augmented = len(X) // 2
            indices = np.random.choice(len(X), n_augmented, replace=True)
            X_aug = X[indices].copy()
            y_aug = y[indices].copy()
            noise = np.random.normal(0, noise_level, X_aug.shape)
            X_aug += noise
            X_combined = np.vstack([X, X_aug])
            y_combined = np.hstack([y, y_aug])
            best_model = list(base_models.values())[0]
            best_model_class = type(best_model)
            augmented_model = best_model_class(**best_model.get_params())
            augmented_model.fit(X_combined, y_combined)
            return augmented_model
        except Exception as e:
            print(f"      Data augmentation failed: {e}")
            return None

    def _iterative_training(self, X, y, base_models):
        try:
            if not base_models:
                return None
            best_model = list(base_models.values())[0]
            best_model_class = type(best_model)
            iterative_model = best_model_class(**best_model.get_params())
            n_iterations = 5
            sample_size = len(X) // n_iterations
            for i in range(n_iterations):
                start_idx = i * sample_size
                end_idx = min((i + 1) * sample_size, len(X))
                if i == 0:
                    iterative_model.fit(X[start_idx:end_idx], y[start_idx:end_idx])
                else:
                    if hasattr(iterative_model, 'partial_fit'):
                        iterative_model.partial_fit(X[start_idx:end_idx], y[start_idx:end_idx])
                    else:
                        iterative_model.fit(X[:end_idx], y[:end_idx])
            return iterative_model
        except Exception as e:
            print(f"      Iterative training failed: {e}")
            return None

    def _build_deep_model(self, X_train, y_train, X_val, y_val):
        """Train a ``DeepTBMRegressor`` and wrap it in a predict-only object.

        Training uses Adam (lr 1e-3, weight decay 1e-5), MSE loss, batches of
        32, ``ReduceLROnPlateau`` on the validation loss, at most 200 epochs
        and early stopping after 20 epochs without a better validation R². The
        weights of the best epoch are restored before returning.

        Args:
            X_train, y_train: Training features (2-D) and target (1-D).
            X_val, y_val: Validation features and target.

        Returns:
            An object with ``predict(X) -> 1-D numpy array``, or ``None`` when
            torch is unavailable, fewer than two training rows exist, or
            training fails (the error is printed).
        """
        if not TORCH_AVAILABLE:
            return None
        try:
            import copy

            batch_size = 32
            if len(X_train) < 2:
                # BatchNorm1d cannot run in training mode on a single sample.
                return None

            # Honour the instance's GPU preference, not just hardware presence.
            use_cuda = getattr(self, 'use_gpu', True) and torch.cuda.is_available()
            device = torch.device('cuda' if use_cuda else 'cpu')

            X_train_tensor = torch.FloatTensor(X_train).to(device)
            y_train_tensor = torch.FloatTensor(y_train.reshape(-1, 1)).to(device)
            X_val_tensor = torch.FloatTensor(X_val).to(device)
            y_val_tensor = torch.FloatTensor(y_val.reshape(-1, 1)).to(device)
            train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
            train_loader = DataLoader(
                train_dataset,
                batch_size=batch_size,
                shuffle=True,
                # A trailing batch of exactly one sample would make BatchNorm1d
                # raise in training mode, so it is dropped in that case only.
                drop_last=(len(train_dataset) % batch_size == 1),
            )

            model = DeepTBMRegressor(
                input_dim=X_train.shape[1],
                output_dim=1,
                hidden_dims=[256, 128, 64, 32]
            ).to(device)

            criterion = nn.MSELoss()
            optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10)

            best_val_loss = float('inf')
            best_val_r2 = -float('inf')
            patience_counter = 0
            best_model_state = None

            pbar = tqdm(range(200), desc="Deep learning training", leave=False)
            for epoch in pbar:
                model.train()
                train_loss = 0
                n_batches = 0
                for batch_X, batch_y in train_loader:
                    optimizer.zero_grad()
                    outputs = model(batch_X)
                    loss = criterion(outputs, batch_y)
                    loss.backward()
                    optimizer.step()
                    train_loss += loss.item()
                    n_batches += 1
                avg_train_loss = train_loss / n_batches

                model.eval()
                with torch.no_grad():
                    val_outputs = model(X_val_tensor)
                    val_loss = criterion(val_outputs, y_val_tensor).item()
                    val_pred_np = val_outputs.cpu().numpy().flatten()
                    val_r2 = r2_score(y_val, val_pred_np)
                    val_rmse = np.sqrt(mean_squared_error(y_val, val_pred_np))
                scheduler.step(val_loss)
                pbar.set_postfix({
                    'Train_Loss': f'{avg_train_loss:.4f}',
                    'Val_Loss': f'{val_loss:.4f}',
                    'Val_R²': f'{val_r2:.3f}',
                    'Val_RMSE': f'{val_rmse:.3f}',
                    'Patience': f'{patience_counter}/20'
                })
                if val_r2 > best_val_r2:
                    best_val_loss = val_loss
                    best_val_r2 = val_r2
                    patience_counter = 0
                    # Deep copy: state_dict() tensors share storage with the
                    # live parameters, so a shallow copy would keep changing
                    # as training continues.
                    best_model_state = copy.deepcopy(model.state_dict())
                else:
                    patience_counter += 1
                    if patience_counter >= 20:
                        pbar.set_description("Deep learning training (early stop)")
                        break
            print(f"        🎯 Best Val R²: {best_val_r2:.4f}, Best Val Loss: {best_val_loss:.4f}")
            if best_model_state is not None:
                model.load_state_dict(best_model_state)

            class TorchPredictor:
                """Minimal predict-only adapter around the trained network."""

                def __init__(self, model, device):
                    self.model = model
                    self.device = device

                def predict(self, X):
                    self.model.eval()
                    with torch.no_grad():
                        X_tensor = torch.FloatTensor(X).to(self.device)
                        outputs = self.model(X_tensor)
                        return outputs.cpu().numpy().flatten()

            return TorchPredictor(model, device)
        except Exception as e:
            print(f"    ⚠️ Deep learning model build failed: {e}")
            return None
    def _create_ensemble(self, models, X_val, y_val):
        if not models:
            return None
        model_scores = {}
        for name, model in models.items():
            try:
                pred = model.predict(X_val)
                r2 = r2_score(y_val, pred)
                rmse = np.sqrt(mean_squared_error(y_val, pred))
                mae = mean_absolute_error(y_val, pred)
                combined_score = r2 - 0.1 * (rmse + mae) / 2
                model_scores[name] = {
                    'score': combined_score,
                    'r2': r2,
                    'rmse': rmse,
                    'mae': mae
                }
            except Exception:
                model_scores[name] = {
                    'score': -999,
                    'r2': -999,
                    'rmse': 999,
                    'mae': 999
                }
        if model_scores:
            best_model_name = max(model_scores.keys(), key=lambda x: model_scores[x]['score'])
            best_metrics = model_scores[best_model_name]
            print(f"        🏆 Best model: {best_model_name}")
            print(f"           R²={best_metrics['r2']:.4f}, RMSE={best_metrics['rmse']:.4f}, MAE={best_metrics['mae']:.4f}")
            return models[best_model_name]
        return None

    def comprehensive_evaluation(self):
        """Score every stored model on the rows of its geological situation.

        For each situation in ``self.ensemble_models`` (skipped when it has
        fewer than five rows) and each parameter model, R², MSE, RMSE, MAE,
        MAPE, the Pearson correlation and cross-validated R² (mean/std) are
        computed. Results are stored in ``self.evaluation_results``; the
        global best models are then selected and the results saved.

        Returns:
            dict: situation -> parameter name -> metrics dict.
        """
        print("\n📊 Running comprehensive evaluation...")
        evaluation_results = {}
        all_metrics = {'r2_scores': [], 'rmse_scores': [], 'mae_scores': [], 'mape_scores': []}
        # Target column by parameter name: a situation may lack a model for
        # some parameters, so the position inside `models` is not the column.
        column_index = {name: idx for idx, name in enumerate(self.active_params.columns)}

        pbar = tqdm(self.ensemble_models.items(), desc="Evaluating geological situation models")
        for situation, models in pbar:
            mask = self.geological_situations == situation
            # NOTE(review): these are all rows of the situation, i.e. they
            # include the rows the models were trained on.
            X_test = self.passive_params_processed[mask]
            y_test = self.active_params_processed[mask]
            if len(X_test) < 5:
                continue
            situation_results = {}
            situation_metrics = []
            for param_name, model in models.items():
                try:
                    y_true = y_test[:, column_index[param_name]]
                    try:
                        cv_scores = cross_val_score(model, X_test, y_true,
                                                    cv=min(5, len(X_test)),
                                                    scoring='r2')
                        cv_mean, cv_std = cv_scores.mean(), cv_scores.std()
                    except Exception:
                        # Predict-only wrappers cannot be cloned and refitted;
                        # keep their direct metrics and mark CV as unavailable.
                        cv_mean = cv_std = float('nan')
                    y_pred = model.predict(X_test)
                    r2 = r2_score(y_true, y_pred)
                    mse = mean_squared_error(y_true, y_pred)
                    rmse = np.sqrt(mse)
                    mae = mean_absolute_error(y_true, y_pred)
                    mape = np.mean(np.abs((y_true - y_pred) / np.maximum(np.abs(y_true), 1e-8))) * 100
                    correlation = np.corrcoef(y_true, y_pred)[0, 1]
                    situation_results[param_name] = {
                        'R2': r2, 'RMSE': rmse, 'MSE': mse,
                        'MAE': mae, 'MAPE': mape, 'Correlation': correlation,
                        'CV_R2_mean': cv_mean, 'CV_R2_std': cv_std,
                        'Sample_Size': len(y_true)
                    }
                    all_metrics['r2_scores'].append(r2)
                    all_metrics['rmse_scores'].append(rmse)
                    all_metrics['mae_scores'].append(mae)
                    all_metrics['mape_scores'].append(mape)
                    situation_metrics.append(r2)
                except Exception as e:
                    print(f"    ⚠️ Evaluation failed {param_name}: {e}")
                    continue
            if situation_metrics:
                avg_r2 = np.mean(situation_metrics)
                pbar.set_postfix({'Situation': situation, 'Avg_R²': f'{avg_r2:.3f}', 'Params': len(situation_metrics)})
            evaluation_results[situation] = situation_results
        self.evaluation_results = evaluation_results
        self._select_global_best_models()
        self.save_evaluation_results()
        return evaluation_results

    def _select_global_best_models(self):
        print(f"\n🏆 Selecting global best models...")
        self.global_best_models = {}
        best_overall_r2 = -1
        best_overall_model = None
        best_overall_info = None
        for param_name in self.active_params.columns:
            best_r2 = -1
            best_model = None
            best_situation = None
            best_metrics = None
            for situation, models in self.ensemble_models.items():
                if param_name in models and param_name in self.evaluation_results.get(situation, {}):
                    metrics = self.evaluation_results[situation][param_name]
                    r2 = metrics['R2']
                    if r2 > best_r2:
                        best_r2 = r2
                        best_model = models[param_name]
                        best_situation = situation
                        best_metrics = metrics
            if best_model is not None:
                self.global_best_models[param_name] = {
                    'model': best_model,
                    'situation': best_situation,
                    'metrics': best_metrics
                }
                print(f"  📈 {param_name}: R²={best_r2:.4f} (situation {best_situation})")
                if best_r2 > best_overall_r2:
                    best_overall_r2 = best_r2
                    best_overall_model = param_name
                    best_overall_info = (best_situation, best_metrics)
        if best_overall_model:
            print(f"\n🥇 Global best model:")
            print(f"   Parameter: {best_overall_model}")
            print(f"   Situation: {best_overall_info[0]}")
            print(f"   R²: {best_overall_info[1]['R2']:.4f}")
            print(f"   RMSE: {best_overall_info[1]['RMSE']:.4f}")
            print(f"   MAE: {best_overall_info[1]['MAE']:.4f}")
            print(f"   MAPE: {best_overall_info[1]['MAPE']:.2f}%")

    def save_evaluation_results(self):
        """Write ``self.evaluation_results`` to ``detailed_evaluation_results.xlsx``.

        One row is written per (situation, parameter) pair. Nothing is written
        when there are no rows; any error is printed instead of raised.
        """
        try:
            results_list = [
                {
                    'Situation': situation,
                    'Parameter': param_name,
                    'R²': metrics['R2'],
                    'RMSE': metrics['RMSE'],
                    'MSE': metrics['MSE'],
                    'MAE': metrics['MAE'],
                    'MAPE(%)': metrics['MAPE'],
                    'Correlation': metrics['Correlation'],
                    'CV_R2_mean': metrics['CV_R2_mean'],
                    'CV_R2_std': metrics['CV_R2_std'],
                    'Sample_Size': metrics['Sample_Size'],
                }
                for situation, params_results in self.evaluation_results.items()
                for param_name, metrics in params_results.items()
            ]
            if not results_list:
                return
            pd.DataFrame(results_list).to_excel('detailed_evaluation_results.xlsx', index=False)
            print("✅ Results saved: detailed_evaluation_results.xlsx")
        except Exception as e:
            print(f"⚠️ Saving evaluation results failed: {e}")

    def predict_with_uncertainty(self, passive_params, geological_params):
        """Predict the active parameters for one sample, with a spread estimate.

        The sample is assigned to the geological situation whose centre (mean
        of the robust-scaled training rows of that situation) is closest to
        the scaled ``geological_params``; that situation's models are used.

        Args:
            passive_params: 1-D array-like of passive features.
            geological_params: 1-D array-like with as many values as
                ``self.geological_params`` has columns.

        Returns:
            tuple: ``(predictions, uncertainties, closest_situation)`` where
            the first two are dicts keyed by parameter name, or
            ``(None, None, None)`` when no situation has models and rows. The
            uncertainty is the standard deviation over the member estimators
            of list-based ensembles and ``0.0`` for every other model.
            Parameters whose prediction raises are reported and omitted.
        """
        from sklearn.base import clone

        passive_row = np.asarray(passive_params).reshape(1, -1)
        geo_row = np.asarray(geological_params).reshape(1, -1)

        # NOTE(review): the models are trained on `passive_params_processed`
        # (scaler chosen by `_select_best_scaler`, then feature selection),
        # while this scales with the shared 'robust' scaler only, whose last
        # fit is on the active parameters. Verify how callers supply
        # `passive_params` before relying on this path.
        passive_scaled = self.scalers['robust'].transform(passive_row)

        # Dedicated scaler fitted on the stored geological data: the shared
        # 'robust' scaler is refitted on other frames during training and no
        # longer carries the geological statistics.
        geo_train = np.asarray(self.geological_params)
        geo_scaler = clone(self.scalers['robust']).fit(geo_train)
        geo_train_scaled = geo_scaler.transform(geo_train)
        geo_scaled = geo_scaler.transform(geo_row)

        distances = []
        for situation in self.ensemble_models:
            mask = np.asarray(self.geological_situations == situation)
            if np.sum(mask) > 0:
                geo_center = geo_train_scaled[mask].mean(axis=0)
                distances.append((situation, np.linalg.norm(geo_scaled - geo_center)))
        if not distances:
            return None, None, None
        closest_situation = min(distances, key=lambda x: x[1])[0]

        predictions = {}
        uncertainties = {}
        for param_name, model in self.ensemble_models[closest_situation].items():
            try:
                prediction = model.predict(passive_scaled)[0]
                members = getattr(model, 'estimators_', None)
                # Only list-based ensembles (forests, voting) hold members that
                # each predict the target; gradient boosting stores an array
                # of stage trees, whose spread is not a target-space spread.
                if isinstance(members, list) and members:
                    member_preds = [est.predict(passive_scaled)[0] for est in members]
                    spread = float(np.std(member_preds))
                else:
                    spread = 0.0
                predictions[param_name] = prediction
                uncertainties[param_name] = spread
            except Exception as e:
                print(f"Prediction failed {param_name}: {e}")
                continue
        return predictions, uncertainties, closest_situation
