{
  "instance_id": "scikit-learn__scikit-learn-13779",
  "repo": "scikit-learn/scikit-learn",
  "created_at": "2019-05-03T13:24:57Z",
  "problem_statement": "Voting estimator will fail at fit if weights are passed and an estimator is None\nBecause we don't check for an estimator to be `None` in `sample_weight` support, `fit` is failing`.\r\n\r\n```python\r\n    X, y = load_iris(return_X_y=True)\r\n    voter = VotingClassifier(\r\n        estimators=[('lr', LogisticRegression()),\r\n                    ('rf', RandomForestClassifier())]\r\n    )\r\n    voter.fit(X, y, sample_weight=np.ones(y.shape))\r\n    voter.set_params(lr=None)\r\n    voter.fit(X, y, sample_weight=np.ones(y.shape))\r\n```\r\n\r\n```\r\nAttributeError: 'NoneType' object has no attribute 'fit'\r\n```\n",
  "patch": "diff --git a/sklearn/ensemble/voting.py b/sklearn/ensemble/voting.py\n--- a/sklearn/ensemble/voting.py\n+++ b/sklearn/ensemble/voting.py\n@@ -78,6 +78,8 @@ def fit(self, X, y, sample_weight=None):\n \n         if sample_weight is not None:\n             for name, step in self.estimators:\n+                if step is None:\n+                    continue\n                 if not has_fit_parameter(step, 'sample_weight'):\n                     raise ValueError('Underlying estimator \\'%s\\' does not'\n                                      ' support sample weights.' % name)\n",
  "similar_bug_items": [
    {
      "pr_number": 2523,
      "pr_title": "[MRG] FIX #2481: add warning for bug in old numpy with unicode",
      "pr_body": "This is a fix for  #2481 which causes the jenkins tests to fail with numpy 1.3.0.\n",
      "issue_id": 2481,
      "issue_title": "LabelEncoder doesn't work correctly for unicode labels in Python 2.6 + numpy 1.3",
      "issue_body": "LabelEncoder works incorrectly for unicode labels in Python 2.6 + numpy 1.3. This is currently untested; to reproduce replace bytestrings with unicode strings here: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/preprocessing/tests/test_label.py#L191\n\nThis is the cause of Jenkins failure that https://github.com/scikit-learn/scikit-learn/pull/2462 triggered.\n",
      "issue_closed_at": "2013-10-16T12:24:23Z",
      "base_commit": "a2580d6fe04340f535c624ae48b4a52a66cbc839",
      "changes": [
        {
          "file": "sklearn/preprocessing/label.py",
          "type": "line",
          "name": "line 8",
          "code": "\nfrom ..base import BaseEstimator, TransformerMixin\n\nfrom ..utils.fixes import unique\nfrom ..utils import deprecated, column_or_1d\n\nfrom ..utils.multiclass import unique_labels"
        },
        {
          "file": "sklearn/preprocessing/label.py",
          "type": "line",
          "name": "line 25",
          "code": "    'LabelEncoder',\n]\n\n\nclass LabelEncoder(BaseEstimator, TransformerMixin):\n    \"\"\"Encode labels with value between 0 and n_classes-1."
        },
        {
          "file": "sklearn/preprocessing/label.py",
          "type": "function",
          "name": "fit",
          "class_name": "LabelBinarizer",
          "code": "def fit(self, y):\n        \"\"\"Fit label binarizer\n\n        Parameters\n        ----------\n        y : numpy array of shape (n_samples,) or sequence of sequences\n            Target values. In the multilabel case the nested sequences can\n            have variable lengths.\n\n        Returns\n        -------\n        self : returns an instance of self.\n        \"\"\"\n        y_type = type_of_target(y)\n        self.multilabel_ = y_type.startswith('multilabel')\n        if self.multilabel_:\n            self.indicator_matrix_ = y_type == 'multilabel-indicator'\n\n        self.classes_ = unique_labels(y)\n\n        return self"
        },
        {
          "file": "sklearn/preprocessing/label.py",
          "type": "function",
          "name": "fit_transform",
          "class_name": "LabelEncoder",
          "code": "def fit_transform(self, y):\n        \"\"\"Fit label encoder and return encoded labels\n\n        Parameters\n        ----------\n        y : array-like of shape [n_samples]\n            Target values.\n\n        Returns\n        -------\n        y : array-like of shape [n_samples]\n        \"\"\"\n        y = column_or_1d(y, warn=True)\n        self.classes_, y = unique(y, return_inverse=True)\n        return y"
        },
        {
          "file": "sklearn/preprocessing/label.py",
          "type": "function",
          "name": "transform",
          "class_name": "LabelBinarizer",
          "code": "def transform(self, y):\n        \"\"\"Transform multi-class labels to binary labels\n\n        The output of transform is sometimes referred to by some authors as the\n        1-of-K coding scheme.\n\n        Parameters\n        ----------\n        y : numpy array of shape [n_samples] or sequence of sequences\n            Target values. In the multilabel case the nested sequences can\n            have variable lengths.\n\n        Returns\n        -------\n        Y : numpy array of shape [n_samples, n_classes]\n        \"\"\"\n        self._check_fitted()\n\n        y_is_multilabel = type_of_target(y).startswith('multilabel')\n\n        if y_is_multilabel and not self.multilabel_:\n            raise ValueError(\"The object was not fitted with multilabel\"\n                             \" input.\")\n\n        return label_binarize(y, self.classes_,\n                              multilabel=self.multilabel_,\n                              pos_label=self.pos_label,\n                              neg_label=self.neg_label)"
        }
      ]
    },
    {
      "pr_number": 13157,
      "pr_title": "[MRG+1]\u00a0API Change default multioutput in RegressorMixin.score to keep consistent with metrics.r2_score",
      "pr_body": "Closes #12772 \r\nWondering if someone has a better way :)\r\nIn the original issue, I tried to ask why we prefer uniform_average, but received no reply. I guess we choose uniform_average to keep consistent with other regression metrics.",
      "issue_id": 12772,
      "issue_title": "Different r2_score multioutput default in r2_score and base.RegressorMixin",
      "issue_body": "We've changed multioutput default in r2_score to \"uniform_average\" in 0.19, but in base.RegressorMixin, we still use ``multioutput='variance_weighted'`` (#5143).\r\nAlso see the strange things below:\r\nhttps://github.com/scikit-learn/scikit-learn/blob/4603e481e9ac67eaf906ae5936263b675ba9bc9c/sklearn/multioutput.py#L283-L286",
      "issue_closed_at": "2019-03-15T09:47:51Z",
      "base_commit": "85440978f517118e78dc15f84e397d50d14c8097",
      "changes": [
        {
          "file": "sklearn/base.py",
          "type": "function",
          "name": "score",
          "class_name": "DensityMixin",
          "code": "def score(self, X, y=None):\n        \"\"\"Returns the score of the model on the data X\n\n        Parameters\n        ----------\n        X : array-like, shape = (n_samples, n_features)\n\n        Returns\n        -------\n        score : float\n        \"\"\"\n        pass"
        },
        {
          "file": "sklearn/linear_model/coordinate_descent.py",
          "type": "class",
          "name": "MultiTaskLassoCV",
          "code": "class MultiTaskLassoCV(LinearModelCV, RegressorMixin):\n    \"\"\"Multi-task Lasso model trained with L1/L2 mixed-norm as regularizer.\n\n    See glossary entry for :term:`cross-validation estimator`.\n\n    The optimization objective for MultiTaskLasso is::\n\n        (1 / (2 * n_samples)) * ||Y - XW||^Fro_2 + alpha * ||W||_21\n\n    Where::\n\n        ||W||_21 = \\\\sum_i \\\\sqrt{\\\\sum_j w_{ij}^2}\n\n    i.e. the sum of norm of each row.\n\n    Read more in the :ref:`User Guide <multi_task_lasso>`.\n\n    Parameters\n    ----------\n    eps : float, optional\n        Length of the path. ``eps=1e-3`` means that\n        ``alpha_min / alpha_max = 1e-3``.\n\n    n_alphas : int, optional\n        Number of alphas along the regularization path\n\n    alphas : array-like, optional\n        List of alphas where to compute the models.\n        If not provided, set automatically.\n\n    fit_intercept : boolean\n        whether to calculate the intercept for this model. If set\n        to false, no intercept will be used in calculations\n        (e.g. data is expected to be already centered).\n\n    normalize : boolean, optional, default False\n        This parameter is ignored when ``fit_intercept`` is set to False.\n        If True, the regressors X will be normalized before regression by\n        subtracting the mean and dividing by the l2-norm.\n        If you wish to standardize, please use\n        :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``\n        on an estimator with ``normalize=False``.\n\n    max_iter : int, optional\n        The maximum number of iterations.\n\n    tol : float, optional\n        The tolerance for the optimization: if the updates are\n        smaller than ``tol``, the optimization code checks the\n        dual gap for optimality and continues until it is smaller\n        than ``tol``.\n\n    copy_X : boolean, optional, default True\n        If ``True``, X will be copied; else, it may be overwritten.\n\n    cv : int, cross-validation generator or an iterable, optional\n        Determines the cross-validation splitting strategy.\n        Possible inputs for cv are:\n\n        - None, to use the default 3-fold cross-validation,\n        - integer, to specify the number of folds.\n        - :term:`CV splitter`,\n        - An iterable yielding (train, test) splits as arrays of indices.\n\n        For integer/None inputs, :class:`KFold` is used.\n\n        Refer :ref:`User Guide <cross_validation>` for the various\n        cross-validation strategies that can be used here.\n\n        .. versionchanged:: 0.20\n            ``cv`` default value if None will change from 3-fold to 5-fold\n            in v0.22.\n\n    verbose : bool or integer\n        Amount of verbosity.\n\n    n_jobs : int or None, optional (default=None)\n        Number of CPUs to use during the cross validation. Note that this is\n        used only if multiple values for l1_ratio are given.\n        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`\n        for more details.\n\n    random_state : int, RandomState instance or None, optional, default None\n        The seed of the pseudo random number generator that selects a random\n        feature to update.  If int, random_state is the seed used by the random\n        number generator; If RandomState instance, random_state is the random\n        number generator; If None, the random number generator is the\n        RandomState instance used by `np.random`. Used when ``selection`` ==\n        'random'\n\n    selection : str, default 'cyclic'\n        If set to 'random', a random coefficient is updated every iteration\n        rather than looping over features sequentially by default. This\n        (setting to 'random') often leads to significantly faster convergence\n        especially when tol is higher than 1e-4.\n\n    Attributes\n    ----------\n    intercept_ : array, shape (n_tasks,)\n        Independent term in decision function.\n\n    coef_ : array, shape (n_tasks, n_features)\n        Parameter vector (W in the cost function formula).\n        Note that ``coef_`` stores the transpose of ``W``, ``W.T``.\n\n    alpha_ : float\n        The amount of penalization chosen by cross validation\n\n    mse_path_ : array, shape (n_alphas, n_folds)\n        mean square error for the test set on each fold, varying alpha\n\n    alphas_ : numpy array, shape (n_alphas,)\n        The grid of alphas used for fitting.\n\n    n_iter_ : int\n        number of iterations run by the coordinate descent solver to reach\n        the specified tolerance for the optimal alpha.\n\n    Examples\n    --------\n    >>> from sklearn.linear_model import MultiTaskLassoCV\n    >>> from sklearn.datasets import make_regression\n    >>> X, y = make_regression(n_targets=2, noise=4, random_state=0)\n    >>> reg = MultiTaskLassoCV(cv=5, random_state=0).fit(X, y)\n    >>> reg.score(X, y) # doctest: +ELLIPSIS\n    0.9994...\n    >>> reg.alpha_\n    0.5713...\n    >>> reg.predict(X[:1,])\n    array([[153.7971...,  94.9015...]])\n\n    See also\n    --------\n    MultiTaskElasticNet\n    ElasticNetCV\n    MultiTaskElasticNetCV\n\n    Notes\n    -----\n    The algorithm used to fit the model is coordinate descent.\n\n    To avoid unnecessary memory duplication the X argument of the fit method\n    should be directly passed as a Fortran-contiguous numpy array.\n    \"\"\"\n    path = staticmethod(lasso_path)\n\n    def __init__(self, eps=1e-3, n_alphas=100, alphas=None, fit_intercept=True,\n                 normalize=False, max_iter=1000, tol=1e-4, copy_X=True,\n                 cv='warn', verbose=False, n_jobs=None, random_state=None,\n                 selection='cyclic'):\n        super().__init__(\n            eps=eps, n_alphas=n_alphas, alphas=alphas,\n            fit_intercept=fit_intercept, normalize=normalize,\n            max_iter=max_iter, tol=tol, copy_X=copy_X,\n            cv=cv, verbose=verbose, n_jobs=n_jobs, random_state=random_state,\n            selection=selection)\n\n    def _more_tags(self):\n        return {'multioutput_only': True}"
        },
        {
          "file": "sklearn/multioutput.py",
          "type": "function",
          "name": "partial_fit",
          "class_name": "MultiOutputRegressor",
          "code": "def partial_fit(self, X, y, sample_weight=None):\n        \"\"\"Incrementally fit the model to data.\n        Fit a separate model for each output variable.\n\n        Parameters\n        ----------\n        X : (sparse) array-like, shape (n_samples, n_features)\n            Data.\n\n        y : (sparse) array-like, shape (n_samples, n_outputs)\n            Multi-output targets.\n\n        sample_weight : array-like, shape = (n_samples) or None\n            Sample weights. If None, then samples are equally weighted.\n            Only supported if the underlying regressor supports sample\n            weights.\n\n        Returns\n        -------\n        self : object\n        \"\"\"\n        super().partial_fit(\n            X, y, sample_weight=sample_weight)"
        }
      ]
    },
    {
      "pr_number": 11796,
      "pr_title": "[MRG+2] Fix LDA predict_proba() ",
      "pr_body": "<!--\r\nThanks for contributing a pull request! Please ensure you have taken a look at\r\nthe contribution guidelines: https://github.com/scikit-learn/scikit-learn/blob/master/CONTRIBUTING.md#pull-request-checklist\r\n-->\r\n\r\n#### Reference Issues/PRs\r\nFixes #6848\r\ncloses #11727\r\ncloses #5149\r\n<!--\r\nExample: Fixes #1234. See also #3456.\r\nPlease use keywords (e.g., Fixes) to create link to the issues or pull requests\r\nyou resolved, so that they will automatically be closed when your pull request\r\nis merged. See https://github.com/blog/1506-closing-issues-via-pull-requests\r\n-->\r\n\r\n\r\n#### What does this implement/fix? Explain your changes.\r\nFixes the `predict_proba()` method of LinearDiscriminantAnalysis.\r\nAn `if` statement is used to differentiate between the binary and multi-class case, due to the different output format of the `decision_function` method implemented in the `LinearClassifierMixin` class.\r\n\r\n#### Any other comments?\r\nCopying from #6848:\r\nDo we perhaps want to include additional tests checking the output of predict_proba for LDA and QDA both for the binary and multi-class cases?\r\n\r\n<!--\r\nPlease be aware that we are a loose team of volunteers so patience is\r\nnecessary; assistance handling other issues is very welcome. We value\r\nall user contributions, no matter how minor they are. If we are slow to\r\nreview, either the pull request needs some benchmarking, tinkering,\r\nconvincing, etc. or more likely the reviewers are simply busy. In either\r\ncase, we ask for your understanding during the review process.\r\nFor more information, see our FAQ on this topic:\r\nhttp://scikit-learn.org/dev/faq.html#why-is-my-pull-request-not-getting-any-attention.\r\n\r\nThanks for contributing!\r\n-->\r\n",
      "issue_id": 6848,
      "issue_title": "LinearDiscriminantAnalysis predict probability bug",
      "issue_body": "I am pretty confident there is a bug introduced in commit\n7c1101d7c26ba0b77184cce9c0b9be79adb526de\n\nConcretely, line 518 of the current version \nhttps://github.com/scikit-learn/scikit-learn/blob/master/sklearn/discriminant_analysis.py\nshould be removed as it yields wrong results. \n\nThere is no reason why constant 1 should be added to the computed probability after exponentiation and before inversion. \n\nTo verify this, I have run a one-to-one comparison between the outcome of the method and MATLAB's builtin LDA classifier on the Iris dataset. Only after removal of line 518, results match (up to a tolerance).\n\nIf everyone agrees on that, I am happy to submit a PR.\n",
      "issue_closed_at": "2019-03-07T16:44:18Z",
      "base_commit": "b73a51bcda362d94d8907915a382a8eb403554c8",
      "changes": [
        {
          "file": "sklearn/discriminant_analysis.py",
          "type": "line",
          "name": "line 22",
          "code": "from .utils import check_array, check_X_y\nfrom .utils.validation import check_is_fitted\nfrom .utils.multiclass import check_classification_targets\nfrom .preprocessing import StandardScaler\n\n"
        },
        {
          "file": "sklearn/discriminant_analysis.py",
          "type": "function",
          "name": "predict_proba",
          "class_name": "QuadraticDiscriminantAnalysis",
          "code": "def predict_proba(self, X):\n        \"\"\"Return posterior probabilities of classification.\n\n        Parameters\n        ----------\n        X : array-like, shape = [n_samples, n_features]\n            Array of samples/test vectors.\n\n        Returns\n        -------\n        C : array, shape = [n_samples, n_classes]\n            Posterior probabilities of classification per class.\n        \"\"\"\n        values = self._decision_function(X)\n        # compute the likelihood of the underlying gaussian models\n        # up to a multiplicative constant.\n        likelihood = np.exp(values - values.max(axis=1)[:, np.newaxis])\n        # compute posterior probabilities\n        return likelihood / likelihood.sum(axis=1)[:, np.newaxis]"
        }
      ]
    },
    {
      "pr_number": 11526,
      "pr_title": "[MRG+1] FIX: warns when invalid n_components in LinearDiscriminantAnalysis",
      "pr_body": "#### Reference Issues/PRs\r\nFixes #10048.\r\nFixes #8956. (The second dimension of scalings will always be thresholded (not only for svd, (see https://github.com/scikit-learn/scikit-learn/issues/8956#issuecomment-376881805)))\r\n\r\n#### What does this implement/fix? Explain your changes.\r\nThis PR: \r\n - Raises a `ChangedBehaviourWarning` when the user sets `n_components` > `min(n_features, n_classes - 1)`. In this case it sets the `max_features` (the number of first components to take) to `min(n_features, n_classes - 1)` (and this way it doesn't take the `n_components` into account anymore). It does not throws an error like PCA not to break user code (cf comment: https://github.com/scikit-learn/scikit-learn/issues/6355#issuecomment-340855091). I should maybe provide a `FutureWarning` and throw an error in the future ? \r\n- Changes the docstring, saying that we should have `n_components < min(n_features, n_classes - 1)` (and not just `n_components` < `n_classes - 1`)\r\n- Tests that if the condition is verified no warning is thrown otherwise a warning is thrown\r\n\r\nI did not check explicitly the dimension (just the presence/absence of warnings) because it can still happen that the dimension is unexpected if input points are colinear. I was thinking to tackle this in another PR (raise a warning in that case and/or return the whole `scalings_` (including zeros) without truncation) (see #11528)\r\n\r\n### TODO: \r\n- [x] Add `FutureWarning`",
      "issue_id": 8956,
      "issue_title": "LDA scalings_ gives wrong dimensions",
      "issue_body": "#### Description\r\nFor an array X with shape m x p (m samples and p features) and N classes, the scaling matrix should have  p rows and N-1 columns. \r\n\r\n#### Steps/Code to Reproduce\r\nExample:\r\n```python\r\nfrom sklearn.datasets import make_blobs\r\nfrom sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\r\n\r\nX, label = make_blobs(n_samples=100, n_features=2, centers=5, cluster_std=0.10, random_state=0)\r\nlda = LDA()\r\nXlda = lda.fit(X, label)\r\nXlda.scalings_\r\n#array([[ 7.35157288,  6.76874473],\r\n#       [-6.45391558,  7.97604449]])\r\nXlda.scalings_.shape\r\n#(2, 2)\r\n```\r\n#### Expected Results\r\nI would expect the scalings_ matrix shape to be (2,4) as I have 2 features and the LDA would provide 5-1 components.\r\n#### Actual Results\r\nThe scalings_ matrix shape is currently (2,2)\r\n\r\n#### Versions\r\nWindows-10-10.0.15063-SP0\r\nPython 3.6.0 |Anaconda 4.3.1 (64-bit)| (default, Dec 23 2016, 11:57:41) [MSC v.1900 64 bit (AMD64)]\r\nNumPy 1.11.3\r\nSciPy 0.18.1\r\nScikit-Learn 0.18.1\r\n",
      "issue_closed_at": "2018-12-07T15:31:53Z",
      "base_commit": "dbd28e70be8366c4f789500c988032e2bf1024ec",
      "changes": [
        {
          "file": "sklearn/discriminant_analysis.py",
          "type": "line",
          "name": "line 12",
          "code": "from __future__ import print_function\nimport warnings\nimport numpy as np\nfrom scipy import linalg\nfrom .externals.six import string_types\nfrom .externals.six.moves import xrange"
        },
        {
          "file": "sklearn/discriminant_analysis.py",
          "type": "class",
          "name": "LinearDiscriminantAnalysis",
          "code": "class LinearDiscriminantAnalysis(BaseEstimator, LinearClassifierMixin,\n                                 TransformerMixin):\n    \"\"\"Linear Discriminant Analysis\n\n    A classifier with a linear decision boundary, generated by fitting class\n    conditional densities to the data and using Bayes' rule.\n\n    The model fits a Gaussian density to each class, assuming that all classes\n    share the same covariance matrix.\n\n    The fitted model can also be used to reduce the dimensionality of the input\n    by projecting it to the most discriminative directions.\n\n    .. versionadded:: 0.17\n       *LinearDiscriminantAnalysis*.\n\n    Read more in the :ref:`User Guide <lda_qda>`.\n\n    Parameters\n    ----------\n    solver : string, optional\n        Solver to use, possible values:\n          - 'svd': Singular value decomposition (default).\n            Does not compute the covariance matrix, therefore this solver is\n            recommended for data with a large number of features.\n          - 'lsqr': Least squares solution, can be combined with shrinkage.\n          - 'eigen': Eigenvalue decomposition, can be combined with shrinkage.\n\n    shrinkage : string or float, optional\n        Shrinkage parameter, possible values:\n          - None: no shrinkage (default).\n          - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n          - float between 0 and 1: fixed shrinkage parameter.\n\n        Note that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\n    priors : array, optional, shape (n_classes,)\n        Class priors.\n\n    n_components : int, optional\n        Number of components (< n_classes - 1) for dimensionality reduction.\n\n    store_covariance : bool, optional\n        Additionally compute class covariance matrix (default False), used\n        only in 'svd' solver.\n\n        .. versionadded:: 0.17\n\n    tol : float, optional, (default 1.0e-4)\n        Threshold used for rank estimation in SVD solver.\n\n        .. versionadded:: 0.17\n\n    Attributes\n    ----------\n    coef_ : array, shape (n_features,) or (n_classes, n_features)\n        Weight vector(s).\n\n    intercept_ : array, shape (n_features,)\n        Intercept term.\n\n    covariance_ : array-like, shape (n_features, n_features)\n        Covariance matrix (shared by all classes).\n\n    explained_variance_ratio_ : array, shape (n_components,)\n        Percentage of variance explained by each of the selected components.\n        If ``n_components`` is not set then all components are stored and the\n        sum of explained variances is equal to 1.0. Only available when eigen\n        or svd solver is used.\n\n    means_ : array-like, shape (n_classes, n_features)\n        Class means.\n\n    priors_ : array-like, shape (n_classes,)\n        Class priors (sum to 1).\n\n    scalings_ : array-like, shape (rank, n_classes - 1)\n        Scaling of the features in the space spanned by the class centroids.\n\n    xbar_ : array-like, shape (n_features,)\n        Overall mean.\n\n    classes_ : array-like, shape (n_classes,)\n        Unique class labels.\n\n    See also\n    --------\n    sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis: Quadratic\n        Discriminant Analysis\n\n    Notes\n    -----\n    The default solver is 'svd'. It can perform both classification and\n    transform, and it does not rely on the calculation of the covariance\n    matrix. This can be an advantage in situations where the number of features\n    is large. However, the 'svd' solver cannot be used with shrinkage.\n\n    The 'lsqr' solver is an efficient algorithm that only works for\n    classification. It supports shrinkage.\n\n    The 'eigen' solver is based on the optimization of the between class\n    scatter to within class scatter ratio. It can be used for both\n    classification and transform, and it supports shrinkage. However, the\n    'eigen' solver needs to compute the covariance matrix, so it might not be\n    suitable for situations with a high number of features.\n\n    Examples\n    --------\n    >>> import numpy as np\n    >>> from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])\n    >>> y = np.array([1, 1, 1, 2, 2, 2])\n    >>> clf = LinearDiscriminantAnalysis()\n    >>> clf.fit(X, y)\n    LinearDiscriminantAnalysis(n_components=None, priors=None, shrinkage=None,\n                  solver='svd', store_covariance=False, tol=0.0001)\n    >>> print(clf.predict([[-0.8, -1]]))\n    [1]\n    \"\"\"\n\n    def __init__(self, solver='svd', shrinkage=None, priors=None,\n                 n_components=None, store_covariance=False, tol=1e-4):\n        self.solver = solver\n        self.shrinkage = shrinkage\n        self.priors = priors\n        self.n_components = n_components\n        self.store_covariance = store_covariance  # used only in svd solver\n        self.tol = tol  # used only in svd solver\n\n    def _solve_lsqr(self, X, y, shrinkage):\n        \"\"\"Least squares solver.\n\n        The least squares solver computes a straightforward solution of the\n        optimal decision rule based directly on the discriminant functions. It\n        can only be used for classification (with optional shrinkage), because\n        estimation of eigenvectors is not performed. Therefore, dimensionality\n        reduction with the transform is not supported.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_classes)\n            Target values.\n\n        shrinkage : string or float, optional\n            Shrinkage parameter, possible values:\n              - None: no shrinkage (default).\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage parameter.\n\n        Notes\n        -----\n        This solver is based on [1]_, section 2.6.2, pp. 39-41.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(X, y, self.priors_, shrinkage)\n        self.coef_ = linalg.lstsq(self.covariance_, self.means_.T)[0].T\n        self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +\n                           np.log(self.priors_))\n\n    def _solve_eigen(self, X, y, shrinkage):\n        \"\"\"Eigenvalue solver.\n\n        The eigenvalue solver computes the optimal solution of the Rayleigh\n        coefficient (basically the ratio of between class scatter to within\n        class scatter). This solver supports both classification and\n        dimensionality reduction (with optional shrinkage).\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n\n        shrinkage : string or float, optional\n            Shrinkage parameter, possible values:\n              - None: no shrinkage (default).\n              - 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n              - float between 0 and 1: fixed shrinkage constant.\n\n        Notes\n        -----\n        This solver is based on [1]_, section 3.8.3, pp. 121-124.\n\n        References\n        ----------\n        .. [1] R. O. Duda, P. E. Hart, D. G. Stork. Pattern Classification\n           (Second Edition). John Wiley & Sons, Inc., New York, 2001. ISBN\n           0-471-05669-3.\n        \"\"\"\n        self.means_ = _class_means(X, y)\n        self.covariance_ = _class_cov(X, y, self.priors_, shrinkage)\n\n        Sw = self.covariance_  # within scatter\n        St = _cov(X, shrinkage)  # total scatter\n        Sb = St - Sw  # between scatter\n\n        evals, evecs = linalg.eigh(Sb, Sw)\n        self.explained_variance_ratio_ = np.sort(evals / np.sum(evals)\n                                                 )[::-1][:self._max_components]\n        evecs = evecs[:, np.argsort(evals)[::-1]]  # sort eigenvectors\n        evecs /= np.linalg.norm(evecs, axis=0)\n\n        self.scalings_ = evecs\n        self.coef_ = np.dot(self.means_, evecs).dot(evecs.T)\n        self.intercept_ = (-0.5 * np.diag(np.dot(self.means_, self.coef_.T)) +\n                           np.log(self.priors_))\n\n    def _solve_svd(self, X, y):\n        \"\"\"SVD solver.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data.\n\n        y : array-like, shape (n_samples,) or (n_samples, n_targets)\n            Target values.\n        \"\"\"\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n\n        self.means_ = _class_means(X, y)\n        if self.store_covariance:\n            self.covariance_ = _class_cov(X, y, self.priors_)\n\n        Xc = []\n        for idx, group in enumerate(self.classes_):\n            Xg = X[y == group, :]\n            Xc.append(Xg - self.means_[idx])\n\n        self.xbar_ = np.dot(self.priors_, self.means_)\n\n        Xc = np.concatenate(Xc, axis=0)\n\n        # 1) within (univariate) scaling by with classes std-dev\n        std = Xc.std(axis=0)\n        # avoid division by zero in normalization\n        std[std == 0] = 1.\n        fac = 1. / (n_samples - n_classes)\n\n        # 2) Within variance scaling\n        X = np.sqrt(fac) * (Xc / std)\n        # SVD of centered (within)scaled data\n        U, S, V = linalg.svd(X, full_matrices=False)\n\n        rank = np.sum(S > self.tol)\n        if rank < n_features:\n            warnings.warn(\"Variables are collinear.\")\n        # Scaling of within covariance is: V' 1/S\n        scalings = (V[:rank] / std).T / S[:rank]\n\n        # 3) Between variance scaling\n        # Scale weighted centers\n        X = np.dot(((np.sqrt((n_samples * self.priors_) * fac)) *\n                    (self.means_ - self.xbar_).T).T, scalings)\n        # Centers are living in a space with n_classes-1 dim (maximum)\n        # Use SVD to find projection in the space spanned by the\n        # (n_classes) centers\n        _, S, V = linalg.svd(X, full_matrices=0)\n\n        self.explained_variance_ratio_ = (S**2 / np.sum(\n            S**2))[:self._max_components]\n        rank = np.sum(S > self.tol * S[0])\n        self.scalings_ = np.dot(scalings, V.T[:, :rank])\n        coef = np.dot(self.means_ - self.xbar_, self.scalings_)\n        self.intercept_ = (-0.5 * np.sum(coef ** 2, axis=1) +\n                           np.log(self.priors_))\n        self.coef_ = np.dot(coef, self.scalings_.T)\n        self.intercept_ -= np.dot(self.xbar_, self.coef_.T)\n\n    def fit(self, X, y):\n        \"\"\"Fit LinearDiscriminantAnalysis model according to the given\n           training data and parameters.\n\n           .. versionchanged:: 0.19\n              *store_covariance* has been moved to main constructor.\n\n           .. versionchanged:: 0.19\n              *tol* has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Training data.\n\n        y : array, shape (n_samples,)\n            Target values.\n        \"\"\"\n        X, y = check_X_y(X, y, ensure_min_samples=2, estimator=self)\n        self.classes_ = unique_labels(y)\n        n_samples, _ = X.shape\n        n_classes = len(self.classes_)\n\n        if n_samples == n_classes:\n            raise ValueError(\"The number of samples must be more \"\n                             \"than the number of classes.\")\n\n        if self.priors is None:  # estimate priors from sample\n            _, y_t = np.unique(y, return_inverse=True)  # non-negative ints\n            self.priors_ = np.bincount(y_t) / float(len(y))\n        else:\n            self.priors_ = np.asarray(self.priors)\n\n        if (self.priors_ < 0).any():\n            raise ValueError(\"priors must be non-negative\")\n        if not np.isclose(self.priors_.sum(), 1.0):\n            warnings.warn(\"The priors do not sum to 1. Renormalizing\",\n                          UserWarning)\n            self.priors_ = self.priors_ / self.priors_.sum()\n\n        # Get the maximum number of components\n        if self.n_components is None:\n            self._max_components = len(self.classes_) - 1\n        else:\n            self._max_components = min(len(self.classes_) - 1,\n                                       self.n_components)\n\n        if self.solver == 'svd':\n            if self.shrinkage is not None:\n                raise NotImplementedError('shrinkage not supported')\n            self._solve_svd(X, y)\n        elif self.solver == 'lsqr':\n            self._solve_lsqr(X, y, shrinkage=self.shrinkage)\n        elif self.solver == 'eigen':\n            self._solve_eigen(X, y, shrinkage=self.shrinkage)\n        else:\n            raise ValueError(\"unknown solver {} (valid solvers are 'svd', \"\n                             \"'lsqr', and 'eigen').\".format(self.solver))\n        if self.classes_.size == 2:  # treat binary case as a special case\n            self.coef_ = np.array(self.coef_[1, :] - self.coef_[0, :], ndmin=2)\n            self.intercept_ = np.array(self.intercept_[1] - self.intercept_[0],\n                                       ndmin=1)\n        return self\n\n    def transform(self, X):\n        \"\"\"Project data to maximize class separation.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        X_new : array, shape (n_samples, n_components)\n            Transformed data.\n        \"\"\"\n        if self.solver == 'lsqr':\n            raise NotImplementedError(\"transform not implemented for 'lsqr' \"\n                                      \"solver (use 'svd' or 'eigen').\")\n        check_is_fitted(self, ['xbar_', 'scalings_'], all_or_any=any)\n\n        X = check_array(X)\n        if self.solver == 'svd':\n            X_new = np.dot(X - self.xbar_, self.scalings_)\n        elif self.solver == 'eigen':\n            X_new = np.dot(X, self.scalings_)\n\n        return X_new[:, :self._max_components]\n\n    def predict_proba(self, X):\n        \"\"\"Estimate probability.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : array, shape (n_samples, n_classes)\n            Estimated probabilities.\n        \"\"\"\n        prob = self.decision_function(X)\n        prob *= -1\n        np.exp(prob, prob)\n        prob += 1\n        np.reciprocal(prob, prob)\n        if len(self.classes_) == 2:  # binary case\n            return np.column_stack([1 - prob, prob])\n        else:\n            # OvR normalization, like LibLinear's predict_probability\n            prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))\n            return prob\n\n    def predict_log_proba(self, X):\n        \"\"\"Estimate log probability.\n\n        Parameters\n        ----------\n        X : array-like, shape (n_samples, n_features)\n            Input data.\n\n        Returns\n        -------\n        C : array, shape (n_samples, n_classes)\n            Estimated log probabilities.\n        \"\"\"\n        return np.log(self.predict_proba(X))"
        },
        {
          "file": "sklearn/discriminant_analysis.py",
          "type": "function",
          "name": "fit",
          "class_name": "QuadraticDiscriminantAnalysis",
          "code": "def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n            .. versionchanged:: 0.19\n               ``store_covariances`` has been moved to main constructor as\n               ``store_covariance``\n\n            .. versionchanged:: 0.19\n               ``tol`` has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like, shape = [n_samples, n_features]\n            Training vector, where n_samples is the number of samples and\n            n_features is the number of features.\n\n        y : array, shape = [n_samples]\n            Target values (integers)\n        \"\"\"\n        X, y = check_X_y(X, y)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n        if n_classes < 2:\n            raise ValueError('The number of classes has to be greater than'\n                             ' one; got %d class' % (n_classes))\n        if self.priors is None:\n            self.priors_ = np.bincount(y) / float(n_samples)\n        else:\n            self.priors_ = self.priors\n\n        cov = None\n        store_covariance = self.store_covariance\n        if store_covariance:\n            cov = []\n        means = []\n        scalings = []\n        rotations = []\n        for ind in xrange(n_classes):\n            Xg = X[y == ind, :]\n            meang = Xg.mean(0)\n            means.append(meang)\n            if len(Xg) == 1:\n                raise ValueError('y has only 1 sample in class %s, covariance '\n                                 'is ill defined.' % str(self.classes_[ind]))\n            Xgc = Xg - meang\n            # Xgc = U * S * V.T\n            U, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n            rank = np.sum(S > self.tol)\n            if rank < n_features:\n                warnings.warn(\"Variables are collinear\")\n            S2 = (S ** 2) / (len(Xg) - 1)\n            S2 = ((1 - self.reg_param) * S2) + self.reg_param\n            if self.store_covariance or store_covariance:\n                # cov = V * (S^2 / (n-1)) * V.T\n                cov.append(np.dot(S2 * Vt.T, Vt))\n            scalings.append(S2)\n            rotations.append(Vt.T)\n        if self.store_covariance or store_covariance:\n            self.covariance_ = cov\n        self.means_ = np.asarray(means)\n        self.scalings_ = scalings\n        self.rotations_ = rotations\n        return self"
        },
        {
          "file": "sklearn/discriminant_analysis.py",
          "type": "function",
          "name": "fit",
          "class_name": "QuadraticDiscriminantAnalysis",
          "code": "def fit(self, X, y):\n        \"\"\"Fit the model according to the given training data and parameters.\n\n            .. versionchanged:: 0.19\n               ``store_covariances`` has been moved to main constructor as\n               ``store_covariance``\n\n            .. versionchanged:: 0.19\n               ``tol`` has been moved to main constructor.\n\n        Parameters\n        ----------\n        X : array-like, shape = [n_samples, n_features]\n            Training vector, where n_samples is the number of samples and\n            n_features is the number of features.\n\n        y : array, shape = [n_samples]\n            Target values (integers)\n        \"\"\"\n        X, y = check_X_y(X, y)\n        check_classification_targets(y)\n        self.classes_, y = np.unique(y, return_inverse=True)\n        n_samples, n_features = X.shape\n        n_classes = len(self.classes_)\n        if n_classes < 2:\n            raise ValueError('The number of classes has to be greater than'\n                             ' one; got %d class' % (n_classes))\n        if self.priors is None:\n            self.priors_ = np.bincount(y) / float(n_samples)\n        else:\n            self.priors_ = self.priors\n\n        cov = None\n        store_covariance = self.store_covariance\n        if store_covariance:\n            cov = []\n        means = []\n        scalings = []\n        rotations = []\n        for ind in xrange(n_classes):\n            Xg = X[y == ind, :]\n            meang = Xg.mean(0)\n            means.append(meang)\n            if len(Xg) == 1:\n                raise ValueError('y has only 1 sample in class %s, covariance '\n                                 'is ill defined.' % str(self.classes_[ind]))\n            Xgc = Xg - meang\n            # Xgc = U * S * V.T\n            U, S, Vt = np.linalg.svd(Xgc, full_matrices=False)\n            rank = np.sum(S > self.tol)\n            if rank < n_features:\n                warnings.warn(\"Variables are collinear\")\n            S2 = (S ** 2) / (len(Xg) - 1)\n            S2 = ((1 - self.reg_param) * S2) + self.reg_param\n            if self.store_covariance or store_covariance:\n                # cov = V * (S^2 / (n-1)) * V.T\n                cov.append(np.dot(S2 * Vt.T, Vt))\n            scalings.append(S2)\n            rotations.append(Vt.T)\n        if self.store_covariance or store_covariance:\n            self.covariance_ = cov\n        self.means_ = np.asarray(means)\n        self.scalings_ = scalings\n        self.rotations_ = rotations\n        return self"
        }
      ]
    },
    {
      "pr_number": 8263,
      "pr_title": "[MRG+2] ENH: used SelectorMixin in BaseRandomizedLinearModel",
      "pr_body": "#### Reference Issue\r\nFixes #8259\r\n\r\n#### What does this implement/fix? Explain your changes.\r\nThis uses SelectorMixin in BaseRandomizedLinearModel renaming the `get_support` function to `_get_support_mask` in order to utilize the transform() and inverse_transform() methods provided by `SelectorMixin`.",
      "issue_id": 8259,
      "issue_title": "Use SelectorMixin in BaseRandomizedLinearModel",
      "issue_body": "Is there any reason not to? \r\n\r\nFixing this would also fix a bug where `Randomized*.transform` erroneously fails on a sparse matrix.",
      "issue_closed_at": "2017-02-13T12:10:19Z",
      "base_commit": "dfcf6322caac64ef49adee7a8faa92cfbd6473d5",
      "changes": [
        {
          "file": "sklearn/linear_model/randomized_l1.py",
          "type": "line",
          "name": "line 16",
          "code": "from scipy.interpolate import interp1d\n\nfrom .base import _preprocess_data\nfrom ..base import BaseEstimator, TransformerMixin\nfrom ..externals import six\nfrom ..externals.joblib import Memory, Parallel, delayed\nfrom ..utils import (as_float_array, check_random_state, check_X_y,\n                     check_array, safe_mask)\nfrom ..utils.validation import check_is_fitted\nfrom .least_angle import lars_path, LassoLarsIC\nfrom .logistic import LogisticRegression"
        },
        {
          "file": "sklearn/linear_model/randomized_l1.py",
          "type": "function",
          "name": "_resample_model",
          "class_name": null,
          "code": "def _resample_model(estimator_func, X, y, scaling=.5, n_resampling=200,\n                    n_jobs=1, verbose=False, pre_dispatch='3*n_jobs',\n                    random_state=None, sample_fraction=.75, **params):\n    random_state = check_random_state(random_state)\n    # We are generating 1 - weights, and not weights\n    n_samples, n_features = X.shape\n\n    if not (0 < scaling < 1):\n        raise ValueError(\n            \"'scaling' should be between 0 and 1. Got %r instead.\" % scaling)\n\n    scaling = 1. - scaling\n    scores_ = 0.0\n    for active_set in Parallel(n_jobs=n_jobs, verbose=verbose,\n                               pre_dispatch=pre_dispatch)(\n            delayed(estimator_func)(\n                X, y, weights=scaling * random_state.randint(\n                    0, 2, size=(n_features,)),\n                mask=(random_state.rand(n_samples) < sample_fraction),\n                verbose=max(0, verbose - 1),\n                **params)\n            for _ in range(n_resampling)):\n        scores_ += active_set\n\n    scores_ /= n_resampling\n    return scores_"
        },
        {
          "file": "sklearn/linear_model/randomized_l1.py",
          "type": "function",
          "name": "fit",
          "class_name": "BaseRandomizedLinearModel",
          "code": "def fit(self, X, y):\n        \"\"\"Fit the model using X, y as training data.\n\n        Parameters\n        ----------\n        X : array-like, shape = [n_samples, n_features]\n            Training data.\n\n        y : array-like, shape = [n_samples]\n            Target values.\n\n        Returns\n        -------\n        self : object\n            Returns an instance of self.\n        \"\"\"\n        X, y = check_X_y(X, y, ['csr', 'csc'], y_numeric=True,\n                         ensure_min_samples=2, estimator=self)\n        X = as_float_array(X, copy=False)\n        n_samples, n_features = X.shape\n\n        X, y, X_offset, y_offset, X_scale = \\\n            self._preprocess_data(X, y, self.fit_intercept, self.normalize)\n\n        estimator_func, params = self._make_estimator_and_params(X, y)\n        memory = self.memory\n        if isinstance(memory, six.string_types):\n            memory = Memory(cachedir=memory)\n\n        scores_ = memory.cache(\n            _resample_model, ignore=['verbose', 'n_jobs', 'pre_dispatch']\n        )(\n            estimator_func, X, y,\n            scaling=self.scaling, n_resampling=self.n_resampling,\n            n_jobs=self.n_jobs, verbose=self.verbose,\n            pre_dispatch=self.pre_dispatch, random_state=self.random_state,\n            sample_fraction=self.sample_fraction, **params)\n\n        if scores_.ndim == 1:\n            scores_ = scores_[:, np.newaxis]\n        self.all_scores_ = scores_\n        self.scores_ = np.max(self.all_scores_, axis=1)\n        return self"
        },
        {
          "file": "sklearn/linear_model/randomized_l1.py",
          "type": "function",
          "name": "_make_estimator_and_params",
          "class_name": "RandomizedLogisticRegression",
          "code": "def _make_estimator_and_params(self, X, y):\n        params = dict(C=self.C, tol=self.tol,\n                      fit_intercept=self.fit_intercept)\n        return _randomized_logistic, params"
        }
      ]
    }
  ]
}