# Source code for yellowbrick.contrib.statsmodels.base

# yellowbrick.contrib.statsmodels.base
# A basic wrapper for statsmodels that emulates a scikit-learn estimator.
#
# Author:  Ian Ozsvald
# Created: Wed Jan 10 12:47:00 2018 -0500
#
# ID: base.py [d6ebc39] benjamin@bengfort.com $

"""
A basic wrapper for statsmodels that emulates a scikit-learn estimator.
"""

##########################################################################
## Imports
##########################################################################

from sklearn.metrics import r2_score
from sklearn.base import BaseEstimator


##########################################################################
## statsmodels Estimator
##########################################################################


class StatsModelsWrapper(BaseEstimator):
    """
    Wrap a statsmodels GLM as a sklearn (fake) BaseEstimator for YellowBrick.

    Parameters
    ----------
    glm_partial : callable
        A callable (typically a ``functools.partial`` around ``sm.GLM``) that
        is invoked as ``glm_partial(y, X)`` during ``fit`` and returns an
        unfitted model exposing a ``fit()`` method.

    stated_estimator_type : str, default: "regressor"
        The estimator type to advertise through ``_estimator_type``.

    scorer : callable, default: ``sklearn.metrics.r2_score``
        Scoring function called as ``scorer(y_true, y_pred)`` by ``score``.

    Attributes
    ----------
    glm_model
        The object returned by ``glm_partial(y, X)``; set by ``fit``.

    glm_results
        The object returned by ``glm_model.fit()``; set by ``fit``.

    Examples
    --------
    First import the external libraries and helper utilities:

    >>> import statsmodels.api as sm
    >>> from functools import partial
    >>> from yellowbrick.regressor import PredictionError

    Instantiate a partial with the statsmodels API:

    >>> glm_gaussian_partial = partial(sm.GLM, family=sm.families.Gaussian())
    >>> sm_est = StatsModelsWrapper(glm_gaussian_partial)

    Create a Yellowbrick visualizer to visualize prediction error:

    >>> visualizer = PredictionError(sm_est)
    >>> visualizer.fit(X_train, y_train)
    >>> visualizer.score(X_test, y_test)

    For statsmodels usage, calling .summary() etc:

    >>> gaussian_model = glm_gaussian_partial(y_train, X_train)

    Notes
    -----
    .. note:: This wrapper is trivial, options and extra things like weights
        are not currently handled.
    """

    def __init__(self, glm_partial, stated_estimator_type="regressor", scorer=r2_score):

        # YellowBrick checks the attribute to see if it is a
        # regressor/clusterer/classifier
        self._estimator_type = stated_estimator_type

        # BaseEstimator.get_params() (and therefore clone() and repr()) reads
        # every constructor argument back from an attribute of the same name,
        # so the argument must also be stored under its own name.
        self.stated_estimator_type = stated_estimator_type

        # assume user passes in a partial which we can instantiate later
        self.glm_partial = glm_partial

        # needs a default scoring function, regression uses r^2 in sklearn
        self.scorer = scorer

    def fit(self, X, y):
        """
        Instantiate the wrapped model from ``glm_partial`` and fit it.

        Parameters
        ----------
        X : array-like
            Exogenous (feature) data, passed as the second argument to
            ``glm_partial``.

        y : array-like
            Endogenous (target) data, passed as the first argument to
            ``glm_partial``.

        Returns
        -------
        self : StatsModelsWrapper
            This instance, with ``glm_model`` and ``glm_results`` set.
        """

        # note that GLM takes endog (y) and then exog (X):
        # this is the reverse of sklearn's methods
        self.glm_model = self.glm_partial(y, X)
        self.glm_results = self.glm_model.fit()
        return self

    def predict(self, X):
        """
        Return the fitted results' predictions for ``X``.

        Delegates to ``glm_results.predict(X)``; ``fit`` must have been called
        first, otherwise ``glm_results`` does not exist and an
        ``AttributeError`` is raised.
        """
        return self.glm_results.predict(X)

    def score(self, X, y):
        """
        Score predictions for ``X`` against ``y`` using ``scorer``.

        Returns whatever ``scorer(y, self.predict(X))`` returns (R^2 with the
        default ``r2_score``).
        """
        return self.scorer(y, self.predict(X))