# Source code for yellowbrick.contrib.statsmodels.base

# yellowbrick.contrib.statsmodels.base
# A basic wrapper for statsmodels that emulates a scikit-learn estimator.
#
# Author:  Ian Ozsvald
# Created: Wed Jan 10 12:47:00 2018 -0500
#
# ID: base.py [d6ebc39] benjamin@bengfort.com $

"""
A basic wrapper for statsmodels that emulates a scikit-learn estimator.
"""

##########################################################################
## Imports
##########################################################################

from sklearn.metrics import r2_score
from sklearn.base import BaseEstimator


##########################################################################
## statsmodels Estimator
##########################################################################


class StatsModelsWrapper(BaseEstimator):
    """
    Wrap a statsmodels GLM as a sklearn (fake) BaseEstimator for YellowBrick.

    Parameters
    ----------
    glm_partial : callable
        A callable (typically a ``functools.partial`` around a statsmodels
        model class) that is invoked as ``glm_partial(y, X)`` inside ``fit``.
        The object it returns must provide a ``fit()`` method whose result
        provides ``predict(X)``.

    stated_estimator_type : str, default: "regressor"
        Value stored on ``_estimator_type``, the attribute YellowBrick
        checks to see if the estimator is a regressor/clusterer/classifier.

    scorer : callable, default: sklearn.metrics.r2_score
        Scoring function called as ``scorer(y, y_pred)`` by ``score``.

    Attributes
    ----------
    glm_model : object
        The object returned by ``glm_partial(y, X)``; set by ``fit``.

    glm_results : object
        The object returned by ``glm_model.fit()``; set by ``fit``.

    Examples
    --------
    First import the external libraries and helper utilities:

    >>> import statsmodels.api as sm
    >>> from functools import partial

    Instantiate a partial with the statsmodels API:

    >>> glm_gaussian_partial = partial(sm.GLM, family=sm.families.Gaussian())
    >>> sm_est = StatsModelsWrapper(glm_gaussian_partial)

    Create a Yellowbrick visualizer to visualize prediction error:

    >>> visualizer = PredictionError(sm_est)
    >>> visualizer.fit(X_train, y_train)
    >>> visualizer.score(X_test, y_test)

    For statsmodels usage, calling .summary() etc:

    >>> gaussian_model = glm_gaussian_partial(y_train, X_train)

    Notes
    -----
    .. note:: This wrapper is trivial, options and extra things like weights
        are not currently handled.
    """

    def __init__(self, glm_partial, stated_estimator_type="regressor", scorer=r2_score):

        # YellowBrick checks the attribute to see if it is a
        # regressor/clusterer/classifier
        self._estimator_type = stated_estimator_type

        # BaseEstimator.get_params() reads every __init__ argument back from
        # an attribute of the same name; without this, get_params(), clone()
        # and repr() cannot find ``stated_estimator_type`` on the instance.
        self.stated_estimator_type = stated_estimator_type

        # assume user passes in a partial which we can instantiate later
        self.glm_partial = glm_partial

        # needs a default scoring function, regression uses r^2 in sklearn
        self.scorer = scorer

    def fit(self, X, y):
        """
        Pretend to be a sklearn estimator: build the model from the stored
        partial and fit it.

        Parameters
        ----------
        X : array-like
            Feature data, forwarded to the partial as its second argument.

        y : array-like
            Target data, forwarded to the partial as its first argument.

        Returns
        -------
        self : StatsModelsWrapper
            The wrapper itself, with ``glm_model`` and ``glm_results`` set.
        """
        # note that GLM takes endog (y) and then exog (X):
        # this is the reverse of sklearn's methods
        self.glm_model = self.glm_partial(y, X)
        self.glm_results = self.glm_model.fit()
        return self

    def predict(self, X):
        """
        Return the predictions of the fitted results object for ``X``.

        ``fit`` must have been called first, since this reads
        ``self.glm_results``.
        """
        return self.glm_results.predict(X)

    def score(self, X, y):
        """
        Score the predictions for ``X`` against ``y`` using ``self.scorer``.

        The scorer is called as ``scorer(y, self.predict(X))``, i.e. true
        values first, predictions second.
        """
        return self.scorer(y, self.predict(X))