# Source code for yellowbrick.features.pca

# yellowbrick.features.pca
# Decomposition based feature visualization with PCA.
#
# Author:   Carlo Morales <@cjmorale>
# Created:  Tue May 23 18:34:27 2017 -0400
#
# Copyright (C) 2017 District Data Labs
# For license information, see LICENSE.txt
#
# ID: pca.py [] cmorales@pacificmetrics.com $

"""
Decomposition based feature visualization with PCA.
"""

##########################################################################
## Imports
##########################################################################

# NOTE: must import mplot3d to load the 3D projection
import mpl_toolkits.mplot3d # noqa 
import matplotlib.pyplot as plt

from yellowbrick.features.base import FeatureVisualizer
from yellowbrick.style import palettes
from yellowbrick.exceptions import YellowbrickValueError

from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


##########################################################################
## Quick Methods
##########################################################################

def pca_decomposition(X, y=None, ax=None, scale=True, proj_dim=2,
                      colormap=palettes.DEFAULT_SEQUENCE, color=None, **kwargs):
    """Produce a two or three dimensional principal component plot of the data array ``X``
    projected onto its largest sequential principal components. It is common practice to scale the
    data array ``X`` before applying a PC decomposition. Variable scaling can be controlled using
    the ``scale`` argument.

    This is a quick method: it builds a ``PCADecomposition`` visualizer, fits it,
    transforms ``X`` (which draws the scatter plot) and then calls ``poof()``.

    Parameters
    ----------
    X : ndarray or DataFrame of shape n x m
        A matrix of n instances with m features.

    y : ndarray or Series of length n, default: None
        An array or series of target or class values. It is forwarded to
        ``fit``; use ``color`` to colorize the points.

    ax : matplotlib Axes, default: None
        The axes to plot the figure on.

    scale : bool, default: True
        Boolean that indicates if the values of X should be scaled.

    proj_dim : int, default: 2
        Dimension of the PCA visualizer (2 or 3).

    colormap : string or cmap, default: palettes.DEFAULT_SEQUENCE
        Optional string or matplotlib cmap used to colorize the points when
        ``color`` holds numeric values.

    color : list or tuple of colors, default: None
        Specify the color of each point (passed to the scatter plot).

    kwargs : dict
        Keyword arguments that are passed to the visualizer constructor
        (and from there to the base class) and may influence the
        visualization as defined in other Visualizers.

    Returns
    -------
    The value returned by ``visualizer.poof()``.
    NOTE(review): the base class is not visible from this module, so what
    ``poof()`` returns could not be confirmed here.

    Examples
    --------
    >>> from sklearn import datasets
    >>> iris = datasets.load_iris()
    >>> X = iris.data
    >>> y = iris.target
    >>> pca_decomposition(X, color=y, proj_dim=3, colormap='RdBu_r')

    """
    # Instantiate the visualizer. Extra keyword arguments belong to the
    # constructor, as documented above; ``fit`` ignores them. ``X`` and ``y``
    # are not constructor parameters, so they are only handed to ``fit`` and
    # ``transform`` below.
    visualizer = PCADecomposition(
        ax=ax, scale=scale, proj_dim=proj_dim,
        colormap=colormap, color=color, **kwargs
    )

    # Fit and transform the visualizer (transform calls draw)
    visualizer.fit(X, y)
    visualizer.transform(X)

    # Finalize and show the plot; hand back whatever poof() returns
    return visualizer.poof()

##########################################################################
## 2D and 3D PCA Visualizer
##########################################################################

class PCADecomposition(FeatureVisualizer):
    """
    Produce a two or three dimensional principal component plot of a data
    array projected onto its largest sequential principal components. It is
    common practice to scale the data array before applying a PC
    decomposition. Variable scaling can be controlled using the ``scale``
    argument.

    The data array ``X`` is not a constructor argument; it is supplied to
    ``fit`` and ``transform``.

    Parameters
    ----------
    ax : matplotlib Axes, default: None
        The axes to plot the figure on. In 3D mode (``proj_dim=3``) a new
        figure with 3D axes is created when drawing and replaces this value.

    scale : bool, default: True
        Boolean that indicates if user wants to scale data. The value is
        passed as ``with_std`` to the ``StandardScaler`` step that always
        runs before the PCA step.

    color : list or tuple of colors, default: None
        Specify the color of each point; passed as ``c`` to the scatter plot.

    proj_dim : int, default: 2
        Dimension of the PCA visualizer. Must be 2 or 3.

    colormap : string or cmap, default: palettes.DEFAULT_SEQUENCE
        Optional string or matplotlib cmap; passed as ``cmap`` to the
        scatter plot.

    kwargs : dict
        Keyword arguments that are passed to the base class and may influence
        the visualization as defined in other Visualizers.

    Attributes
    ----------
    pca_transformer : sklearn Pipeline
        The ``StandardScaler`` -> ``PCA`` pipeline fitted by ``fit``.

    pca_features_ : ndarray
        The projected data produced by the most recent ``transform`` call.

    Raises
    ------
    YellowbrickValueError
        If ``proj_dim`` is not 2 or 3.

    Examples
    --------
    >>> from sklearn import datasets
    >>> iris = datasets.load_iris()
    >>> X = iris.data
    >>> y = iris.target
    >>> visualizer = PCADecomposition(scale=True, color=y)
    >>> visualizer.fit(X)
    >>> visualizer.transform(X)
    >>> visualizer.poof()
    """

    def __init__(self, ax=None, scale=True, color=None, proj_dim=2,
                 colormap=palettes.DEFAULT_SEQUENCE, **kwargs):
        super(PCADecomposition, self).__init__(ax=ax, **kwargs)

        # Data Parameters
        if proj_dim not in (2, 3):
            raise YellowbrickValueError("proj_dim object is not 2 or 3.")

        self.color = color
        self.scale = scale
        self.proj_dim = proj_dim
        self.pca_transformer = Pipeline([
            ('scale', StandardScaler(with_std=self.scale)),
            ('pca', PCA(self.proj_dim)),
        ])

        # Visual Parameters
        self.colormap = colormap

    def fit(self, X, y=None, **kwargs):
        """
        Fit the scaling + PCA pipeline on ``X``.

        ``y`` and ``kwargs`` are accepted for API compatibility and are not
        used. Returns ``self``.
        """
        self.pca_transformer.fit(X)
        return self

    def transform(self, X, y=None, **kwargs):
        """
        Project ``X`` with the fitted pipeline, draw the scatter plot and
        return the projected array (also stored as ``pca_features_``).
        """
        self.pca_features_ = self.pca_transformer.transform(X)
        self.draw()
        return self.pca_features_

    def draw(self, **kwargs):
        """
        Scatter the projected features stored in ``pca_features_`` and
        return the axes that were drawn on.
        """
        X = self.pca_features_

        if self.proj_dim == 2:
            self.ax.scatter(X[:, 0], X[:, 1], c=self.color, cmap=self.colormap)

        if self.proj_dim == 3:
            # A 3D projection needs dedicated axes, so a new figure is
            # created. Keep the figure in ``fig`` and the 3D axes in ``ax``;
            # the collection returned by scatter() must not overwrite
            # ``ax``, otherwise ``ax`` stops being an axes object.
            self.fig = plt.figure()
            self.ax = self.fig.add_subplot(111, projection='3d')
            self.ax.scatter(X[:, 0], X[:, 1], X[:, 2],
                            c=self.color, cmap=self.colormap)

        return self.ax

    def finalize(self, **kwargs):
        """
        Set the title and the axis labels of the plot.
        """
        if self.proj_dim == 2:
            self.set_title('Principal Component Plot')
            self.ax.set_ylabel('Principal Component 2')
            self.ax.set_xlabel('Principal Component 1')
        else:
            # ``ax`` holds the 3D axes created in draw()
            self.ax.set_title('Principal Component Plot')
            self.ax.set_xlabel('Principal Component 1')
            self.ax.set_ylabel('Principal Component 2')
            self.ax.set_zlabel('Principal Component 3')