from collections import defaultdict

from sklearn.feature_extraction.text import CountVectorizer

from yellowbrick.text import freqdist
from yellowbrick.datasets import load_hobbies

# Load the text data
corpus = load_hobbies()

# Create a dict to map target labels to documents of that category
hobbies = defaultdict(list)
for text, label in zip(corpus.data, corpus.target):
    hobbies[label].append(text)

vectorizer = CountVectorizer(stop_words='english')
docs       = vectorizer.fit_transform(text for text in hobbies['cinema'])
features   = vectorizer.get_feature_names()

freqdist(features, docs, orient='v')