from sklearn.cluster import KMeans
from sklearn.feature_extraction.text import TfidfVectorizer

from yellowbrick.text import umap
from yellowbrick.datasets import load_hobbies

# Load the hobbies corpus through Yellowbrick's dataset loader
corpus = load_hobbies()

# Turn the raw documents into a TF-IDF weighted document-term matrix
tfidf = TfidfVectorizer()
docs = tfidf.fit_transform(corpus.data)

# Cluster the vectorized documents into five groups; fit() hands back the
# fitted estimator, so the construction and fitting can be chained
clusters = KMeans(n_clusters=5).fit(docs)

# Label every document with its cluster assignment ("c<label>") and pass the
# vectors plus those labels to Yellowbrick's umap helper
cluster_tags = list(map("c{}".format, clusters.labels_))
viz = umap(docs, cluster_tags)