Categoricals can only take on a limited number of possible values (categories) and
can be sorted according to the custom order of the categories.
To use the Categorical data type in Lets-Plot, you can either add a pandas.Categorical variable to
your pandas.DataFrame or annotate any variable in your dataset as categorical using
the Lets-Plot as_discrete() function with its levels parameter.
import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
# Configure Lets-Plot to render plots as HTML output in this notebook.
LetsPlot.setup_html()

# Load the mpg dataset from the Lets-Plot docs repository and preview
# its first four rows.
mpg_df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"
)
mpg_df.head(4)
# Unordered Discrete by Default
# The raw 'manufacturer' column is mapped to x as-is, with no explicit
# category order supplied.
(
    ggplot(mpg_df)
    + geom_bar(aes(x='manufacturer'))
    + coord_flip()
)
# Build the list of manufacturers ordered by the number of vehicles each one
# has in the dataset (value_counts() sorts by descending count by default).
brands_by_count = (
    mpg_df['manufacturer']
    .value_counts()
    .index
    .tolist()
)
brands_by_count
# pandas.Categorical Variable
# Wrap the manufacturers in an ordered pandas Categorical whose categories
# follow brands_by_count, then attach it to the data frame as a new column.
manufacturer_cat = pd.Categorical(
    mpg_df['manufacturer'],
    categories=brands_by_count,
    ordered=True,
)
mpg_df['manufacturer_cat'] = manufacturer_cat

# Horizontal bar chart over the categorical column. Each bar is labelled with
# its '..count..' value formatted as an integer ('d'); tooltips are disabled.
(
    ggplot(mpg_df)
    + geom_bar(
        aes(x='manufacturer_cat'),
        labels=layer_labels(['..count..']).format('..count..', 'd'),
        tooltips='none',
    )
    + coord_flip()
)
# Categorical Using as_discrete(levels=..)
# Same horizontal bar chart, but the category order is declared directly in
# the mapping: the plain 'manufacturer' column is annotated with
# as_discrete(), passing brands_by_count as its levels.
(
    ggplot(mpg_df)
    + geom_bar(
        aes(x=as_discrete('manufacturer', levels=brands_by_count)),
        labels=layer_labels(['..count..']).format('..count..', 'd'),
        tooltips='none',
    )
    + coord_flip()
)
# One pie per manufacturer, laid out in a 5-column facet grid on the
# 'manufacturer_cat' column. Slices are filled by 'drv' and the pie size is
# mapped to the '..sum..' statistic.
# NOTE(review): order=0 presumably turns off facet_wrap's own panel sorting so
# the categorical order is kept - confirm against the facet_wrap docs.
(
    ggplot(mpg_df)
    + geom_pie(aes(fill='drv', size='..sum..'))
    + facet_wrap(facets='manufacturer_cat', ncol=5, order=0)
    + scale_size(range=[2, 10])  # pie size range
    + guides(size='none')        # hide the size legend
    + theme_void()
)