import pandas as pd
from lets_plot import *
from lets_plot.mapping import as_discrete
# Run the lets-plot HTML setup hook once, before any plot is built.
# NOTE(review): presumably only needed for notebook/HTML front ends — confirm
# whether it is required when this file is run as a plain script.
LetsPlot.setup_html()
# Quantile levels (the three quartiles) passed as `draw_quantiles` to the
# violin layers below.
DRAW_QUANTILES = [0.25, 0.5, 0.75]
def plot_matrix(plots=None, width=400, height=300, columns=2):
    """Lay plots out on a regular grid inside one ``GGBunch`` and show it.

    Plots are placed left to right, top to bottom: the plot at position
    ``i`` lands in row ``i // columns`` and column ``i % columns``, with its
    top-left corner at ``(column * width, row * height)``.

    Args:
        plots: Iterable of plot objects to arrange. ``None`` (the default)
            is treated as "no plots", which yields an empty bunch.
        width: Width given to every cell (in the units expected by
            ``GGBunch.add_plot`` — presumably pixels).
        height: Height given to every cell (same units as ``width``).
        columns: Number of cells per row; must be non-zero.

    Returns:
        Whatever ``GGBunch.show()`` returns for the assembled bunch.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if plots is None:
        plots = ()

    # NOTE(review): recent lets-plot releases appear to deprecate GGBunch in
    # favour of ggbunch()/gggrid() — confirm against the installed version.
    bunch = GGBunch()
    for index, plot in enumerate(plots):
        # Integer row/column of this cell; divmod avoids the float round-trip
        # of int(index / columns).
        row, column = divmod(index, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()
# Load the mpg sample dataset from the lets-plot docs repository.
mpg_df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"
)

# Peek at the first rows (only displayed when evaluated as a notebook cell).
mpg_df.head()

# Minimal violin: only the `hwy` column is mapped, to the y aesthetic.
(
    ggplot(mpg_df, aes(y='hwy'))
    + geom_violin()
    + ggtitle("Simplest example")
)
# Side-by-side comparison of the same data drawn two ways.

# Density of `hwy` per `drv` value, one facet per `drv`, with flipped axes.
p_d = (
    ggplot(mpg_df)
    + geom_density(aes(x='hwy', fill='drv'), color='black', alpha=.5)
    + facet_grid(x='drv')
    + coord_flip()
    + ggtitle("geom_density()")
)

# Violin of `hwy` against `drv` (treated as discrete, with order=1).
p_v = (
    ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy'))
    + geom_violin(aes(fill='drv'), alpha=.5)
    + ggtitle("geom_violin()")
)

plot_matrix([p_d, p_v])
# draw_quantiles
#
# Candidate values for the `draw_quantiles` parameter, one dict per case.
# Entries carrying 'skip': True are the out-of-range or wrongly-typed inputs.
# NOTE(review): no consumer of this list (or of the 'skip' flag) is visible
# in this file — presumably a loop that plotted the non-skipped cases was
# lost; confirm before relying on it.
tests = [
    {'draw_quantiles': None},  # default
    {'draw_quantiles': [.05, .5, .95]},  # all correct
    {'draw_quantiles': (1/3, .5, 2/3)},  # strange, but correct
    {'draw_quantiles': [.25]},  # only one
    {'draw_quantiles': []},  # empty
    {'draw_quantiles': [0, .5, 1]},  # include borders
    {'draw_quantiles': [-1, .5, 2], 'skip': True},  # beyond borders
    {'draw_quantiles': ['0.25', '0.5', '0.75'], 'skip': True},  # invalid values
    {'draw_quantiles': [True, False], 'skip': True},  # totally invalid values
    {'draw_quantiles': 0.5, 'skip': True},  # wrong parameter type
    {'draw_quantiles': True, 'skip': True},  # another wrong parameter type
    {'draw_quantiles': '0.25', 'skip': True},  # even worse parameter type
    {'draw_quantiles': object(), 'skip': True},  # totally wrong parameter type
]
# Violin with quantile lines at DRAW_QUANTILES; the title echoes the values.
(
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(draw_quantiles=DRAW_QUANTILES)
    + ggtitle("draw_quantiles={0}".format(DRAW_QUANTILES))
)
# scale
#
# Values tried for the `scale` parameter of the violin layer, one dict per
# case. This rebinds the module-level name `tests`.
# NOTE(review): None presumably selects the library default — confirm.
tests = [
    {'scale': None},
    {'scale': 'area'},
    {'scale': 'count'},
    {'scale': 'width'},
]
# One violin plot per entry of `tests`, each titled with its `scale` value,
# arranged on a grid by plot_matrix.
plot_matrix([
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(scale=test['scale'], draw_quantiles=DRAW_QUANTILES)
    + ggtitle("scale={0}".format(test['scale']))
    for test in tests
])
# Shared base plot; each variant below adds one violin layer that differs in
# a single density-related argument (kernel, bw or adjust).
p = ggplot(mpg_df, aes('drv', 'hwy'))

p_default = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES)
    + ggtitle("Default")
)
p_kernel = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES, kernel='epanechikov')
    + ggtitle("kernel='epanechikov'")
)
p_bw = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES, bw=.1)
    + ggtitle("bw=0.1")
)
p_adjust = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES, adjust=2)
    + ggtitle("adjust=2")
)

plot_matrix([p_default, p_kernel, p_bw, p_adjust])
# Violins grouped and filled by `year`, with a custom tooltip: one line per
# .line() call, in the order given here.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(
        aes(group='year', fill=as_discrete('year')),
        draw_quantiles=DRAW_QUANTILES,
        tooltips=(
            layer_tooltips()
            .line('^x')
            .line('year|@year')
            .line('hwy|@hwy')
            .line('violinwidth|@..violinwidth..')
            .line('density|@..density..')
            .line('count|@..count..')
            .line('scaled|@..scaled..')
        ),
    )
    + ggtitle("Grouping and tooltips")
)
# Same violins, split into one facet row per `year` value.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(aes(fill=as_discrete('year')), draw_quantiles=DRAW_QUANTILES)
    + facet_grid(y='year')
)
# coord_flip()
#
# Violin plot with coord_flip() applied.
(
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(draw_quantiles=DRAW_QUANTILES)
    + coord_flip()
    + ggtitle("Use coord_flip()")
)
# Violin layer combined with extra aesthetics (color/fill by `drv`), layer
# parameters (alpha, size, n, sampling), explicit scales, y limits, a fixed
# coordinate ratio and the grey theme.
(
    ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy'))
    + geom_violin(
        aes(color='drv', fill='drv'),
        alpha=.5,
        size=2,
        n=8,
        draw_quantiles=DRAW_QUANTILES,
        sampling=sampling_group_systematic(2),
    )
    + scale_y_continuous(breaks=list(range(12, 29, 2)))
    + scale_color_brewer(type='qual', palette='Set1')
    + scale_fill_brewer(type='qual', palette='Set1')
    + ylim(12, 28)
    + coord_fixed(ratio=.2)
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)
# Note: the quartiles drawn on a violin need not coincide with the quartiles
# shown by a boxplot!
# See the last paragraph here: https://stackoverflow.com/a/36036821/11771414
quartiles = [1/4, 2/4, 3/4]

# Overlay a narrow boxplot on the violin so the two sets of quartiles can be
# compared directly.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(draw_quantiles=quartiles)
    + geom_boxplot(width=.1)
)