In [1]:
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete
LetsPlot.setup_html()
In [2]:
# Quantile levels shared by the examples below; passed to geom_violin
# through its `draw_quantiles` argument.
DRAW_QUANTILES = [0.25, 0.50, 0.75]
In [3]:
def plot_matrix(plots=None, width=400, height=300, columns=2):
    """Arrange plots on a regular grid inside a GGBunch and show it.

    Plots are placed left to right, top to bottom: the plot at position
    ``i`` lands in row ``i // columns`` and column ``i % columns``, and its
    top-left corner is at ``(column * width, row * height)``.

    Parameters
    ----------
    plots : iterable, optional
        Plots to add to the bunch, in display order. ``None`` (the default)
        is treated as "no plots".
    width : int
        Width of every grid cell, forwarded to ``GGBunch.add_plot``.
    height : int
        Height of every grid cell, forwarded to ``GGBunch.add_plot``.
    columns : int
        Number of cells per row. Expected to be a positive integer; zero
        raises ``ZeroDivisionError`` as soon as there is a plot to place.

    Returns
    -------
    The value returned by ``GGBunch.show()``.
    """
    # A None sentinel avoids sharing one mutable default list across calls.
    if plots is None:
        plots = ()

    bunch = GGBunch()
    for index, plot in enumerate(plots):
        # Integer arithmetic only: no float round-trip as in int(i / columns).
        row, column = divmod(index, columns)
        bunch.add_plot(plot, column * width, row * height, width, height)
    return bunch.show()
In [4]:
# Read the mpg dataset straight from the lets-plot-docs repository and
# preview its first rows.
mpg_df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"
)

mpg_df.head()
Out[4]:
Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy fl class
0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
3 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact

Minimalistic example

In [5]:
# Only the y aesthetic is mapped; geom_violin() is called without arguments.
(
    ggplot(mpg_df, aes(y='hwy'))
    + geom_violin()
    + ggtitle("Simplest example")
)
Out[5]:

Comparison of geoms

In [6]:
# Density of `hwy` filled by `drv`, faceted by `drv` and flipped.
p_d = (
    ggplot(mpg_df)
    + geom_density(aes(x='hwy', fill='drv'), color='black', alpha=.5)
    + facet_grid(x='drv')
    + coord_flip()
    + ggtitle("geom_density()")
)

# Violin of `hwy` per `drv`, with `drv` treated as an ordered discrete variable.
p_v = (
    ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy'))
    + geom_violin(aes(fill='drv'), alpha=.5)
    + ggtitle("geom_violin()")
)

# Show both plots next to each other.
plot_matrix([p_d, p_v])

Original parameters

draw_quantiles

In [7]:
# Candidate values for the `draw_quantiles` parameter. Entries in the second
# group carry 'skip': True.
# NOTE(review): nothing in this cell reads `tests`; the plot below uses
# DRAW_QUANTILES only.
tests = [
    {'draw_quantiles': value}
    for value in (
        None,               # default
        [.05, .5, .95],     # all correct
        (1/3, .5, 2/3),     # strange, but correct
        [.25],              # only one
        [],                 # empty
        [0, .5, 1],         # include borders
    )
] + [
    {'draw_quantiles': value, 'skip': True}
    for value in (
        [-1, .5, 2],                # beyond borders
        ['0.25', '0.5', '0.75'],    # invalid values
        [True, False],              # totally invalid values
        0.5,                        # wrong parameter type
        True,                       # another wrong parameter type
        '0.25',                     # even worse parameter type
        object(),                   # totally wrong parameter type
    )
]

(
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(draw_quantiles=DRAW_QUANTILES)
    + ggtitle("draw_quantiles={0}".format(DRAW_QUANTILES))
)
Out[7]:

scale

In [8]:
# One plot per value of the `scale` parameter (None leaves it unset).
tests = [{'scale': option} for option in (None, 'area', 'count', 'width')]

plot_matrix([
    (
        ggplot(mpg_df, aes('drv', 'hwy'))
        + geom_violin(scale=test['scale'], draw_quantiles=DRAW_QUANTILES)
        + ggtitle("scale={0}".format(test['scale']))
    )
    for test in tests
])

Custom density parameters

In [9]:
# Common base: `hwy` by `drv`; each variant below changes one density parameter.
p = ggplot(mpg_df, aes('drv', 'hwy'))

p_default = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES)
    + ggtitle("Default")
)
p_kernel = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES, kernel='epanechikov')
    + ggtitle("kernel='epanechikov'")
)
p_bw = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES, bw=.1)
    + ggtitle("bw=0.1")
)
p_adjust = (
    p
    + geom_violin(draw_quantiles=DRAW_QUANTILES, adjust=2)
    + ggtitle("adjust=2")
)

plot_matrix([p_default, p_kernel, p_bw, p_adjust])

Grouping and tooltips

In [10]:
# Tooltip lines: the x value, then `year`, `hwy` and the statistics referenced
# as ..violinwidth.., ..density.., ..count.. and ..scaled...
violin_tooltips = (
    layer_tooltips()
    .line('^x')
    .line('year|@year')
    .line('hwy|@hwy')
    .line('violinwidth|@..violinwidth..')
    .line('density|@..density..')
    .line('count|@..count..')
    .line('scaled|@..scaled..')
)

# Violins grouped by `year` and filled by `year` as a discrete variable.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(
        aes(group='year', fill=as_discrete('year')),
        draw_quantiles=DRAW_QUANTILES,
        tooltips=violin_tooltips,
    )
    + ggtitle("Grouping and tooltips")
)
Out[10]:

Facets

In [11]:
# Violins filled by `year` (as a discrete variable), with facet_grid(y='year').
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(aes(fill=as_discrete('year')), draw_quantiles=DRAW_QUANTILES)
    + facet_grid(y='year')
)
Out[11]:

coord_flip()

In [12]:
# Same violin plot as before, combined with coord_flip().
(
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(draw_quantiles=DRAW_QUANTILES)
    + coord_flip()
    + ggtitle("Use coord_flip()")
)
Out[12]:

Additional layers

In [13]:
# Violin layer combined with extra aesthetics, sampling, scales, a fixed
# coordinate ratio and a theme.
(
    ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy'))
    + geom_violin(
        aes(color='drv', fill='drv'),
        alpha=.5,
        size=2,
        n=8,
        draw_quantiles=DRAW_QUANTILES,
        sampling=sampling_group_systematic(2),
    )
    + scale_y_continuous(breaks=list(range(12, 29, 2)))
    + scale_color_brewer(type='qual', palette='Set1')
    + scale_fill_brewer(type='qual', palette='Set1')
    + ylim(12, 28)
    + coord_fixed(ratio=.2)
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)
Out[13]:
In [14]:
# Note: the quartiles drawn on the violin need not be equal to the quartiles
# shown by the boxplot!
# See the last paragraph here: https://stackoverflow.com/a/36036821/11771414
quartiles = [1/4, 2/4, 3/4]

# Overlay a narrow boxplot on the violin.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(draw_quantiles=quartiles)
    + geom_boxplot(width=.1)
)
Out[14]: