Waterfall Chart

In [1]:
from lets_plot import *
from lets_plot.bistro import *
In [2]:
# Configure Lets-Plot for HTML output so the plots below render inline in the notebook.
LetsPlot.setup_html()
In [3]:
# Plain series of signed values; no measure column.
data1 = {
    'x': ["A", "B", "C", "D", "E"],
    'y': [300, -100, -400, 300, 200],
}

# Two consecutive runs of 'relative' rows, each closed by a 'total' row (T1, T2).
data2 = {
    'x': ["A", "B", "C", "D", "T1", "A", "B", "C", "D", "T2"],
    'y': [100, 100, -300, 500, None, -200, 300, 100, -300, 0.0],
    'm': ['relative', 'relative', 'relative', 'relative', 'total', 'relative', 'relative', 'relative', 'relative', 'total'],
}

# Same layout, but rows "A" (first) and "E" are 'absolute' and there is a single closing 'total' row.
data3 = {
    'x': ["A", "B", "C", "D", "E", "A", "B", "C", "D", "T"],
    'y': [100, 100, -300, 500, 300, -200, 300, 100, -300, 0.0],
    'm': ['absolute', 'relative', 'relative', 'relative', 'absolute', 'relative', 'relative', 'relative', 'relative', 'total'],
}

Default

In [4]:
# Default appearance for the three sample datasets: no measure column (data1),
# a measure column with only 'relative'/'total' rows (data2), and a measure
# column that also contains 'absolute' rows (data3).
gggrid([
    waterfall_plot(data1, 'x', 'y') + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m') + ggtitle("With measure", "Without absolute"),
    waterfall_plot(data3, 'x', 'y', measure='m') + ggtitle("With measure", "With absolute"),
])
Out[4]:

Parameters

Aesthetics

In [5]:
# color: a constant color ("magenta") versus color='flow_type' combined with a
# light-grey fill; each variant is shown without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', size=1, color="magenta") + ggtitle("Without measure"),
    waterfall_plot(data1, 'x', 'y', size=1, color='flow_type', fill="lightgrey") + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=1, color="magenta") + ggtitle("With measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=1, color='flow_type', fill="lightgrey") + ggtitle("With measure"),
], ncol=2)
Out[5]:
In [6]:
# fill: a constant fill color ("magenta"), without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', fill="magenta") + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="magenta") + ggtitle("With measure"),
])
Out[6]:
In [7]:
# size: size=2, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', size=2) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=2) + ggtitle("With measure"),
])
Out[7]:
In [8]:
# alpha: alpha=.5, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', alpha=.5) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', alpha=.5) + ggtitle("With measure"),
])
Out[8]:
In [9]:
# linetype: linetype='dashed', without and with a measure column.
# size=1 is passed alongside it, presumably so that the dashed outline is visible - confirm in the output.
gggrid([
    waterfall_plot(data1, 'x', 'y', size=1, linetype='dashed') + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', size=1, linetype='dashed') + ggtitle("With measure"),
])
Out[9]:
In [10]:
# width: width=.4, without and with a measure column.
# The titles follow the convention used by every other cell: the data1 plot
# (no `measure` argument) is "Without measure", the data2 plot (measure='m')
# is "With measure".
gggrid([
    waterfall_plot(data1, 'x', 'y', width=.4) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.4) + ggtitle("With measure"),
])
Out[10]:

Standard parameters

In [11]:
# show_legend: legend enabled for each of the three datasets, once with the
# default calc_total and once with calc_total=False.
gggrid([
    waterfall_plot(data1, 'x', 'y', show_legend=True) + ggtitle("Without measure", "Default calc_total"),
    waterfall_plot(data1, 'x', 'y', show_legend=True, calc_total=False) + ggtitle("Without measure", "calc_total=False"),
    waterfall_plot(data2, 'x', 'y', measure='m', show_legend=True) + ggtitle("With measure", "Default calc_total, without absolute"),
    waterfall_plot(data2, 'x', 'y', measure='m', show_legend=True, calc_total=False) + ggtitle("With measure", "calc_total=False, without absolute"),
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True) + ggtitle("With measure", "Default calc_total, with absolute"),
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True, calc_total=False) + ggtitle("With measure", "calc_total=False, with absolute"),
], ncol=2)
Out[11]:
In [12]:
# tooltips: relative_tooltips/absolute_tooltips set to 'none', to 'detailed',
# and to custom layer_tooltips() templates that reference the ..dy..,
# ..initial.., ..value.. and ..flow_type.. variables.
# First three plots: data1 (no measure); last three: data2 (measure='m').
gggrid([
    waterfall_plot(data1, 'x', 'y', relative_tooltips='none', absolute_tooltips='none') + \
        ggtitle("Without measure", "relative_tooltips='none',\nabsolute_tooltips='none'"),
    waterfall_plot(data1, 'x', 'y', relative_tooltips='detailed', absolute_tooltips='detailed') + \
        ggtitle("Without measure", "relative_tooltips='detailed',\nabsolute_tooltips='detailed'"),
    waterfall_plot(data1, 'x', 'y', relative_tooltips=layer_tooltips().line("@..dy..: from @..initial.. to @..value..").disable_splitting(), \
                                    absolute_tooltips=layer_tooltips().line("@..flow_type..: @..value..").disable_splitting()) + \
        ggtitle("Without measure", "Customized tooltips"),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_tooltips='none', absolute_tooltips='none') + \
        ggtitle("With measure", "relative_tooltips='none',\nabsolute_tooltips='none'"),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_tooltips='detailed', absolute_tooltips='detailed') + \
        ggtitle("With measure", "relative_tooltips='detailed',\nabsolute_tooltips='detailed'"),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_tooltips=layer_tooltips().line("@..dy..: from @..initial.. to @..value..").disable_splitting(), \
                                                 absolute_tooltips=layer_tooltips().line("@..flow_type..: @..value..").disable_splitting()) + \
        ggtitle("With measure", "Customized tooltips"),
], ncol=3)
Out[12]:

Waterfall-specific parameters

In [13]:
# sorted_value: sorted_value=True, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', sorted_value=True) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', sorted_value=True) + ggtitle("With measure"),
])
Out[13]:
In [14]:
# threshold: threshold=200, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', threshold=200) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', threshold=200) + ggtitle("With measure"),
])
Out[14]:
In [15]:
# max_values: max_values=2, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', max_values=2) + ggtitle("Without measure"),
    waterfall_plot(data2, 'x', 'y', measure='m', max_values=2) + ggtitle("With measure"),
])
Out[15]:
In [16]:
# Use threshold to skip zeros
# Sample data with zero-valued 'relative' rows: 'd' in the first run and 'a' in the second.
data_with_zeros = {
    'x': ['a', 'b', 'c', 'd', 't', 'a', 'b', 't'],
    'y': [1, -2, 3, 0, None, 0, 2, None],
    'm': ['relative', 'relative', 'relative', 'relative', 'total', 'relative', 'relative', 'total'],
}

# First plot: default settings; second plot: the same data with threshold=0.
gggrid([
    waterfall_plot(data_with_zeros, 'x', 'y', measure='m'),
    waterfall_plot(data_with_zeros, 'x', 'y', measure='m', threshold=0),
])
Out[16]:
In [17]:
# calc_total: default versus calc_total=False, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y') + ggtitle("Without measure", "Default"),
    waterfall_plot(data1, 'x', 'y', calc_total=False) + ggtitle("Without measure", "calc_total=False"),
    waterfall_plot(data2, 'x', 'y', measure='m') + ggtitle("With measure", "Default"),
    waterfall_plot(data2, 'x', 'y', measure='m', calc_total=False) + ggtitle("With measure", "calc_total=False"),
], ncol=2)
Out[17]:
In [18]:
# total_title: total_title="Result", shown together with the legend and
# 'detailed' absolute tooltips, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y', total_title="Result", show_legend=True, absolute_tooltips='detailed'),
    waterfall_plot(data2, 'x', 'y', measure='m', total_title="Result", show_legend=True, absolute_tooltips='detailed'),
])
Out[18]:

Control additional geometries

In [19]:
# hline: an explicit element_line() versus three spellings of a blank element
# (element_line(blank=True), element_blank(), 'blank').
# Presumably the last three all hide the line - verify in the rendered output.
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(blank=True)),
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_blank()),
    waterfall_plot(data2, 'x', 'y', measure='m', hline='blank'),
], ncol=2)
Out[19]:
In [20]:
# hline_ontop: hline_ontop=False, with the hline explicitly supplied as element_line().
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(), hline_ontop=False)
Out[20]:
In [21]:
# hline color: hline=element_line(color="magenta").
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(color="magenta"))
Out[21]:
In [22]:
# hline size: hline=element_line(size=2).
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(size=2))
Out[22]:
In [23]:
# hline linetype: hline=element_line(linetype='solid').
waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(linetype='solid'))
Out[23]:
In [24]:
# connector: an explicit element_line() versus three spellings of a blank
# element (element_line(blank=True), element_blank(), 'blank').
# width=.5 is passed in all connector examples, presumably to leave room
# between bars for the connectors - confirm in the rendered output.
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(blank=True)),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_blank()),
    waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector='blank'),
], ncol=2)
Out[24]:
In [25]:
# connector color: connector=element_line(color="magenta").
waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(color="magenta"))
Out[25]:
In [26]:
# connector size: connector=element_line(size=2).
waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(size=2))
Out[26]:
In [27]:
# connector linetype: connector=element_line(linetype='dotted').
waterfall_plot(data2, 'x', 'y', measure='m', width=.5, connector=element_line(linetype='dotted'))
Out[27]:
In [28]:
# label: an explicit element_text() versus three spellings of a blank element
# (element_text(blank=True), element_blank(), 'blank').
# Presumably the last three all hide the labels - verify in the rendered output.
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text()),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(blank=True)),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_blank()),
    waterfall_plot(data2, 'x', 'y', measure='m', label='blank'),
], ncol=2)
Out[28]:
In [29]:
# label color: an explicit label text color versus color='inherit', combined
# with different `color`/`fill` settings of the plot itself.
# Each title states the label color expected in the rendered plot.
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(color="yellow")) + \
        ggtitle("Expected text color: yellow"),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="lightgray", color="magenta", label=element_text(color='inherit')) + \
        ggtitle("Expected text color: magenta"),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="lightgray", color='flow_type', label=element_text(color='inherit')) + \
        ggtitle("Expected text color: 'flow_type'"),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="lightgray", label=element_text(color="magenta")) + \
        ggtitle("Expected text color: magenta"),
    waterfall_plot(data2, 'x', 'y', measure='m', fill="lightgray", color='flow_type', label=element_text(color="magenta")) + \
        ggtitle("Expected text color: magenta"),
], ncol=2)
Out[29]:
In [30]:
# label family: label=element_text(family="Courier").
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(family="Courier"))
Out[30]:
In [31]:
# label face: label=element_text(face='bold_italic').
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(face='bold_italic'))
Out[31]:
In [32]:
# label size: label=element_text(size=8).
waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(size=8))
Out[32]:
In [33]:
# label angle (it doesn't work since waterfall_plot() uses annotations instead of geom_text())
# waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(angle=45))
In [34]:
# label hjust/vjust (it doesn't work since waterfall_plot() uses annotations instead of geom_text())
def get_waterfall_with_justified_labels(hjust, vjust):
    """Build a data2 waterfall plot whose labels use the given justification.

    hjust and vjust are forwarded to element_text() for the `label` argument
    and are echoed in the plot subtitle.
    """
    justified_text = element_text(hjust=hjust, vjust=vjust)
    subtitle = "hjust={0}, vjust={1}".format(hjust, vjust)
    plot = waterfall_plot(data2, 'x', 'y', measure='m', label=justified_text)
    return plot + ggtitle("Justified labels", subtitle)

# gggrid([
#     get_waterfall_with_justified_labels(0, 0), get_waterfall_with_justified_labels(0, 1),
#     get_waterfall_with_justified_labels(1, 0), get_waterfall_with_justified_labels(1, 1),
# ], ncol=2)
In [35]:
# label_format: label_format="({.1f})".
waterfall_plot(data2, 'x', 'y', measure='m', label_format="({.1f})")
Out[35]:
In [36]:
# group
# Two groups in column 'g' ('a' and 'b'); each starts with an 'absolute' row and ends with a 'total' row.
data_with_groups = dict(
    x=['A', 'C', 'T1', 'A', 'B', 'C', 'T2'],
    y=[2, -1, None, 1, 3, -2, 0.0],
    m=['absolute', 'relative', 'total', 'absolute', 'relative', 'relative', 'total'],
    g=['a', 'a', 'a', 'b', 'b', 'b', 'b'],
)

# Plot with group='g' and facet on the same column; scales='free_x' is passed to facet_wrap().
waterfall_plot(data_with_groups, 'x', 'y', measure='m', group='g') + facet_wrap(facets='g', scales='free_x')
Out[36]:

Other Customizations

In [37]:
# fill and color
# Manual fill/color scales keyed by the flow type names ("Increase",
# "Decrease", "Absolute", "Total"); the last plot additionally passes
# labels=[...] to scale_fill_manual() to rename the flow types.

gggrid([
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True, width=.7, size=1, color="#777777", label=element_text(color="#777777")) + \
        scale_fill_manual({"Increase": "white", "Decrease": "black", "Absolute": "green", "Total": "yellow"}) + \
        ggtitle("Custom scale_fill_manual()"),
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True, width=.7, fill="black", color='flow_type', label=element_text(color='inherit')) + \
        scale_color_manual({"Increase": "green", "Decrease": "yellow", "Absolute": "red", "Total": "#bbbbff"}) + \
        ggtitle("Custom scale_color_manual()"),
    waterfall_plot(data3, 'x', 'y', measure='m', show_legend=True, width=.7, color="#777777", label=element_text(color="#777777")) + \
        scale_fill_manual({"Increase": "green", "Decrease": "red", "Absolute": "cyan", "Total": "yellow"}, labels=["Up", "Down", "From zero", "Result"]) + \
        ggtitle("Custom flow type names"),
], ncol=1) + ggsize(1000, 800)
Out[37]:
In [38]:
# flip coordinates: the default data2 plot with coord_flip() added.
waterfall_plot(data2, 'x', 'y', measure='m') + coord_flip()
Out[38]:
In [39]:
# custom theme: the default data2 plot with theme_bw() and flavor_darcula() added.
waterfall_plot(data2, 'x', 'y', measure='m') + theme_bw() + flavor_darcula()
Out[39]:

Tests

Boundary Value Analysis

In [40]:
import numpy as np

class BVATest:
    """One boundary-value scenario for waterfall_plot().

    The constructor stores its arguments unchanged:
      data  - dataset handed to waterfall_plot(); to_plot() uses columns 'x', 'y' and 'm'
      title - title placed on both panels produced by to_plot()
      show  - flag callers can consult to decide whether to render the scenario
    """

    def __init__(self, data, title, show=True):
        self.data, self.title, self.show = data, title, show

    def to_plot(self):
        """Return a gggrid showing the scenario with calc_total=True, then calc_total=False."""
        variants = (
            (True, "calc_total=True"),
            (False, "calc_total=False"),
        )
        return gggrid([
            waterfall_plot(self.data, 'x', 'y', measure='m', calc_total=flag) + \
                ggtitle(self.title, subtitle)
            for flag, subtitle in variants
        ])

# When True, scenarios created with show=False are rendered as well
# (see the `show_all or t.show` filter in the gggrid call at the end of this cell).
show_all = False
# Boundary-value scenarios. Each entry supplies 'x', 'y' and 'm' columns plus a
# title; BVATest.to_plot() draws it with calc_total=True and calc_total=False.
bva_tests = [
    BVATest(
        data=dict(
            x=[],
            y=[],
            m=[],
        ),
        title="Empty dataset",
    ),
    BVATest(
        data=dict(
            x=["A", "T"],
            y=[1, None],
            m=['relative', 'total'],
        ),
        title="One value dataset",
    ),
    BVATest(
        data=dict(
            x=["A", "A", "T"],
            y=[1, 2, None],
            m=['relative', 'relative', 'total'],
        ),
        title="Repeated categories",
    ),
    BVATest(
        data=dict(
            x=["A", "T"],
            y=[0, None],
            m=['relative', 'total'],
        ),
        title="Zero values",
    ),
    BVATest(
        data=dict(
            x=["A", "T"],
            y=[-1, None],
            m=['relative', 'total'],
        ),
        title="Negative values",
    ),
    BVATest(
        data=dict(
            x=[1, 0],
            y=[1, None],
            m=['relative', 'total'],
        ),
        title="Numeric x",
    ),
    BVATest(
        data=dict(
            x=["A", "B", "T"],
            y=[1, float('inf'), float('-inf')],
            m=['relative', 'relative', 'total'],
        ),
        title="Inf values",
    ),
    # Missing values expressed as None in each of the x, y and m columns.
    BVATest(
        data=dict(
            x=["A", "B", "C", None],
            y=[1, 1, None, 1],
            m=['relative', None, 'relative', 'total'],
        ),
        title="None values",
    ),
    # Same positions as the previous scenario, but with np.nan instead of None.
    BVATest(
        data=dict(
            x=["A", "B", "C", np.nan],
            y=[1, 1, np.nan, 1],
            m=['relative', np.nan, 'relative', 'total'],
        ),
        title="np.nan values",
    ),
    # The relative values 1 and -1 sum to zero.
    BVATest(
        data=dict(
            x=["A", "B", "T"],
            y=[1, -1, None],
            m=['relative', 'relative', 'total'],
        ),
        title="Total is zero",
    ),
]

# Render every scenario flagged with show=True (or all of them when show_all
# is set), stacked in a single column.
gggrid(
    [case.to_plot() for case in bva_tests if show_all or case.show],
    ncol=1,
)
Out[40]:

Regression Testing

In [41]:
# The totals should be equal to [3, 4] (in labels and tooltips):
# T1 = 1 + 2 = 3 and T2 = 3 - 1 + 2 = 4, even though the 'total' rows
# themselves carry y = None and y = 0.
tdata = dict(
    x=["A", "B", "T1", "A", "B", "T2"],
    y=[1, 2, None, -1, 2, 0],
    m=['relative', 'relative', 'total'] * 2
)
waterfall_plot(tdata, 'x', 'y', measure='m')
Out[41]:
In [42]:
# Changing hline properties shouldn't affect subsequent plots. The same goes for connector and label.
# Each customized plot is immediately followed by one that uses the default element.
gggrid([
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line(color="magenta")),
    waterfall_plot(data2, 'x', 'y', measure='m', hline=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', connector=element_line(color="magenta")),
    waterfall_plot(data2, 'x', 'y', measure='m', connector=element_line()),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text(color="black")),
    waterfall_plot(data2, 'x', 'y', measure='m', label=element_text()),
], ncol=2)
Out[42]:
In [43]:
# Changing total_title shouldn't affect subsequent plots:
# the second plot omits total_title and should show the default title.
gggrid([
    waterfall_plot(data1, 'x', 'y', total_title="Result"),
    waterfall_plot(data1, 'x', 'y'),
])
Out[43]:
In [44]:
# xxx_tooltips='none' should disable tooltips
# (checked for both relative_tooltips and absolute_tooltips, without and with a measure column).
gggrid([
    waterfall_plot(data1, 'x', 'y', relative_tooltips='none', absolute_tooltips='none'),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_tooltips='none', absolute_tooltips='none'),
])
Out[44]:

New Features

Param base

In [45]:
# base: default versus base=-100, without and with a measure column.
gggrid([
    waterfall_plot(data1, 'x', 'y') + ggtitle("Without measure", "Default"),
    waterfall_plot(data1, 'x', 'y', base=-100) + ggtitle("Without measure", "base=-100"),
    waterfall_plot(data2, 'x', 'y', measure='m') + ggtitle("With measure", "Default"),
    waterfall_plot(data2, 'x', 'y', measure='m', base=-100) + ggtitle("With measure", "base=-100"),
], ncol=2)
Out[45]:

Annotations for labels

In [46]:
# Hide labels: default, absolute_labels='none' and relative_labels='none'.
# First three plots: data1 (no measure); last three: data2 (measure='m').
gggrid([
    waterfall_plot(data1, "x", "y"),
    waterfall_plot(data1, "x", "y", absolute_labels='none'),
    waterfall_plot(data1, "x", "y", relative_labels='none'),
    waterfall_plot(data2, 'x', 'y', measure='m'),
    waterfall_plot(data2, 'x', 'y', measure='m', absolute_labels='none'),
    waterfall_plot(data2, 'x', 'y', measure='m', relative_labels='none'),
], ncol=3)
Out[46]:
In [47]:
# Theme overrides labels color
# Each base plot sets label=element_text(color="gray"); it is shown as is and
# then with theme(label_text=element_text(color="magenta")) added.
waterfall_plot_with_colored_labels = waterfall_plot(data1, "x", "y", label=element_text(color="gray"))
waterfall_plot_with_measures_and_colored_labels = waterfall_plot(data2, "x", "y", measure="m", label=element_text(color="gray"))
gggrid([
    waterfall_plot_with_colored_labels,
    waterfall_plot_with_colored_labels + theme(label_text=element_text(color="magenta")),
    waterfall_plot_with_measures_and_colored_labels,
    waterfall_plot_with_measures_and_colored_labels + theme(label_text=element_text(color="magenta")),
], ncol=2)
Out[47]:
In [48]:
# Use additional data with absolute_labels/relative_labels
# data11/data21 are copies of data1/data2 extended with a 'text' column.
data11 = dict(data1, text=['a1', 'b1', 'c1', 'd1', 'e1'])

data21 = dict(data2, text=['a1', 'b1', 'c1', 'd1', 't1', 'a2', 'b2', 'c2', 'd2', 't2'])

gggrid([
    waterfall_plot(
        data11, 'x', 'y',
        relative_labels=layer_labels().line("@text").line("@..dy.."),
        absolute_labels=layer_labels().line("total").line("@..dy..").format("@..dy..", ".2f"),
        # label_format has no effect when relative_labels/absolute_labels are specified
        label_format="({.1f})",
    ),
    waterfall_plot(
        data21, 'x', 'y', measure='m',
        relative_labels=layer_labels().line("@text").line("@..dy.."),
        absolute_labels=layer_labels().line("total").line("@..dy..").format("@..dy..", ".2f"),
        label_format="({.1f})",
    ),
])
Out[48]: