Small examples for checking computation messages from stats.
import numpy as np
import pandas as pd
from lets_plot import *
LetsPlot.setup_html()
# Single numeric column; NaN at positions 2 and 5.
df_x = pd.DataFrame(
    data={
        "x": [1, 2, np.nan, 4, 5, np.nan],
    }
)

# Two numeric columns, a weight column and a two-level group column.
# NaN positions: x -> 2, 5; y -> 1, 5; w -> 3.
df_xy = pd.DataFrame(
    data={
        "x": [1, 2, np.nan, 4, 5, np.nan],
        "y": [2, np.nan, 3, 4, 5, np.nan],
        "w": [1, 1, 1, np.nan, 1, 1],
        "g": ["A", "A", "A", "B", "B", "B"],
    }
)

# Ten rows; NaN positions: x -> 1, 7; y -> 5, 7 (position 7 is NaN in both).
df_qq2 = pd.DataFrame(
    data={
        "x": [4, np.nan, 1, 9, 6, 2, 10, np.nan, 7, 5],
        "y": [7, 1, 9, 10, 4, np.nan, 3, np.nan, 6, 5],
    }
)

# Complete x from 1 to 8; y has NaN at positions 1 and 5.
df_smooth = pd.DataFrame(
    data={
        "x": [1, 2, 3, 4, 5, 6, 7, 8],
        "y": [1, np.nan, 3, 4, 5, np.nan, 7, 8],
    }
)

# y is 0 for the first four rows and 1 for the last four;
# x has NaN at positions 2 and 7.
df_ridges = pd.DataFrame(
    data={
        "x": [1, 2, np.nan, 4, 1, 2, 3, np.nan],
        "y": [0, 0, 0, 0, 1, 1, 1, 1],
    }
)

# Ten complete rows with x == y == 0..9 (no missing values).
df_sampling = pd.DataFrame(
    data={column: list(range(10)) for column in ("x", "y")}
)
# One plot per layer; each input frame contains NaN values.
# Aesthetics are spelled out by keyword (x=..., y=...).
ggplot(df_x, aes(x="x")) + geom_bar()
ggplot(df_xy, aes(x="x", y="y")) + geom_pie()
ggplot(df_x, aes(x="x")) + geom_histogram(bins=4)
ggplot(df_xy, aes(x="x", y="y")) + geom_bin2d(bins=[3, 3])
ggplot(df_xy, aes(x="x", y="y")) + geom_hex(bins=[3, 3])
ggplot(df_x, aes(x="x")) + geom_dotplot(binwidth=1.0)
ggplot(df_xy, aes(x="x", y="y")) + geom_ydotplot(binwidth=1.0)
# Summary layers: per group "g", then per x-bin (3 bins).
ggplot(df_xy, aes(x="g", y="y")) + stat_summary()
ggplot(df_xy, aes(x="x", y="y")) + stat_summary_bin(bins=3)
geom_boxplot() exercises boxplot-related stats, including outlier handling.
# Grouped layer: y split by the "g" column.
ggplot(df_xy, aes(x="g", y="y")) + geom_boxplot()
# Density layers in one and two dimensions.
ggplot(df_x, aes(x="x")) + geom_density()
ggplot(df_xy, aes(x="x", y="y")) + geom_density2d()
ggplot(df_xy, aes(x="x", y="y")) + geom_density2df()
# More layers grouped by "g".
ggplot(df_xy, aes(x="g", y="y")) + geom_violin()
ggplot(df_xy, aes(x="g", y="y")) + geom_sina()
# Ridges use df_ridges, where y takes the values 0 and 1.
ggplot(df_ridges, aes(x="x", y="y")) + geom_area_ridges()
ggplot(df_x, aes(x="x")) + stat_ecdf()
# The QQ layers take the column through the `sample` aesthetic.
ggplot(df_x, aes(sample="x")) + geom_qq()
ggplot(df_x, aes(sample="x")) + geom_qq_line()
For qq2/qq2line, a row is counted as removed only when both x and y are non-finite. In df_qq2, only the 8th row (index 7) has both values missing.
# Two-sample QQ layers on df_qq2 (x and y each contain NaN values).
ggplot(df_qq2, aes(x="x", y="y")) + geom_qq2()
ggplot(df_qq2, aes(x="x", y="y")) + geom_qq2_line()
This example should show non-finite removal. It also uses LOESS sampling (max_n=6) to check the sampling message inside stat.
# LOESS smoothing over df_smooth (8 rows, 2 of them with NaN in y),
# with max_n=6, a fixed seed and smooth_labels() attached.
(
    ggplot(df_smooth, aes(x="x", y="y"))
    + geom_smooth(method="loess", max_n=6, seed=42, labels=smooth_labels())
)
# Point density on the frame with NaN values in both x and y.
ggplot(df_xy, aes(x="x", y="y")) + geom_pointdensity()