geom_pointdensity()

In [1]:
import numpy as np
import pandas as pd

from lets_plot import *
from lets_plot.geo_data import *
from lets_plot.bistro import *
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
In [2]:
# Set up Lets-Plot for HTML output; run once, before any plot cell is displayed.
LetsPlot.setup_html()
In [3]:
# Row count of the random sub-sample (`df`) drawn from the full dataset.
sample_size = 1000
# Full dataset, read from the lets-plot-docs repository (needs network access).
df_full = pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/refs/heads/master/data/diamonds.csv")
print(df_full.shape)
# A fixed random_state keeps the sub-sample reproducible; the index is rebuilt afterwards.
df = df_full.sample(sample_size, random_state=42).reset_index(drop=True)
df.head()
(53940, 10)
Out[3]:
carat cut color clarity depth table price x y z
0 0.24 Ideal G VVS1 62.1 56.0 559 3.97 4.00 2.47
1 0.58 Very Good F VVS2 60.0 57.0 2201 5.44 5.42 3.26
2 0.40 Ideal E VVS2 62.1 55.0 1238 4.76 4.74 2.95
3 0.43 Premium E VVS2 60.8 57.0 1304 4.92 4.89 2.98
4 1.55 Ideal E SI2 62.3 55.0 6901 7.44 7.37 4.61

Default Plot

In [4]:
# Baseline: geom_pointdensity() with no arguments, applied to the full dataset.
(
    ggplot(df_full, aes("carat", "price"))
    + geom_pointdensity()
)
Out[4]:

Synthetic Datasets

In [5]:
def get_mn_data(cov=((1, 0), (0, 1)), size=250, seed=42):
    """Draw a reproducible sample from a zero-mean bivariate normal distribution.

    Parameters
    ----------
    cov : 2x2 array-like
        Covariance matrix handed to ``multivariate_normal``.
    size : int
        Number of points to draw.
    seed : int
        Seed of the private random generator.

    Returns
    -------
    dict
        ``{'x': ndarray, 'y': ndarray}`` with ``size`` values each.
    """
    # A private RandomState produces the same numbers as np.random.seed(seed)
    # followed by the module-level call, but leaves the global RNG untouched.
    rng = np.random.RandomState(seed)
    x, y = rng.multivariate_normal([0, 0], cov, size).T
    return {'x': x, 'y': y}

def get_nu_data(size=250, seed=42):
    """Return reproducible data with uniform ``x`` and standard-normal ``y``.

    Parameters
    ----------
    size : int
        Number of points to draw.
    seed : int
        Seed of the private random generator.

    Returns
    -------
    dict
        ``{'x': ndarray, 'y': ndarray}`` with ``size`` values each.
    """
    # A private RandomState reproduces the values of the seeded module-level
    # functions without reseeding the global RNG.
    rng = np.random.RandomState(seed)
    return {
        'x': rng.uniform(size=size),
        'y': rng.normal(size=size),
    }

def get_np_data(scale, size=250, seed=42):
    """Return reproducible data whose ``x`` is Poisson counts plus normal noise.

    Parameters
    ----------
    scale : float
        Standard deviation of the normal noise added to the Poisson draws.
    size : int
        Number of points to draw.
    seed : int
        Seed of the private random generator.

    Returns
    -------
    dict
        ``{'x': ndarray, 'y': ndarray}`` with ``size`` values each.
    """
    # A private RandomState keeps the draw order (poisson, noise, y) and the
    # resulting values, but does not reseed the global RNG.
    rng = np.random.RandomState(seed)
    return {
        'x': rng.poisson(size=size) + rng.normal(scale=scale, size=size),
        'y': rng.normal(size=size),
    }

def get_pointdensity_plot(data, method):
    """Build an x/y point-density plot of ``data`` for ``method``, titled with the method name."""
    canvas = ggplot(data, aes("x", "y"))
    density_layer = geom_pointdensity(method=method)
    return canvas + density_layer + ggtitle(f"Method: {method}")

# One row per synthetic dataset, one column per density method.
gggrid([
    get_pointdensity_plot(make_data(), method)
    for make_data in (
        lambda: get_mn_data(),
        lambda: get_mn_data(cov=[[1, 0.9], [0.9, 1]]),
        lambda: get_mn_data(cov=[[1, -0.75], [-0.75, 1]]),
        lambda: get_nu_data(),
        lambda: get_np_data(.1),
    )
    for method in ('neighbours', 'kde2d')
], ncol=2)
Out[5]:

Aesthetics

weight

In [6]:
# Three points; the point at (0, 1) carries twice the weight of the other two.
weighted_data = {
    'x': [0, 0, 1],
    'y': [0, 1, 0],
    'w': [1, 2, 1],
}
# Tooltip lines showing the computed density, the computed count and the weight.
tooltips_with_weights = layer_tooltips().line("@|@..density..")\
                                        .line("'neighbours count'|@..count..")\
                                        .line("weight|@w")

# The method is passed explicitly so that each title states exactly what was plotted,
# using the same 'neighbours' spelling as the rest of the notebook.
gggrid([
    ggplot(weighted_data, aes('x', 'y')) + \
        geom_pointdensity(method='neighbours', adjust=150, tooltips=tooltips_with_weights) + \
        ggtitle("method='neighbours', without weights"),
    ggplot(weighted_data, aes('x', 'y')) + \
        geom_pointdensity(aes(weight='w'), method='neighbours', adjust=150, tooltips=tooltips_with_weights) + \
        ggtitle("method='neighbours', with weights"),
    ggplot(weighted_data, aes('x', 'y')) + \
        geom_pointdensity(method='kde2d', tooltips=tooltips_with_weights) + \
        ggtitle("method='kde2d', without weights"),
    ggplot(weighted_data, aes('x', 'y')) + \
        geom_pointdensity(aes(weight='w'), method='kde2d', tooltips=tooltips_with_weights) + \
        ggtitle("method='kde2d', with weights"),
], ncol=2)
Out[6]:

Other aesthetics

In [7]:
# Computed density drives the transparency; every point is drawn in black.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(aes(alpha="..density.."), color="black")
    + ggtitle("Custom color and alpha")
)
Out[7]:
In [8]:
# Computed density drives the fill; shape 21 is drawn with a black outline color.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(aes(fill="..density.."), method='kde2d', color="black", shape=21)
    + ggtitle("Custom shape, color and fill")
)
Out[8]:
In [9]:
# Shape 17 with the angle parameter set to 90.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d', shape=17, angle=90)
    + ggtitle("Custom shape and angle")
)
Out[9]:
In [10]:
# Point size follows the "depth" column, limited to the 1..5 range by scale_size.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(aes(size="depth"), method='kde2d', alpha=.2)
    + scale_size(range=[1, 5])
    + ggtitle("Custom size and alpha")
)
Out[10]:
In [11]:
# Shape 1 with a stroke of 2 and half transparency.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d', shape=1, stroke=2, alpha=.5)
    + ggtitle("Custom shape, stroke and alpha")
)
Out[11]:

Parameters

Own Parameters

Unique Parameters

In [12]:
# Same data rendered with the default method and with each explicit method.
gggrid([
    ggplot(df, aes("carat", "price")) + geom_pointdensity(**options) + ggtitle(title)
    for title, options in [
        ("Default method (auto)", {}),
        ("method='neighbours'", {'method': 'neighbours'}),
        ("method='kde2d'", {'method': 'kde2d'}),
    ]
], ncol=3)
Out[12]:

If `method='auto'` (the default), the method is chosen based on the size of the dataset.

Note: If grouping is used, the method is selected independently for each group:

In [13]:
# Label every row by whether its cut is "Ideal"; the label is used as the facet variable.
ideal_df = df_full.assign(
    is_ideal_quality=lambda frame: frame["cut"].eq("Ideal").map({
        True: "Quality: ideal",
        False: "Quality: not ideal",
    })
)

(
    ggplot(ideal_df, aes("carat", "price"))
    + geom_pointdensity()
    + facet_grid(x="is_ideal_quality")
)
Out[13]:
In [14]:
# Number of rows behind each of the two facet labels.
ideal_df["is_ideal_quality"].value_counts()
Out[14]:
is_ideal_quality
Quality: not ideal    32389
Quality: ideal        21551
Name: count, dtype: int64

Density-like Parameters

In [15]:
# Shared base plot with a custom continuous color scale.
g = (
    ggplot(df, aes("carat", "price"))
    + scale_color_continuous(low="darkgreen", high="tomato")
)

# Default kernel first, then one plot per explicitly named kernel.
gggrid(
    [g + geom_pointdensity(method='kde2d') + ggtitle("Default kernel")]
    + [
        g + geom_pointdensity(method='kde2d', kernel=kernel) + ggtitle(f"kernel='{kernel}'")
        for kernel in ('cosine', 'optcosine', 'rectangular', 'triangular', 'biweight', 'epanechikov')
    ],
    ncol=3,
)
Out[15]:
In [16]:
gggrid([
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d') + ggtitle("Default adjust"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', adjust=10) + ggtitle("adjust=10"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', adjust=.1) + ggtitle("adjust=0.1"),
], ncol=2)
Out[16]:
In [17]:
gggrid([
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d') + ggtitle("Default bw"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', bw='nrd0') + ggtitle("bw='nrd0'"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', bw=[1, 5000]) + ggtitle("bw=[1, 5000]"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', bw=[.02, 500]) + ggtitle("bw=[.02, 500]"),
], ncol=2)
Out[17]:
In [18]:
gggrid([
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d') + ggtitle("Default n"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', n=3) + ggtitle("n=3"),
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', n=999) + ggtitle("n=999"),
], ncol=2)
Out[18]:

Standard Parameters

In [19]:
# Default position next to a nudge of 5_000 along y; the second plot fixes ylim explicitly.
gggrid([
    (
        ggplot(df, aes("carat", "price"))
        + geom_pointdensity(method='kde2d')
        + ggtitle("Default position")
    ),
    (
        ggplot(df, aes("carat", "price"))
        + geom_pointdensity(method='kde2d', position=position_nudge(y=5_000))
        + coord_cartesian(ylim=[5_000, 25_000])
        + ggtitle("position=position_nudge(y=5_000)")
    ),
])
Out[19]:
In [20]:
# Default legend behaviour next to show_legend=False.
gggrid([
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', **options) + ggtitle(title)
    for title, options in [
        ("Default show_legend", {}),
        ("show_legend=False", {'show_legend': False}),
    ]
])
Out[20]:
In [21]:
# The plot-level mapping sets alpha; the layer supplies x/y and optionally opts out of inheriting it.
gggrid([
    ggplot(df, aes(alpha="..density.."))
    + geom_pointdensity(aes("carat", "price"), method='kde2d', **options)
    + ggtitle(title)
    for title, options in [
        ("Default inherit_aes", {}),
        ("inherit_aes=False", {'inherit_aes': False}),
    ]
])
Out[21]:
In [22]:
# Default legend next to one with a manual key label.
gggrid([
    ggplot(df, aes("carat", "price")) + geom_pointdensity(method='kde2d', **options) + ggtitle(title)
    for title, options in [
        ("Default manual_key", {}),
        ('manual_key="point density"', {'manual_key': "point density"}),
    ]
])
Out[22]:
In [23]:
# Number of points kept by the explicit sampling: a quarter of `sample_size`.
# The title is built from the same value so the label always matches the code.
sampling_n = sample_size // 4

gggrid([
    ggplot(df, aes("carat", "price")) + \
        geom_pointdensity(method='kde2d') + \
        coord_cartesian(xlim=[0, 5], ylim=[0, 20_000]) + \
        ggtitle("Default sampling"),
    ggplot(df, aes("carat", "price")) + \
        geom_pointdensity(method='kde2d', sampling=sampling_random(sampling_n, seed=42)) + \
        coord_cartesian(xlim=[0, 5], ylim=[0, 20_000]) + \
        ggtitle("sampling=sampling_random({0}, seed=42)".format(sampling_n)),
])
Out[23]:
In [24]:
# Tooltip with a title from the "cut" column, the point position and the three computed statistics.
pointdensity_tooltips = (
    layer_tooltips()
    .title("@cut diamond")
    .line("position|(^x, ^y)")
    .line("@|@..density..")
    .line("count|@..count..")
    .line("scaled|@..scaled..")
)
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d', tooltips=pointdensity_tooltips)
    + ggtitle("Custom tooltips")
)
Out[24]:
In [25]:
# Density is mapped to two paint aesthetics; color_by / fill_by route them to outline and fill,
# and each gets its own brewer palette.
density_layer = geom_pointdensity(
    aes(paint_a="..density..", paint_b="..density.."),
    method='kde2d',
    shape=21,
    color_by='paint_a',
    fill_by='paint_b',
)
(
    ggplot(df, aes("carat", "price"))
    + density_layer
    + scale_brewer('paint_a', palette="Reds")
    + scale_brewer('paint_b', palette="Oranges")
    + ggtitle("Use of color_by and fill_by")
)
Out[25]:

map, map_join, use_crs

In [26]:
def get_border_gdf(country_name):
    """Geocode ``country_name`` and return its boundaries, requested via ``get_boundaries(15)``."""
    country_geocoder = geocode_countries(country_name)
    return country_geocoder.get_boundaries(15)

def get_cities_gdf(country_name, border_gdf):
    """Return centroids of the cities geocoded in ``country_name`` that intersect the border.

    Only the first geometry of ``border_gdf`` is used for the intersection test.
    """
    centroids = geocode_cities().scope(country_name).get_centroids()
    border_shape = border_gdf["geometry"].iloc[0]
    inside_border = centroids["geometry"].intersects(border_shape)
    return centroids[inside_border]

def get_cities_df(cities_gdf, *, value_limit=1_000_000, seed=42):
    """Build a frame pairing each city name with a reproducible random integer value.

    Parameters
    ----------
    cities_gdf : DataFrame-like
        Must provide a ``"found name"`` column; one output row per input row.
    value_limit : int
        Exclusive upper bound of the random values.
    seed : int
        Seed of the private random generator.

    Returns
    -------
    pandas.DataFrame
        Columns ``name`` and ``value``.
    """
    # A private RandomState yields the same values as np.random.seed(seed)
    # followed by np.random.randint, without reseeding the global RNG.
    rng = np.random.RandomState(seed)
    return pd.DataFrame({
        "name": cities_gdf["found name"],
        "value": rng.randint(value_limit, size=cities_gdf.shape[0])
    })
In [27]:
# Country used by the geocoding helpers and the map examples.
country_name = "Greece"
In [28]:
# Boundary of the selected country; its CRS is printed for reference.
border_gdf = get_border_gdf(country_name)
print(border_gdf.crs)
border_gdf
EPSG:4326
Out[28]:
country found name geometry
0 Greece Greece MULTIPOLYGON (((19.41069 39.83865, 19.40278 39...
In [29]:
# City centroids that intersect the country boundary; shape and CRS are printed for reference.
cities_gdf = get_cities_gdf(country_name, border_gdf)
print(cities_gdf.shape)
print(cities_gdf.crs)
cities_gdf.head()
(562, 3)
EPSG:4326
Out[29]:
city found name geometry
0 Athens Athens POINT (23.74147 37.99086)
1 Stonařov Stonařov POINT (23.74147 37.99086)
2 Voula Voula POINT (23.77161 37.84796)
3 Municipal Unit of Nikaia Municipal Unit of Nikaia POINT (23.63583 37.97915)
4 Municipal Unit of Nea Filadelfeia Municipal Unit of Nea Filadelfeia POINT (23.74095 38.0449)
In [30]:
# Plain data frame (city name plus random value) joined to `cities_gdf` in the map examples.
cities_df = get_cities_df(cities_gdf)
print(cities_df.shape)
cities_df.head()
(562, 2)
Out[30]:
name value
0 Athens 121958
1 Stonařov 671155
2 Voula 131932
3 Municipal Unit of Nikaia 365838
4 Municipal Unit of Nea Filadelfeia 259178
In [31]:
# Three variants over the country border: map only, data joined to the map, and use_crs=3857.
(
    gggrid([
        (
            ggplot()
            + geom_map(data=border_gdf)
            + geom_pointdensity(map=cities_gdf)
            + scale_color_viridis(guide='none')
            + ggtitle("map=cities_gdf")
        ),
        (
            ggplot()
            + geom_map(data=border_gdf)
            + geom_pointdensity(aes(size="value"), data=cities_df, map=cities_gdf, map_join=["name", "found name"])
            + scale_color_viridis(guide='none')
            + scale_size(range=[1, 3])
            + ggtitle('map_join=["name", "found name"]')
        ),
        (
            ggplot()
            + geom_map(data=border_gdf, use_crs=3857)
            + geom_pointdensity(aes(size="value"), data=cities_df, map=cities_gdf, map_join=["name", "found name"], use_crs=3857)
            + scale_color_viridis(guide='none')
            + scale_size(range=[1, 3])
            + ggtitle("use_crs=3857")
        ),
    ])
    + ggtb()
)
Out[31]:

Stat

stat='identity'

In [32]:
# The geom with its own stat next to the same geom with stat='identity'.
gggrid([
    ggplot(df, aes("carat", "price")) + layer + ggtitle(title)
    for title, layer in [
        ("Default stat", geom_pointdensity(method='kde2d')),
        ("stat='identity'", geom_pointdensity(stat='identity')),
    ]
])
Out[32]:

'pointdensity' Stat

In [33]:
# geom_bin2d with its default stat next to geom_bin2d driven by the 'pointdensity' stat.
gggrid([
    (
        ggplot(df, aes("carat", "price"))
        + geom_bin2d()
        + ggtitle("geom_bin2d() with default stat")
    ),
    (
        ggplot(df, aes("carat", "price"))
        + geom_bin2d(
            aes(fill="..density.."),
            stat='pointdensity',
            method='kde2d',
            width=3,
            height=3,
            size=3,
        )
        + ggtitle("geom_bin2d(stat='pointdensity')")
    ),
])
Out[33]:

Interaction with other layers

ggmarginal()

In [34]:
# Point-density layer combined with marginal density layers on the "tr" sides.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d')
    + ggmarginal("tr", layer=geom_density())
)
Out[34]:

ggtb()

In [35]:
# Point-density layer with ggtb() added to the plot.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d')
    + ggtb()
)
Out[35]:

Facets

In [36]:
# One facet per value of the "cut" column.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d')
    + facet_grid(x="cut")
)
Out[36]:

Coordinate Systems

In [37]:
# Same layer under the default, flipped and polar coordinate systems.
gggrid([
    (
        ggplot(df, aes("carat", "price"))
        + geom_pointdensity(method='kde2d')
        + ggtitle("Default coordinate system")
    ),
    (
        ggplot(df, aes("carat", "price"))
        + geom_pointdensity(method='kde2d')
        + coord_flip()
        + ggtitle("coord_flip()")
    ),
    (
        ggplot(df, aes("carat", "price"))
        + geom_pointdensity(method='kde2d')
        + coord_polar()
        + ggtitle("coord_polar()")
    ),
])
Out[37]:

Livemap

In [38]:
# City values joined to the city centroids and drawn on top of a livemap layer.
(
    ggplot()
    + geom_livemap(data_size_zoomin=2)
    + geom_pointdensity(aes(size="value"), data=cities_df, map=cities_gdf, map_join=["name", "found name"])
    + scale_color_viridis(guide='none')
    + scale_size(range=[1, 3])
)
Out[38]:

Themes

In [39]:
# Brewer color scale combined with a theme, a legend moved to the bottom, and a flavor.
(
    ggplot(df, aes("carat", "price"))
    + geom_pointdensity(method='kde2d')
    + scale_color_brewer(palette="Oranges")
    + theme_minimal()
    + theme(legend_position='bottom')
    + flavor_darcula()
)
Out[39]:

Joint Plot

In [40]:
# joint_plot with the 'pointdensity' geom and a marginal given by the "box:tr:.02" spec.
joint_plot(df, "carat", "price", geom='pointdensity', marginal="box:tr:.02")
Out[40]:

Residual Plot

In [41]:
# residual_plot drawn with the 'pointdensity' geom.
residual_plot(df, "carat", "price", geom='pointdensity')
Out[41]:

Tests

Boundary Tests

In [42]:
# Boundary cases: each entry has a plot title and the x/y data to render.
tests = [
    {'title': title, 'data': {'x': xs, 'y': ys}}
    for title, xs, ys in [
        ("Empty data", [], []),
        ("One element", [0], [0]),
        ("NaN's in data", [0, np.nan, None, 1, 2], [0, 3, 4, np.nan, None]),
    ]
]

# One default geom_pointdensity() plot per boundary case.
gggrid([
    ggplot(case['data'], aes('x', 'y')) + geom_pointdensity() + ggtitle(case['title'])
    for case in tests
], ncol=2)
Out[42]:

Emptiness Tests

In [43]:
def get_mapping(var):
    """Return an x/y mapping; when ``var`` is not None it is also mapped to color and fill."""
    if var is not None:
        return aes('x', 'y', color=var, fill=var)
    return aes('x', 'y')

def p_working(var):
    """Plot with a single valid point."""
    return ggplot({'x': [0], 'y': [0]}, get_mapping(var))


def p_no_data(var):
    """Plot with a mapping but no data at all."""
    # Reuses get_mapping() instead of repeating its var-is-None branch inline.
    return ggplot(mapping=get_mapping(var))


def p_empty(var):
    """Plot whose x and y columns are empty lists."""
    return ggplot({'x': [], 'y': []}, get_mapping(var))


def p_facet_nan(var):
    """Two facets along 'g'; the row for facet "B" has NaN in x."""
    return ggplot({'x': [0, np.nan], 'y': [0, 0], 'g': ["A", "B"]}, get_mapping(var)) + facet_grid(x='g')


def p_facet_cross(var):
    """Facet grid over 'g1' x 'g2' with two data rows: (A, C) and (B, D)."""
    return ggplot({'x': [0, 0], 'y': [0, 0], 'g1': ["A", "B"], 'g2': ["C", "D"]}, get_mapping(var)) + facet_grid(x='g1', y='g2')


# Plot grouped by 'g' through color/fill; the row for group "B" has NaN in x.
p_group = ggplot({'x': [0, np.nan], 'y': [0, 0], 'g': ["A", "B"]}, aes('x', 'y', color='g', fill='g'))


def p_nan(var):
    """Plot whose only point is NaN in both x and y."""
    return ggplot({'x': [np.nan], 'y': [np.nan]}, get_mapping(var))


def p_lim(var):
    """Plot whose only point (x=0) lies outside the x limits set by xlim(1, 2)."""
    return ggplot({'x': [0], 'y': [0]}, get_mapping(var)) + xlim(1, 2)

# Add a default geom_pointdensity() layer to each emptiness scenario.
gggrid([
    scenario + geom_pointdensity()
    for scenario in [
        p_working("..density.."),
        p_no_data("..density.."),
        p_empty("..density.."),
        p_facet_nan("..density.."),
        p_facet_cross("..density.."),
        p_group,
        p_nan("..density.."),
        p_lim("..density.."),
    ]
], ncol=2)
Out[43]: