In [1]:
# Wildcard imports: later cells call the plotting and geocoding functions
# unqualified.
from lets_plot import *
from lets_plot.geo_data import *

from lets_plot.settings_utils import geocoding_service
# Disabled override of the geocoding service URL.
# NOTE(review): presumably a development server - confirm before re-enabling.
#LetsPlot.set(geocoding_service(url='http://3.86.228.157:3025'))

import pandas as pd

# Set up HTML output for lets-plot in this notebook.
LetsPlot.setup_html()
The geodata is provided by © OpenStreetMap contributors and is made available here under the Open Database License (ODbL).
In [2]:
# Download the 2017 US household income table and preview its first rows.
income_all = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/US_household_income_2017.csv',
    encoding='latin-1',
)
income_all.head(3)
Out[2]:
id State_Code State_Name State_ab County City Place Type Primary Zip_Code Area_Code ALand AWater Lat Lon Mean Median Stdev sum_w
0 1011000 1 Alabama AL Mobile County Chickasaw Chickasaw city City place 36611 251 10894952 909156 30.771450 -88.079697 38773 30506 33101 1638.260513
1 1011010 1 Alabama AL Barbour County Louisville Clio city City place 36048 334 26070325 23254 31.708516 -85.611039 37725 19528 43789 258.017685
2 1011020 1 Alabama AL Shelby County Columbiana Columbiana city City place 35051 205 44835274 261034 33.191452 -86.615618 54606 31930 57348 926.031000
In [3]:
# Unweighted average of the row-level `Mean` values, one row per state.
income_by_state = (
    income_all
    .groupby("State_Name", as_index=False)["Mean"]
    .mean()
)
income_by_state.head(3)
Out[3]:
State_Name Mean
0 Alabama 53612.925856
1 Alaska 77670.209524
2 Arizona 62578.071313
In [4]:
# Same aggregation one level down: one row per (state, county) pair.
# The state is part of the key because county names repeat across states.
income_by_county = (
    income_all
    .groupby(["State_Name", "County"], as_index=False)["Mean"]
    .mean()
)
income_by_county.head(3)
Out[4]:
State_Name County Mean
0 Alabama Autauga County 53735.557235
1 Alabama Barbour County 37725.000000
2 Alabama Blount County 55127.000000
In [5]:
# Names of the states covered by the 'us-48' request, as resolved by the
# geocoding service; used below to restrict the income data to those states.
# `regions_state` is not defined by the installed lets-plot (NameError), so the
# lookup goes through `geocode_states`, whose `get_geocodes()` returns a
# DataFrame with the resolved names in the 'found name' column.
us48 = geocode_states('us-48').get_geocodes()['found name'].tolist()
In [ ]:
# Keep only the counties whose state is in the `us48` list, then report how
# many rows are left (`row_count` is reused further down).
data = income_by_county[income_by_county["State_Name"].isin(us48)]
row_count, _ = data.shape
print(row_count)
In [ ]:
# Geocode every county together with its parent state; `names` and `states`
# are parallel lists taken from the same rows of `data`.
counties = (
    regions_builder2(
        'county',
        names=data["County"].tolist(),
        states=data["State_Name"].tolist(),
    )
    .drop_not_matched()
    .build()
)
In [ ]:
# Display the geocoding result held in `counties` as a data frame.
counties.to_data_frame()
In [ ]:
# Centroid geometry for each geocoded county.
centroids = counties.centroids()
centroids
In [ ]:
# map_join cannot join on several keys at once, so the income values are
# attached to the centroids with a pandas merge on the (county, state) pair.
data_with_geometry = centroids.merge(
    data,
    left_on=['request', 'state'],
    right_on=['County', 'State_Name'],
)
data_with_geometry
In [ ]:
# One point per county centroid, coloured by the county's mean income.
# The dataset is passed as `data=`: lets-plot geoms take only `mapping`
# positionally, so a second positional argument is rejected.
ggplot() + geom_point(aes(color='Mean'), data=data_with_geometry)
In [ ]:
# Boundary geometry for each geocoded county.
boundaries = counties.boundaries()
boundaries
In [ ]:
# map_join cannot join on several keys at once, so the income values are
# attached to the boundaries with a pandas merge on the (county, state) pair.
data_with_boundaries = boundaries.merge(
    data,
    left_on=['request', 'state'],
    right_on=['County', 'State_Name'],
)
data_with_boundaries
In [ ]:
# Blank out every axis element and fix the plot size.
map_theme = (
    theme(axis_line="blank", axis_text="blank", axis_title="blank", axis_ticks="blank")
    + ggsize(900, 400)
)

# County map filled by mean income on a blue-to-pink gradient.
# The dataset is passed as `data=`: lets-plot geoms take only `mapping`
# positionally, so a second positional argument is rejected.
(
    ggplot()
    + geom_map(aes(fill='Mean'), data=data_with_boundaries)
    + scale_fill_gradient(low="#007BCD", high="#FE0968", name="Mean income")
    + map_theme
)

Issues

In [ ]:
# Known issue: drop_not_found() breaks parents - the parent columns are
# missing from the result. The second name is deliberately one that does not
# exist.
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Not existing County', 'Anson County'],
        states=['New York', 'New York', 'North Carolina'],
        countries=['usa', 'usa', 'usa'],
    )
    .drop_not_found()
    .build()
)
In [ ]:
# issue with parents geocoding - unexpected ranking behaviour results in broken responses.
# When mulitply object found by one request ambiguous response is generated without use of ranking by weight. 
# Ambiguous response is also borken - it returns success response with first namesake object ¯\_(ツ)_/¯
regions_builder2('county', 
                 names=['Wayne County', 'Essex County'],
                states=['New York', 'Virginia'],
                countries=['usa', 'usa'])\
    .build()
In [ ]:
# Known issue: the error message is not informative. Here `names` and
# `states` have two entries each while `countries` has only one.
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Essex County'],
        states=['New York', 'Virginia'],
        countries=['usa'],
    )
    .build()
)
In [ ]:
# Known issue: passing an already geocoded regions object as a parent is not
# supported yet. The states are geocoded first and the result is then handed
# to the county request as `states`.
state_regions = regions_builder2(
    'state',
    names=data["State_Name"].tolist(),
    countries=['uSa'] * row_count,
).build()

counties_via_regions = (
    regions_builder2(
        'county',
        names=data["County"].tolist(),
        states=state_regions,
    )
    .drop_not_matched()
    .build()
)
counties_via_regions.to_data_frame()
In [ ]:
# Request a state named 'florida' with the search scope set to 'Uruguay'.
regions_builder2(
    'state',
    names=['florida'],
    scope='Uruguay',
).build()
In [ ]:
# Request the same state name twice, each time under a different parent
# country.
regions_builder2(
    'state',
    names=['florida', 'florida'],
    countries=['usa', 'Uruguay'],
).build()