from lets_plot import *
from lets_plot.geo_data import *
from lets_plot.settings_utils import geocoding_service
#LetsPlot.set(geocoding_service(url='http://3.86.228.157:3025'))
import pandas as pd
# Configure lets-plot's HTML output before any plot is displayed.
LetsPlot.setup_html()

# US household income dataset (2017) from the lets-plot-docs repository;
# the file is read with Latin-1 encoding.
income_all = pd.read_csv(
    'https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/US_household_income_2017.csv',
    encoding='latin-1',
)
income_all.head(3)

# Average of the "Mean" column per state.
income_by_state = (
    income_all
    .groupby("State_Name", as_index=False)["Mean"]
    .mean()
)
income_by_state.head(3)

# Average of the "Mean" column per (state, county) pair.
income_by_county = (
    income_all
    .groupby(["State_Name", "County"], as_index=False)["Mean"]
    .mean()
)
income_by_county.head(3)
# State names reported by the geocoder in the 'found name' column
# for the 'us-48' request.
us48 = (
    regions_state('us-48')
    .to_data_frame()['found name']
    .tolist()
)

# Keep only the county rows whose state appears in that list.
data = income_by_county[income_by_county["State_Name"].isin(us48)]

# Number of remaining rows; the column count is discarded.
row_count, _ = data.shape
print(row_count)
# Geocode every (county, state) pair from the filtered income table.
# drop_not_matched() is chained before build(); presumably it skips
# requests the geocoder cannot match -- confirm against the geocoding docs.
counties = (
    regions_builder2(
        'county',
        names=data["County"].tolist(),
        states=data["State_Name"].tolist(),
    )
    .drop_not_matched()
    .build()
)
counties.to_data_frame()
# Centroid geometry for each geocoded county.
centroids = counties.centroids()
centroids

# Joining on two keys (county + state) is not available through map_join,
# so the income values are attached with pandas.merge instead.
data_with_geometry = centroids.merge(
    data,
    left_on=['request', 'state'],
    right_on=['County', 'State_Name'],
)
data_with_geometry

# One point per county centroid, coloured by the "Mean" column.
(
    ggplot()
    + geom_point(aes(color='Mean'), data_with_geometry)
)
# Boundary geometry for each geocoded county.
boundaries = counties.boundaries()
boundaries

# Joining on two keys (county + state) is not available through map_join,
# so the income values are attached with pandas.merge instead.
data_with_boundaries = boundaries.merge(
    data,
    left_on=['request', 'state'],
    right_on=['County', 'State_Name'],
)
data_with_boundaries

# Blank out all axis decorations and fix the plot size.
map_theme = (
    theme(
        axis_line="blank",
        axis_text="blank",
        axis_title="blank",
        axis_ticks="blank",
    )
    + ggsize(900, 400)
)

# County map filled by the "Mean" column on a two-colour gradient.
(
    ggplot()
    + geom_map(aes(fill='Mean'), data_with_boundaries)
    + scale_fill_gradient(low="#007BCD", high="#FE0968", name="Mean income")
    + map_theme
)
# Issues
# Known issue: drop_not_found() breaks parents -- the corresponding
# columns are missing from the result.
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Not existing County', 'Anson County'],
        states=['New York', 'New York', 'North Carolina'],
        countries=['usa', 'usa', 'usa'],
    )
    .drop_not_found()
    .build()
)
# issue with parents geocoding - unexpected ranking behaviour results in broken responses.
# When mulitply object found by one request ambiguous response is generated without use of ranking by weight.
# Ambiguous response is also borken - it returns success response with first namesake object ¯\_(ツ)_/¯
regions_builder2('county',
names=['Wayne County', 'Essex County'],
states=['New York', 'Virginia'],
countries=['usa', 'usa'])\
.build()
# Known issue: the error message is not informative.
# Here `countries` has a single entry while `names` and `states` have two.
(
    regions_builder2(
        'county',
        names=['Wayne County', 'Essex County'],
        states=['New York', 'Virginia'],
        countries=['usa'],
    )
    .build()
)
# Known issue: passing regions as a parent is not yet supported.
# The states are geocoded first, then handed to the county request
# through `states=` instead of a list of names.
state_regions = (
    regions_builder2(
        'state',
        names=data["State_Name"].tolist(),
        countries=['uSa'] * row_count,
    )
    .build()
)
counties_via_regions = (
    regions_builder2(
        'county',
        names=data["County"].tolist(),
        states=state_regions,
    )
    .drop_not_matched()
    .build()
)
counties_via_regions.to_data_frame()
# Namesake check: 'florida' is requested under different parents --
# first through scope='Uruguay', then once per row through `countries`.
# NOTE(review): presumably meant to compare the two parent mechanisms -- confirm.
(
    regions_builder2('state', names=['florida'], scope='Uruguay')
    .build()
)
(
    regions_builder2(
        'state',
        names=['florida', 'florida'],
        countries=['usa', 'Uruguay'],
    )
    .build()
)