In [1]:
import pandas as pd

from lets_plot import *
In [2]:
# Initialize Lets-Plot once, before any plot cell below is evaluated.
# NOTE(review): presumably this wires up the HTML/JS output used to render plots
# in the notebook — confirm against the lets-plot documentation.
LetsPlot.setup_html()
In [3]:
# Read the mpg dataset straight from the lets-plot-docs repository and preview
# its first three rows.
MPG_CSV_URL = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"

mpg = pd.read_csv(MPG_CSV_URL)
mpg.head(3)
Out[3]:
Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy fl class
0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
In [4]:
# Subset of rows whose manufacturer is 'subaru'; these get highlighted.
subaru = mpg.loc[mpg['manufacturer'].eq('subaru')]

# Base scatter plot of 'displ' vs 'hwy', reused by the cells that follow.
# The large orange point layer (subaru rows only) is added before the default
# point layer for the whole dataset.
p = (
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=subaru, color='orange', size=5)
    + geom_point()
)
p
Out[4]:
In [5]:
# Annotate the base plot: a 'subaru' text label at (5.05, 35) plus an arrowed
# curve running from (5, 35) to (2.62, 27) with curvature 0.2.
(
    p
    + geom_text(
        x=5.05, y=35,
        label='subaru',
        hjust='left',
        color='#d76e00',
        size=10,
    )
    + geom_curve(
        x=5, y=35,
        xend=2.62, yend=27,
        curvature=0.2,
        arrow=arrow(length=6),
        color='#d76e00',
    )
)
Out[5]:
In [6]:
# Same kind of annotation, this time with the label at (4.2, 25) and a curve
# from (4.5, 26.2) to (2.62, 27) that sets both curvature (0.5) and angle (60).
(
    p
    + geom_text(
        x=4.2, y=25,
        label='subaru',
        hjust='left',
        color='#d76e00',
        size=10,
    )
    + geom_curve(
        x=4.5, y=26.2,
        xend=2.62, yend=27,
        curvature=0.5,
        angle=60,
        arrow=arrow(length=6),
        color='#d76e00',
    )
)
Out[6]:
In [7]:
# Label at (3, 12) with a curve from (2.95, 12) to (2.5, 22); the curvature is
# negative (-0.3) here, unlike the previous two annotations.
(
    p
    + geom_text(
        x=3, y=12,
        label='subaru',
        hjust='left',
        color='#d76e00',
        size=10,
    )
    + geom_curve(
        x=2.95, y=12,
        xend=2.5, yend=22,
        curvature=-0.3,
        arrow=arrow(length=6),
        color='#d76e00',
    )
)
Out[7]:
In [ ]:
 
In [8]:
# Rows where 'cyl' equals 5; also used by the next plot cell.
mpg_cyl5 = mpg.loc[mpg['cyl'].eq(5)]

# Scatter plot with the five-cylinder rows highlighted, a text label at
# (4, 37), and an arrowed curve with fixed endpoints (3.95, 37) -> (2.6, 29).
(
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=mpg_cyl5, color='#de77ae', size=5)
    + geom_point()
    + geom_text(
        label="Five-cylinder engine",
        x=4, y=37,
        hjust=0,
        color='#c51b7d',
        size=10,
    )
    + geom_curve(
        x=3.95, y=37,
        xend=2.6, yend=29,
        curvature=0.1,
        arrow=arrow(length=6),
        color='#c51b7d',
    )
)
Out[8]:
In [9]:
# Variant of the five-cylinder plot in which the curve layer takes `mpg_cyl5`
# as its data and only fixes the end point (3.95, 37).
# NOTE(review): the curve layer sets no x/y of its own, so its start points
# presumably come from the plot-level aes('displ', 'hwy') — confirm.
(
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=mpg_cyl5, color='#de77ae', size=5)
    + geom_point()
    + geom_text(
        label="Five-cylinder engine",
        x=4, y=37,
        hjust=0,
        color='#c51b7d',
        size=10,
    )
    + geom_curve(
        data=mpg_cyl5,
        xend=3.95, yend=37,
        size_start=5,  # same value as the highlighted points' size
        curvature=0.1,
        arrow=arrow(length=6, ends='first'),
        color='#c51b7d',
    )
)
Out[9]:
In [10]:
# Count how many rows each manufacturer has in the dataset.
manufacturer_counts = mpg['manufacturer'].value_counts()
manufacturer_counts
Out[10]:
dodge         37
toyota        34
volkswagen    27
ford          25
chevrolet     19
audi          18
hyundai       14
subaru        14
nissan        13
honda          9
jeep           8
pontiac        5
land rover     4
mercury        4
lincoln        3
Name: manufacturer, dtype: int64
In [11]:
# Manufacturer to highlight; the same string is also used as the text label.
brand = 'pontiac'
brand_df = mpg.loc[mpg['manufacturer'].eq(brand)]

# Highlight the chosen brand, label it at (6, 37), and add one curve per
# `brand_df` row ending at (5.95, 35). The x axis is limited to [3, 6].
(
    ggplot(mpg, aes('displ', 'hwy'))
    + geom_point(data=brand_df, color='#bd423f', size=5)
    + geom_point()
    + geom_text(
        label=brand,
        x=6, y=37,
        hjust=1,
        color='#bd423f',
        size=10,
    )
    + geom_curve(
        data=brand_df,
        xend=5.95, yend=35,
        size_start=5,  # same value as the highlighted points' size
        curvature=-0.1,
        arrow=arrow(length=6, ends='first'),
        color='#bd423f',
    )
    + xlim(3, 6)
)
Out[11]: