In [1]:
import numpy as np
import pandas as pd

from lets_plot import *
from lets_plot.mapping import as_discrete
In [2]:
# Set up Lets-Plot's HTML output for this notebook; run once before any plot cell.
LetsPlot.setup_html()
In [3]:
# Raw observations as read from disk. `data` is kept unmodified because its
# row count is used later as the denominator of the calm share.
data = pd.read_csv('coord_polar_wind.csv')
# All cleaning and filtering happens on a copy.
wind_df = data.copy()

def is_float(x):
    """Return True if ``x`` can be converted with ``float()``, otherwise False.

    Values that ``float()`` rejects with either ``ValueError`` (e.g. a
    non-numeric string) or ``TypeError`` (e.g. ``None`` or a list) yield
    False instead of propagating the exception.

    Note that NaN is a valid float, so ``is_float(float('nan'))`` is True.
    """
    try:
        float(x)
    except (TypeError, ValueError):
        return False
    return True


# Keep only rows whose speed AND direction both parse as numbers.
# is_float is a pure predicate, so one combined mask selects the same rows
# as filtering on each column in turn.
parseable = wind_df['sped'].apply(is_float) & wind_df['drct'].apply(is_float)

# .assign() builds a new DataFrame, so the dtype conversion never writes into
# a slice of an earlier frame (avoids pandas' SettingWithCopyWarning).
wind_df = wind_df[parseable].assign(
    sped=lambda df: df['sped'].astype(float),
    drct=lambda df: df['drct'].astype(float),
)

# Drop observations slower than 2.0 (the lowest speed bin starts at 2); the
# dropped share is what the next cell reports as "calm".
# .copy() leaves wind_df as an independent frame for later column additions.
wind_df = wind_df[wind_df['sped'] >= 2.0].copy()

wind_df.head()
Out[3]:
station valid drct sped
0 PEA 2002-05-17 00:14 330.0 8.05
1 PEA 2002-05-17 00:20 330.0 8.05
2 PEA 2002-05-17 00:34 340.0 10.35
3 PEA 2002-05-17 00:40 340.0 10.35
4 PEA 2002-05-17 00:54 340.0 14.95
In [4]:
# Calm share (percent): fraction of the raw rows that did not survive the
# cleaning/filtering of the previous cell.
# NOTE(review): rows dropped because speed/direction could not be parsed are
# counted as calm too — confirm that this is intended.
calm = 100 - wind_df.shape[0] / data.shape[0] * 100

# Speed bin edges; with right=False each bin is left-closed: [2, 5), [5, 7), ... [20, inf)
bins = [2, 5, 7, 10, 15, 20, float('inf')]
# One integer id per bin, derived from `bins` so the two cannot drift apart.
bin_ids = list(range(len(bins) - 1))

# .assign() returns a new frame instead of writing a column into a filtered one.
wind_df = wind_df.assign(
    speed_group=pd.cut(wind_df['sped'], bins=bins, labels=bin_ids, right=False)
)

# Count observations per (direction, speed bin).
# 'speed_group' is categorical: observed=False is passed explicitly so that
# empty combinations are kept with count 0, independent of the pandas default.
grouped_counts = (
    wind_df
    .groupby(['drct', 'speed_group'], observed=False)
    .size()
    .reset_index(name='count')
)

# Total number of observations that remain after filtering
total_observations = wind_df.shape[0]

# Share of each (direction, speed bin) cell relative to all remaining observations, in percent
grouped_counts['percentage_of_total'] = (grouped_counts['count'] / total_observations) * 100
In [5]:
# Wind rose: stacked bars of direction frequency drawn in polar coordinates,
# with the calm share printed in the hollow centre.

# NOTE(review): the geometry below assumes 'drct' is reported in 10-degree
# steps (36 sectors around the compass) — confirm against the data.
sector_count = 36
half_sector_deg = 360 / sector_count / 2          # 5 degrees: half a bar slot
half_sector_rad = 2 * np.pi / sector_count / 2    # the same offset in radians

# One colour and one legend label per speed bin id (0..5).
speed_colors = ['#002bff', '#03d3f8', '#7afe81', '#fde609', '#ff4404', '#780200']
speed_labels = {
    0: '2 - 4.9',
    1: '5 - 6.9',
    2: '7 - 9.9',
    3: '10 - 14.9',
    4: '15 - 19.9',
    5: '20+',
}

# Compass names shown on the angular axis, keyed by direction in degrees.
compass_labels = {
    45:  'NE',
    90:  'E',
    135: 'SE',
    180: 'S',
    225: 'SW',
    270: 'W',
    315: 'NW',
    360: 'N',
}

(
    ggplot(grouped_counts)
    + geom_bar(
        aes('drct', 'percentage_of_total', fill=as_discrete('speed_group', order=1)),
        size=0, width=0.8,
        stat='identity',
        tooltips=layer_tooltips().format('^y', '{.2g}%').format('^x', '{}°'),
    )
    # White band below y=0. Its x-range is shifted by half a sector so it lines
    # up with the bars; it also fixes the x-axis domain, so no extra
    # scale expand is needed on the x scale.
    + geom_rect(
        xmin=half_sector_deg, xmax=360 + half_sector_deg,
        ymin=-1, ymax=0, fill='white', size=0,
    )
    + geom_hline(yintercept=0, size=2)
    # Calm share label placed at y=-1, i.e. the centre of the plot.
    + geom_text(x=180, y=-1, label=f'Calm\n{calm:.1f}%', hjust='middle', vjust='center', size='10')
    + scale_fill_manual(
        name='Wind Speed (mph):',
        values=speed_colors,
        labels=speed_labels,
    )
    + scale_y_continuous(
        # Explicit breaks so no automatically generated ticks appear for
        # values outside of the data range.
        breaks=[0, 1, 2, 3, 4, 5],
        format='{}%',
    )
    + scale_x_continuous(labels=compass_labels)
    + ggsize(800, 800)
    + theme_minimal2()
    + theme(
        panel_grid_minor_x=element_line(),
        panel_grid=element_line(color='#A0A0A0'),
        axis_ticks_y=element_line(),
        axis_text_x=element_text(size=18),
        axis_title=element_blank(),
    )
    + coord_polar(
        ylim=[-1, None],        # start the radial axis at -1 to create the inner circle
        start=half_sector_rad,  # rotate by half a bar width so the N-S axis is perpendicular
    )
)
Out[5]: