hc_nba_cookbook.Rmd
library(asbviz)
library(nbastatR)
library(tidyverse)
library(glue)
library(highcharter)
library(janitor)
library(forecast)
library(magrittr)
Recreating this amazing tutorial from Tom Bishop
Lets modify the analysis to explore the 2021 season instead of the 2020 season.
gamelogs <- game_logs(seasons = 2021) %>% clean_names()
## Acquiring NBA basic player game logs for the 2020-21 Regular Season
Fix the team logos
gamelogs <- gamelogs %>%
mutate(url_team_season_logo = url_team_season_logo %>% str_replace_all("2020-21", "2019-20"))
Lets get some data and do this.
tbl_top_10_ppg <- gamelogs %>%
group_by(name_player) %>%
summarise(ppg = mean(pts)) %>%
arrange(desc(ppg)) %>%
ungroup() %>%
slice(1:10)
tbl_top_10_ast <- gamelogs %>%
group_by(name_player, url_player_headshot) %>%
summarise(apg = mean(ast)) %>%
arrange(desc(apg)) %>%
ungroup() %>%
slice(1:10)
tbl_top_30_reb <-
gamelogs %>%
group_by(name_player, url_player_headshot) %>%
summarise(orebpg = mean(oreb), drebpg = mean(dreb), trebpg = mean(treb)) %>%
arrange(desc(trebpg)) %>%
ungroup() %>%
slice(1:30)
tbl_top_10_ppg %>%
ggplot(aes(x = name_player, y = ppg)) +
geom_col()
Doesn’t look great, lets make it look better using hc_xy
tbl_top_10_ast %>%
hc_xy(
x = "name_player",
y = "apg",
type = "column",
title = "Top 10 Passers",
image = "url_player_headshot",
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.y} apg"
),
invert_chart = F,
transparent_tooltip = F,
disable_legend = T,
subtitle = "2021-21 Season",
caption = "Column Chart",
override_x_text = list(text = ""),
override_y_label = list(format = "{value}"),
override_y_text = list(text = "Assists Per Game")
)
tbl_top_10_ast %>%
hc_xy(
x = "name_player",
y = "apg",
type = "bar",
title = "Top 10 Passers",
image = "url_player_headshot",
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.y} apg"
),
transparent_tooltip = F,
disable_legend = T,
subtitle = "2021-21 Season",
override_x_text = list(text = ""),
override_y_label = list(format = "{value}"),
caption = "Bar Chart",
override_y_text = list(text = "Assists Per Game")
)
Top rebounders
tbl_top_30_reb %>%
hc_xy(
x = "drebpg",
y = "orebpg",
name = "name_player",
type = "scatter",
title = "Top 30 Rebounders",
marker = "url_player_headshot",
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.name}"
),
transparent_tooltip = F,
disable_legend = T,
subtitle = "2021-21 Season",
transformations = c("mean_x", "mean_y"),
fits = "lm",
override_x_text = list(text = "Defensive Rebounds"),
override_y_label = list(format = "{value}"),
caption = "Scatter Chart",
override_y_text = list(text = "Offensive Rebounds Per Game")
)
## [2021-10-08 13:04:18 s.LM] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 30 x 1
## Training outcome: 30 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:18 s.LM] Training linear model...
##
## [ LM Regression Training Summary ]
## MSE = 0.86 (1.52%)
## RMSE = 0.93 (0.77%)
## MAE = 0.75 (2.60%)
## r = 0.12 (p = 0.52)
## rho = 0.15 (p = 0.43)
## R sq = 0.02
##
## [2021-10-08 13:04:18 s.LM] Run completed in 3.1e-03 minutes (Real: 0.18; User: 0.14; System: 0.03)
tbl_top_30_reb %>%
hc_xy(
x = "drebpg",
y = "orebpg",
size = "trebpg",
name = "name_player",
title = "Top 30 Rebounders",
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.name}"
),
transparent_tooltip = F,
disable_legend = T,
subtitle = "2021-21 Season",
transformations = c("mean_x", "mean_y"),
fits = "lm",
plot_symbol = "circle",
override_x_text = list(text = "Defensive Rebounds"),
override_y_label = list(format = "{value}"),
caption = "Scatter Chart",
override_y_text = list(text = "Offensive Rebounds Per Game")
)
## [2021-10-08 13:04:19 s.LM] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 30 x 1
## Training outcome: 30 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:19 s.LM] Training linear model...
##
## [ LM Regression Training Summary ]
## MSE = 0.86 (1.52%)
## RMSE = 0.93 (0.77%)
## MAE = 0.75 (2.60%)
## r = 0.12 (p = 0.52)
## rho = 0.15 (p = 0.43)
## R sq = 0.02
##
## [2021-10-08 13:04:19 s.LM] Run completed in 3e-03 minutes (Real: 0.18; User: 0.13; System: 0.03)
Lets explore Luka Doncic with some wins and models by whether the team won.
gamelogs %>%
filter(name_player == "Luka Doncic") %>%
hc_xy(
x = "number_game_player_season",
y = "pts",
group = "is_win",
color_palette = "pals::kovesi.isoluminant_cgo_70_c39",
image = "url_player_headshot",
type = "line",
axis_scrollbars = c("x"),
x_min_max = c(0, 50),
title = "Luka Doncic Points by Game by Game Won",
subtitle = "2020-21 Season",
fits = c("loess"),
roll_metrics = c("mean"),
roll_periods = c(10),
override_x_text = list(text = "Player Game Number"),
override_y_text = list(text = "Points Scored", align = "high")
)
##
## Using continuous color scheme pals::kovesi.isoluminant_cgo_70_c39
##
## <colors>
## #37B7ECFF #F6906DFF
## [2021-10-08 13:04:21 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 26 x 1
## Training outcome: 26 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:21 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 47.89 (18.83%)
## RMSE = 6.92 (9.90%)
## MAE = 5.66 (4.56%)
## r = 0.43 (p = 0.03)
## rho = 0.22 (p = 0.28)
## R sq = 0.19
##
## [2021-10-08 13:04:21 s.LOESS] Run completed in 2.9e-03 minutes (Real: 0.18; User: 0.13; System: 0.03)
## [2021-10-08 13:04:21 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 40 x 1
## Training outcome: 40 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:21 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 54.35 (6.02%)
## RMSE = 7.37 (3.06%)
## MAE = 5.93 (3.16%)
## r = 0.25 (p = 0.13)
## rho = 0.23 (p = 0.16)
## R sq = 0.06
##
## [2021-10-08 13:04:22 s.LOESS] Run completed in 2.9e-03 minutes (Real: 0.17; User: 0.13; System: 0.03)
##
## Using continuous color scheme scico::vikO
##
## <colors>
## #4E193DFF #50193BFF
Same but a spline
gamelogs %>%
filter(name_player == "Luka Doncic") %>%
hc_xy(
x = "number_game_player_season",
y = "pts",
group = "is_win",
color_palette = "pals::kovesi.isoluminant_cgo_70_c39",
image = "url_player_headshot",
type = "spline",
axis_scrollbars = c("x"),
x_min_max = c(0, 50),
title = "Luka Doncic Points by Game by Game Won",
subtitle = "2020-21 Season",
fits = c("loess"),
roll_metrics = c("mean"),
roll_periods = c(10),
override_x_text = list(text = "Player Game Number"),
override_y_text = list(text = "Points Scored", align = "high")
)
##
## Using continuous color scheme pals::kovesi.isoluminant_cgo_70_c39
##
## <colors>
## #37B7ECFF #F6906DFF
## [2021-10-08 13:04:24 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 26 x 1
## Training outcome: 26 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:24 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 47.89 (18.83%)
## RMSE = 6.92 (9.90%)
## MAE = 5.66 (4.56%)
## r = 0.43 (p = 0.03)
## rho = 0.22 (p = 0.28)
## R sq = 0.19
##
## [2021-10-08 13:04:25 s.LOESS] Run completed in 2.9e-03 minutes (Real: 0.17; User: 0.13; System: 0.03)
## [2021-10-08 13:04:25 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 40 x 1
## Training outcome: 40 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:25 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 54.35 (6.02%)
## RMSE = 7.37 (3.06%)
## MAE = 5.93 (3.16%)
## r = 0.25 (p = 0.13)
## rho = 0.23 (p = 0.16)
## R sq = 0.06
##
## [2021-10-08 13:04:25 s.LOESS] Run completed in 2.6e-03 minutes (Real: 0.16; User: 0.12; System: 0.03)
##
## Using continuous color scheme ggthemes::Green-Gold
##
## <colors>
## #F4D166FF #146C36FF
Lets look at an area chart of Kyrie Irving’s points.
gamelogs %>%
filter(name_player == "Kyrie Irving") %>%
asbviz::hc_xy(
x = "number_game_player_season",
y = "pts",
color_palette = "pals::kovesi.isoluminant_cgo_70_c39",
image = "url_player_headshot",
type = "area",
axis_scrollbars = c("x"),
x_min_max = c(0, 50),
title = "Kyrie Irving Points by Game",
opacity = .5,
subtitle = "2020-21 Season",
transparent_tooltip = T,
fits = c("loess"),
roll_metrics = c("mean"),
roll_periods = c(10),
override_x_text = list(text = "Player Game Number"),
override_y_text = list(text = "Points Scored", align = "high")
)
## [2021-10-08 13:04:28 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 54 x 1
## Training outcome: 54 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:28 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 71.76 (2.83%)
## RMSE = 8.47 (1.43%)
## MAE = 6.88 (-0.08%)
## r = 0.17 (p = 0.22)
## rho = 0.17 (p = 0.23)
## R sq = 0.03
##
## [2021-10-08 13:04:28 s.LOESS] Run completed in 2.8e-03 minutes (Real: 0.17; User: 0.12; System: 0.03)
##
## Using continuous color scheme grDevices::Cold
##
## <colors>
## #ACA4E2FF
There is an areaspline (smoothed edges) too.
Lets look at the Nets Scoring by FG%
gamelogs %>%
filter(slug_team == "BKN") %>%
group_by(name_player) %>%
summarise(pts = sum(pts), fgpct = sum(fgm) / sum(fga) * 100) %>%
asbviz::hc_xy(
type = "treemap",
group = "name_player",
size = "pts",
color = "fgpct",
transparent_tooltip = F,
subtitle = "2020-21 Season",
title = "Brooklyn Nets Points by FG %",
use_new_treemap = F,
tree_labels = list(
list(
level = 1,
borderWidth = 10,
borderColor = "transparent",
colorByPoint = TRUE,
dataLabels = list(enabled = TRUE)
),
list(
level = 2,
borderWidth = 0,
borderColor = "transparent",
colorVariation = list(key = "brightness", to = 0.50),
dataLabels = list(enabled = TRUE)
)
)
)
Lets look at Jarrett Allen’s rebounding performance by game using a stacked column chart.
gamelogs %>%
filter(name_player == "Jarrett Allen") %>%
select(date_game, name_player, number_game_player_season, oreb, dreb) %>%
gather(metric,
rebounds,
-c(number_game_player_season, name_player, date_game)) %>%
asbviz::hc_xy(
x = "date_game",
y = "rebounds",
type = "column",
invert_chart = F,
group = "metric",
stacking = "normal",
color_palette = "lisa::Jean_MichelBasquiat_1",
color_type = "discrete",
theme_name = "better_unica",
use_stock = T,
use_table_tooltip = T,
title = "Jarrett Allen Rebounds by Game",
subtitle = "2021 Season"
)
## <colors>
## #C11432FF #009ADAFF
##
## Using discrete color scheme lisa::Jean_MichelBasquiat_1
Lets add some custom styling to the top 10 assisters.
tbl_top_10_ast %>%
hc_xy(
x = "name_player",
y = "apg",
type = "column",
title = "Top 10 Passers",
image = "url_player_headshot",
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.y} apg"
),
invert_chart = F,
transparent_tooltip = T,
disable_legend = T,
subtitle = "2021-21 Season",
caption = "Column Chart",
override_x_text = list(text = ""),
override_y_label = list(format = "{value}"),
override_y_text = list(text = "Assists Per Game",
align = "high", # Documentation says options are: low, middle or high
margin = 10, # Number of pixels between the title and the axis line
style = list(
fontWeight = "bold", # Bold
fontSize = '1.4em', # 1.4 x tthe size of the default text
color = "#7cb5ec" # Hex code for the default blue
)
),
theme_name = "better_unica"
)
Add points to the bar
series_config <-
gamelogs %>%
group_by(name_player) %>%
summarise(apg = mean(ast), pts = mean(pts)) %>%
arrange(desc(apg)) %>%
slice(1:10) %>%
select(-apg) %>%
pivot_longer(-name_player, names_to = "stat", values_to = "y") %>%
rename(name = name_player) %>%
group_by(stat) %>%
nest() %>%
mutate(data = data %>% map(list_parse)) %>%
rename(name = stat) %>%
mutate(type = "line") %>%
list_parse()
hc_ast <-
tbl_top_10_ast %>%
asbviz::hc_xy(
x = "name_player",
y = "apg",
type = "column",
title = "Top 10 Passers",
image = "url_player_headshot",
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.y} apg"
),
invert_chart = F,
transparent_tooltip = T,
disable_legend = T,
subtitle = "2021-21 Season",
caption = "Column Chart",
override_x_text = list(text = ""),
override_y_label = list(format = "{value}",
gridLineWidth = 10,
gridLineDashStyle = "shortdash"),
override_y_text = list(
text = "Assists/Pts Per Game",
align = "high",
# Documentation says options are: low, middle or high
margin = 10,
# Number of pixels between the title and the axis line
style = list(
fontWeight = "bold",
# Bold
fontSize = '1.4em',
# 1.4 x tthe size of the default text
color = "#7cb5ec" # Hex code for the default blue
)
),
theme_name = "better_unica"
)
hc_ast %>%
hc_add_series_list(series_config)
gamelogs %>% pull(pts) %>%
hc_xy(
title = "Histogram of Points Scored in a Game",
subtitle = "2020-21 Season",
transparent_tooltp = T,
axis_scrollbars = c("x"),
x_min_max = c(0, 30),
disable_legend = T,
theme_name = "better_unica",
override_x_text = list(text = "Points Scored"),
override_y_text = list(text = "Count of Bin Occurences")
)
Lets explore team wins and plot as a character vector
gamelogs %>% distinct(date_game, slug_team_winner) %>%
pull(slug_team_winner) %>%
hc_xy(title = "Wins by Team",
subtitle = "2020-21 Season",
invert_chart = F)
Now lets look at some basic correlations!
gamelogs %>%
mutate_if(is.logical, as.numeric) %>%
select(
minutes,
pts,
oreb,
dreb,
fta,
ftm,
fg2a,
fg2m,
fg3a,
fg3m,
pf,
stl,
blk,
ast,
tov,
is_win,
plusminus,
is_b2b
) %>%
cor() %>%
hc_xy(
title = "2020-21 Numeric Feature Correlations",
label_parameters = list(
enabled = TRUE,
style = list(
color = "black",
textOutline = "black",
fontWeight = "normal"
),
formatter = JS(
"function(){
return Highcharts.numberFormat(this.point.value, 2);
}"
)
),
theme_name = "better_unica"
)
Might want to work on a multiple hc_add_series way but for now here his how to do it with long and tidy data.
Lets add a model fit too.
harden <-
gamelogs %>%
filter(name_player == "James Harden") %>%
select(number_game_team_season, pts, treb, ast)
harden_long <-
harden %>%
pivot_longer(-number_game_team_season)
harden_long %>%
tbl_color_group(group_column = "name", color_palette = "nbapalettes::rockets_90s") %>%
hc_xy(
x = "number_game_team_season",
facet = "name",
y = "value",
facet_column_count = 1,
title = "James Harden Key Metrics by Game",
subtitle = "2020-21 Season",
disable_legend = T,
transparent_tooltip = F,
fits = c("GLM"),
use_table_tooltip = T,
override_y_text = list(text = ""),
override_x_text = list(text = "Team Game #"),
type = "column",
facet_height = "200px",
color = "color"
)
## <colors>
## #041E42FF #BA0C2FFF #2C7AA1FF
##
## Using discrete color scheme nbapalettes::rockets_90s
##
## [2021-10-08 13:04:34 s.GLM] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 44 x 1
## Training outcome: 44 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:34 s.GLM] Training GLM...
##
## [ GLM Regression Training Summary ]
## MSE = 82.90 (3.05%)
## RMSE = 9.10 (1.54%)
## MAE = 7.28 (0.42%)
## r = 0.17 (p = 0.26)
## rho = 0.04 (p = 0.81)
## R sq = 0.03
##
## [2021-10-08 13:04:35 s.GLM] Run completed in 3e-03 minutes (Real: 0.18; User: 0.13; System: 0.03)
## [2021-10-08 13:04:35 s.GLM] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 44 x 1
## Training outcome: 44 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:35 s.GLM] Training GLM...
##
## [ GLM Regression Training Summary ]
## MSE = 9.47 (4.33%)
## RMSE = 3.08 (2.19%)
## MAE = 2.52 (4.10%)
## r = 0.21 (p = 0.18)
## rho = 0.28 (p = 0.06)
## R sq = 0.04
##
## [2021-10-08 13:04:36 s.GLM] Run completed in 2.9e-03 minutes (Real: 0.18; User: 0.13; System: 0.03)
## [2021-10-08 13:04:36 s.GLM] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 44 x 1
## Training outcome: 44 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:36 s.GLM] Training GLM...
##
## [ GLM Regression Training Summary ]
## MSE = 11.20 (2.43%)
## RMSE = 3.35 (1.22%)
## MAE = 2.78 (1.38%)
## r = 0.16 (p = 0.31)
## rho = 0.14 (p = 0.38)
## R sq = 0.02
##
## [2021-10-08 13:04:37 s.GLM] Run completed in 2.7e-03 minutes (Real: 0.16; User: 0.12; System: 0.03)
Lets explore 3PT shooting.
tbl_3pt_teams <-
gamelogs %>%
mutate(url_team_season_logo = url_team_season_logo %>% str_replace_all("2020-21", "2019-20")) %>%
group_by(name_team, url_team_season_logo) %>%
summarise(
fg3m = sum(fg3m),
fg3a = sum(fg3a),
fg3a_per_game = sum(fg3a) / n_distinct(id_game),
.groups = "drop"
) %>%
mutate(pct_3pt = (fg3m / fg3a) * 100) %>%
arrange(desc(fg3a)) %>%
mutate(color = "#2EC4B6")
cyan <- "#2EC4B6"
Now we can visualize it and also change the margin on the top and play with the background color.
tbl_3pt_teams %>%
asbviz::hc_xy(
x = "fg3a_per_game",
y = "pct_3pt",
use_fast = F,
boost = F,
type = "scatter",
override_name = "Team",
name = "name_team",
fits = c("LM", "LOESS"),
transformations = c("mean_x", "mean_y"),
title = "3PT Shooting % Versus Attempts",
subtitle = "2020-21 Season",
marker = "url_team_season_logo",
override_y_text = list(
text = "3PT %",
align = "high",
style = list(fontSize = "1.5em")
),
override_x_text = list(
text = "3PT Attempts Per Game",
align = "low",
style = list(fontSize = "1.5em")
),
transparent_tooltip = F,
color = "color",
marginTop = 60,
backgroundColor = "#FDB927"
) %>%
hc_yAxis(
# Large bolded titles
gridLineWidth = 0.5,
gridLineColor = cyan,
gridLineDashStyle = "longdash",
# Light blue long dashed gridlines
lineWidth = 0 # No Axis Line
) %>%
hc_xAxis(
gridLineWidth = 0.5,
gridLineColor = cyan,
gridLineDashStyle = "longdash",
# Light blue long dashed gridlines
tickWidth = 0,
lineWidth = 0 # No Axis Line
)
## [2021-10-08 13:04:38 s.LM] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 30 x 1
## Training outcome: 30 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:38 s.LM] Training linear model...
##
## [ LM Regression Training Summary ]
## MSE = 2.97 (9.92%)
## RMSE = 1.72 (5.09%)
## MAE = 1.34 (10.94%)
## r = 0.31 (p = 0.09)
## rho = 0.33 (p = 0.08)
## R sq = 0.10
##
## [2021-10-08 13:04:38 s.LM] Run completed in 2.9e-03 minutes (Real: 0.17; User: 0.13; System: 0.03)
## [2021-10-08 13:04:38 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 30 x 1
## Training outcome: 30 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-08 13:04:38 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 2.67 (18.89%)
## RMSE = 1.63 (9.94%)
## MAE = 1.21 (19.54%)
## r = 0.44 (p = 0.02)
## rho = 0.43 (p = 0.02)
## R sq = 0.19
##
## [2021-10-08 13:04:39 s.LOESS] Run completed in 2.7e-03 minutes (Real: 0.16; User: 0.12; System: 0.03)
Lets explore Kyrie Irving and plot areas.
tbl_kyrie <-
gamelogs %>%
filter(name_player == "Kyrie Irving") %>%
mutate(color = "#FFB81CFF")
nets_colors <- c("#010101FF", "#FFB81CFF")
kyrie_photo <- tbl_kyrie$url_player_headshot %>% unique()
kyrie_gif <- "https://media3.giphy.com/media/1XnAnejYrLb8iaunke/giphy.gif"
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points")
)
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
plotBackgroundImage = kyrie_photo,
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points")
)
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
plotBackgroundImage = kyrie_photo,
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points")
)
Lets look at some plot annotations.
First we can use images to cover up some areas.
tbl_kyrie %>%
mutate(color = "#FFB81CFF") %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points")
) %>%
hc_add_annotation(
shapes = list(
type = "image",
src = kyrie_photo,
width = 180, height = 131, # Correct image aspect ratio for these headshots
point = list(
x = 4, xAxis = 0,
y = 25, yAxis = 0
)
)
)
tbl_kyrie %>%
asbviz::hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points")
) %>%
hc_add_annotation(shapes = list(
type = "image",
src = kyrie_gif,
width = 180 / 2.5,
height = 131,
point = list(
x = 8.5,
xAxis = 0,
y = 25,
yAxis = 0
)
))
Lets add some plot lines
kyrie_ppg <- tbl_kyrie$pts %>% mean()
y_plot_lines <- list(list(
# Defines a single plot line, could add more
value = kyrie_ppg,
# Where on the yAxis to show the line
color = "#FFFFFF",
# Color of the line
zIndex = 1000,
# Defines priority, higher means shown on top of other elements, want to show over plot columns
label = list(
text = glue("Season ppg: {round(kyrie_ppg,1)}"),
# Test on the plotline
style = list(
color = "#FFFFFF",
# Black colored text
fontSize = "16px",
# Text 16 pixels large
fontWeight = "bold" # Bold
)
)
)
)
x_plot_lines <-
list(list(
from = 9,
# Start of the plotband (first game # of injury)
to = 15,
# End of the plotband (last game missed)
color = "#655959",
# RGB specification of the lakers purple color with a 30% alpha (transparency)
label = list(
text = "Mental Issues",
# Text for the plotBand
style = list(
fontWeight = "bold",
color = "#FFFFFF",
fontSize = "12px"
)
)
))
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points"),
data_y_lines = y_plot_lines,
data_x_lines = x_plot_lines
)
Now lets add some labels
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
label_parameters = list(
align = "left",
crop = T,
enabled = T,
format = "{point.y} pts"
),
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points"),
data_y_lines = y_plot_lines,
data_x_lines = x_plot_lines
)
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
label_parameters =
list(
enabled = TRUE,
formatter = JS(
"
function(){
if (this.y == this.series.dataMax) {
return('High Score: ' + this.y)
} else if (this.y == this.series.dataMin) {
return('Low Score: ' + this.y)
}
}
"
)
),
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points"),
data_y_lines = y_plot_lines,
data_x_lines = x_plot_lines
)
Now we can try a version using a custom glue
label and binding it to the name parameters.
We will also create a glue version of custom tooltip.
tbl_kyrie <-
tbl_kyrie %>%
mutate(
chart_name_label = case_when(
pts == max(pts) ~ glue(
"High Score {pts} <i> ({format(date_game, '%d %b %y')} vs {slug_opponent}) </i>"
),
pts == min(pts) ~ glue(
"Low Score {pts} <i> ({format(date_game, '%d %b %y')} vs {slug_opponent}) </i>"
),
TRUE ~ "" # No label
),
custom_tooltip_glue = glue(
"
Date: {date_game} <br>
Points: {pts} <br>
FG%: {scales::percent(pct_fg)} <br>
FG3%: {scales::percent(pct_fg3)} <br>
FT%: {scales::percent(pct_ft)} <br>
Rebounds: {treb} <br>
Assists: {ast} <br>
Turnovers: {tov} <br>
"
)
)
tbl_kyrie <- features_to_tibble(
data = tbl_kyrie,
keep_columns = c(
"date_game",
"pts",
"ast",
"pct_fg",
"pct_ft",
"pct_ft",
"treb",
"pf",
"tov",
"blk"
)
) %>%
table_tooltip(
data = tbl_kyrie,
df_tip = .,
thumbnail_parameters = list(
thumbnail_url = "url_player_headshot",
alt = "name_player",
height = 80,
width = 80,
is_sizing_px = T,
html_parent = "td"
)
)
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
name = "chart_name_label",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
label_parameters = list(
align = "center",
crop = F,
enabled = T,
format = "{point.name}"
),
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points"),
data_y_lines = y_plot_lines,
data_x_lines = x_plot_lines
)
Now lets look at the custom tooltip using glue.
tbl_kyrie %>%
asbviz::hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
name = "chart_name_label",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
tooltip = "custom_tooltip_glue",
label_parameters = list(
align = "center",
crop = F,
enabled = T,
format = "{point.name}"
),
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points"),
data_y_lines = y_plot_lines,
data_x_lines = x_plot_lines
)
Finally using the asbviz
tooltip functionality
tbl_kyrie %>%
hc_xy(
x = "number_game_team_season",
y = "pts",
type = "column",
name = "chart_name_label",
color = "color",
title = "Kyrie Irving",
theme_name = "better_unica",
subtitle = "Scoring for the 2020-21 Season",
backgroundColor = nets_colors[[1]],
tooltip = "html_tooltip",
label_parameters = list(
align = "center",
crop = F,
enabled = T,
format = "{point.name}"
),
disable_legend = T,
override_x_text = list(text = "Game #"),
override_y_text = list(text = "Points"),
data_y_lines = y_plot_lines,
data_x_lines = x_plot_lines
)
Lets look at the number of games per day.
tbl_days_games <-
gamelogs %>%
group_by(date_game) %>%
summarise(count_games = n_distinct(id_game),
.groups = "drop")
Now I want to visualize
tbl_days_games %>%
asbviz::hc_xy(
x = "date_game",
y = "count_games",
type = "area",
opacity = .5,
title = "Games by Day",
disable_legend = T,
subtitle = "2020-2021 Season",
override_y_text = list(text = "# of Games"),
dateTimeLabelFormats = list(week = "%b-%y" # Month name and short year)
)
)
These are all the different date formatting options.
highcharter has correctly plotted this as a date axis (the tooltip will show a day of the week for example). Highcharts finds the appropriate format for the span of the dates. Those patterns are:
second: '%H:%M:%S',
minute: '%H:%M',
hour: '%H:%M',
day: '%e. %b',
week: '%e. %b',
month: '%b \'%y',
year: '%Y'
We can also add the navigator.
tbl_days_games %>%
hc_xy(
x = "date_game",
y = "count_games",
type = "area",
opacity = .5,
title = "Games by Day",
disable_legend = T,
dateTimeLabelFormats = list(week = "%b-%y"),
override_y_text = list(text = "# of Games"),
use_navigator = T
)
We can also add a range selector.
tbl_days_games %>%
hc_xy(
x = "date_game",
y = "count_games",
type = "area",
opacity = .5,
title = "Games by Day",
disable_legend = T,
dateTimeLabelFormats = list(week = "%b-%y"),
use_navigator = T,
use_range_selector = T,
theme_name = "better_unica"
)
We can also add an X plotline.
x_plot_line <- list(
list(
value = lubridate::ymd(20210516) %>% datetime_to_timestamp(),
color = "#FF0000",
label = list(
text = "Final game of the regular season",
align = "top",
style = list(color = "#FF0000")
)
)
)
tbl_days_games %>%
hc_xy(
x = "date_game",
y = "count_games",
type = "area",
opacity = .5,
title = "Games by Day",
disable_legend = T,
dateTimeLabelFormats = list(week = "%b-%y"),
use_navigator = T,
use_range_selector = T,
theme_name = "better_unica",
data_x_lines = x_plot_line
)
Lets look at an example showing cumulative score of a game over time.
library(lubridate)
game <-
gamelogs %>%
group_by(id_game, date_game) %>%
summarise(pts = sum(pts)) %>%
ungroup() %>%
arrange(desc(pts)) %>%
slice(1)
tbl_play_by_play <-
play_by_play_v2(game$id_game) %>%
janitor::clean_names()
## Getting play by play for game 22000310
date_of_game <- game$date_game %>% unique()
teams <- tbl_play_by_play %>% filter(!is.na(slug_team_player1)) %>% distinct(slug_team_player1) %>% pull() %>% str_c(collapse = " vs. ")
tbl_play_by_play <- tbl_play_by_play %>%
mutate(
date_game = game$date_game %>% unique(),
time = str_c(date_game, " ", time_string_wc) %>% ymd_hm(tz = "America/New_York")
)
tbl_score <-
tbl_play_by_play %>%
filter_at(vars(matches("^score")), all_vars(!is.na(.))) %>%
distinct(score_away, score_home, time) %>%
pivot_longer(-time)
tbl_score %>%
hc_xy(
x = "time",
y = "value",
group = "name",
color_palette = "pals::kovesi.diverging_bky_60_10_c30",
override_y_text = list(text = "Score"),
override_y_label = list(format = "{value}"),
override_x_text = list(text = "Hour and Minute of Game"),
title = "{teams} on {date_of_game} Play by Play Score",
type = "line",
use_navigator = T
)
##
## Using continuous color scheme pals::kovesi.diverging_bky_60_10_c30
##
## <colors>
## #0E94FAFF #B38B1AFF
tbl_wins <-
gamelogs %>%
distinct(slug_team, id_game, is_win) %>%
group_by(slug_team) %>%
summarise(count_wins = sum(is_win)) %>%
arrange(desc(count_wins))
tbl_wins %>%
tbl_color_group(
group_column = "slug_team",
color_palette = "Polychrome::glasbey",
direction = 1
) %>%
left_join(gamelogs %>% distinct(slug_team, url_team_season_logo), by = "slug_team") %>%
hc_xy(
x = "slug_team",
y = "count_wins",
type = "column",
color = "color",
image = "url_team_season_logo",
theme_name = "better_unica",
override_x_text = list(text = ""),
override_y_label = list(format = "{value}"),
override_y_text = list(text = "wins", align = "high"),
label_parameters = list(
align = "center",
crop = F,
enabled = T,
format = "{point.y} wins"
),
disable_legend = T,
axis_scrollbars = c("x"),
x_min_max = c(0, 14),
title = "2020-21 Wins by Team",
transparent_tooltip = T
)
## <colors>
## #FFFFFFFF #0000FFFF #FF0000FF #00FF00FF #000033FF #FF00B6FF #005300FF #FFD300FF #009FFFFF #9A4D42FF #00FFBEFF #783FC1FF #1F9698FF #FFACFDFF #B1CC71FF #F1085CFF #FE8F42FF #DD00FFFF #201A01FF #720055FF #766C95FF #02AD24FF #C8FF00FF #886C00FF #FFB79FFF #858567FF #A10300FF #14F9FFFF #00479EFF #DC5E93FF
##
## Using discrete color scheme Polychrome::glasbey
Lets look at how Pelicans players did scoring-wise home verus away.
tbl_pels <- gamelogs %>%
filter(name_team == "New Orleans Pelicans") %>%
group_by(name_player, location_game) %>%
summarise(
avg_pts = mean(pts)
) %>%
ungroup() %>%
group_by(name_player) %>%
filter(n()==2, avg_pts > 0) %>%
ungroup()
tbl_pels %>%
tbl_ordered_factor(columns = "name_player", weight = "avg_pts", reverse = F) %>%
hc_xy(
x = "name_player",
y = "avg_pts",
group = "location_game",
invert_chart = T,
color_palette = "ggthemes::Classic Area Red-Green",
transformations = c("mean_y", "log_y"),
override_x_text = list(text = ""),
override_y_text = list(text = "Avg. Points (log transformed)"),
title = "2020-21 Pelicans Pts Home vs Away"
)
##
## Using continuous color scheme ggthemes::Classic Area Red-Green
##
## <colors>
## #BD1100FF #4A8C1CFF
Lets explore this in a bubble chart.
This will take the top 5 teams and explore the players with over 50 3PT makes and then embed a chart in chart view of makes per game.
fg3m_top5 <-
gamelogs %>%
group_by(name_team) %>%
summarise(total_fg3m = sum(fg3m)) %>%
arrange(desc(total_fg3m)) %>%
slice(1:5)
# FG3M / game from those teams with FG3M 50
fg3m_season_total =
gamelogs %>%
inner_join(fg3m_top5, by = "name_team") %>%
group_by(name_player, name_team) %>%
summarise(fg3m = sum(fg3m), games = n()) %>%
filter(fg3m >= 50) %>%
mutate(fg3m_pg = fg3m / games) %>%
ungroup()
fg3m_by_game <-
gamelogs %>%
create_tooltip_data(
x = "number_game_team_season",
y = "fg3m",
group = "name_player",
tt_key = "ttdata",
list_parse_2 = F
)
df <-
fg3m_season_total %>%
left_join(fg3m_by_game, by = c(name_player = "group"))
hc_xy(
data = df,
name = "name_player",
group = "name_team",
y = "fg3m_pg",
type = "packedbubble",
zoom_type = c("x"),
data_tooltip = "ttdata",
color_pallete = "grDevices::Plasma",
enable_point_select = T,
title = "Top 5 3PT Shooting Teams, Players >= 50 FG3M",
subtitle = "2020-21 Season",
data_tooltip_params = list(
chart = list(type = "area"),
title = list(text = "point.name"),
subtitle = list(text = "3pts Attempts"),
plotOptions = list(series = list(animation = 2000, name = "point.name"))
),
theme_name = "better_unica",
opacity = 1,
label_parameters = list(
enabled = TRUE,
format = "{point.name}"
),
override_series = list(
minSize = '5px',
maxSize = '90px',
layoutAlgorithm = list(
gravitationalConstant = 0.05,
bubblePadding = 10,
splitSeries = TRUE,
seriesInteraction = FALSE,
dragBetweenSeries = TRUE,
maxIterations = 500
)
)
)
##
## Using continuous color scheme pals::kovesi.linear_gow_65_90_c35
##
## <colors>
## #70AD5CFF #B7B164FF #E6B86DFF #E6CE9CFF #E2E2E2FF
Lets look at a Donut Chart of the true shooting percentage
Lets assemble the data
nbastatR::assign_nba_teams()
df_dict_nba_teams <-
df_dict_nba_teams %>%
janitor::clean_names()
# Create a data frame with Team Name and Color for the outer ring
tbl_donut <-
df_dict_nba_teams %>%
filter(is_non_nba_team == 0) %>%
select(name_team, colors_team, url_thumbnail_team) %>%
mutate(primary_color =
colors_team %>%
str_extract("[a-z0-9\\#]+(?=\\,)"))
# Create a True Shooting Summary For Each Player with more than 500 minutes
tbl_true_shooting <-
gamelogs %>%
group_by(name_player, name_team) %>%
summarise(
total_minutes = sum(minutes),
total_true_shooting_attempts = sum(fga + 0.44 * fta),
ts_pct = sum(pts) / sum((2 * (fga + 0.44 * fta)))
) %>%
filter(total_minutes >= 500) %>%
group_by(name_team) %>%
arrange(desc(total_true_shooting_attempts)) %>%
ungroup()
# Calculate League Average True Shooting %
pct_league_mean_ts <-
tbl_true_shooting %>% summarise(mean(ts_pct)) %>% pull()
tbl_teams <- tbl_donut %>%
select(name_team, primary_color) %>%
distinct() %>%
mutate(segment = 1)
Lets visualize it.
tbl_teams %>%
left_join(
tbl_true_shooting %>%
asbviz::create_tooltip_data(
name = "name_player",
group = "name_team",
y = "ts_pct",
z = "total_true_shooting_attempts"
),
by = c("name_team" = "group")
) %>%
asbviz::hc_xy(
name = "name_team",
y = "segment",
color = "primary_color",
type = "pie",
innerSize = 500,
title = "2020-21 Team True Shooting",
caption = "All players exceending 500 minutes played",
data_tooltip = "ttdata",
theme_name = "better_unica",
data_tooltip_params = list(
chart = list(type = "bubble"),
yAxis = list(
min = 0.4,
max = 0.75,
title = list(text = "True Shooting %"),
plotLines = list(
list(
value = pct_league_mean_ts,
zIndex = -100,
color = "rgba(212, 217, 214, 0.7)",
label = list(
text = "League Average",
align = "right",
style = list(fontWeight = "bold", color = "rgba(212, 217, 214, 0.7)")
)
)
)
),
xAxis = list(visible = FALSE),
plotOptions = list(bubble = list(
dataLabels = list(
enabled = TRUE,
allowOverlap = TRUE,
format = "{point.name}",
y = -15,
# On top of bubbles
style = list(
textOutline = "none",
color = "#011627",
fontSize = "10px",
fontWeight = "normal"
)
)
))
)
)