hc_kunst.Rmd
library(asbviz)
library(tidyverse)
library(glue)
library(highcharter)
library(purrr)
library(forecast)
library(janitor)
library(nbastatR)
library(dplyr)
library(quantmod)
library(modelR2)
library(gapminder)
library(grDevices)
library(forecast)
library(rvest)
library(idbr)
library(purrr)
library(datos) # datos
library(ggplot2) # más gráficos
library(tidyr)
library(forcats) # para usar la función fct_inorder
library(scales)
library(ggridges)
library(lubridate)
library(stringr)
library(tidytext)
library(treemap)
library(economiccomplexity)
library(igraph)
library(Matrix)
library(ggraph)
library(countrycode)
library(tsbox)
library(survival)
Highcharts is a SVG-based, multi-platform charting library. My wrapper makes it easier to create out of the box visualizations
Basic count with proximate legend
tbl_count_class <-
ggplot2::mpg %>%
count(class)
tbl_count_class %>%
hc_xy(
x = "class",
y = "n",
type = "column",
invert_chart = T,
override_legend_location = "proximate"
)
Add colors
tbl_count_class <- tbl_count_class %>%
mutate(class = fct_inorder(class)) %>%
mutate(color = '#ffff00')
Visaulize the chart
hc_xy(
data = tbl_count_class,
x = "class",
y = "n",
type = "column",
title = "Count of car types in the data miles",
disable_legend = T,
invert_chart = T,
theme_name = "better_unica"
)
Gapminder
tbl_continents <-
gapminder::gapminder
continents <- tbl_continents %>%
group_by(year, continent) %>%
summarise(
weighted_per_capita = weighted.mean(gdpPercap, pop),
weighted_life_exp = weighted.mean(lifeExp, pop)
) %>%
ungroup()
Plot the the weighted average lifespans with lags.
continents %>%
ungroup() %>%
hc_xy(
type = "line",
group = "continent",
x = "year",
color_type = "continuous",
y = "weighted_life_exp",
theme_name = "better_unica",
lag_periods = 1,
)
##
## Using continuous color scheme scico::bilbao
##
## <colors>
## #FFFFFFFF #C0BBA5FF #AB8B67FF #974D4BFF #4C0000FF
##
## Using continuous color scheme grDevices::RdYlGn
##
## <colors>
## #A51122FF #EB9C00FF #FEFDBEFF #85BC47FF #006228FF
tbl_tweets <-
read_csv("https://gist.githubusercontent.com/abresler/09b36b29afd9fd50c93ee920c215e504/raw/33fc7227de38b3562b1408cdc404c2304e899161/dtweets.csv")
dtweets <- tbl_tweets %>%
filter(between(year(created_at), 2020, 2020)) %>%
mutate(created_at = as.Date(created_at)) %>%
select(created_at, text, favorite_count, retweet_count) %>%
arrange(created_at)
dtweets <- dtweets %>%
distinct(created_at, .keep_all = TRUE)
dtops <- dtweets %>%
top_n(5, favorite_count)
dtext <- dtweets %>%
select(created_at, text) %>%
mutate(text = str_extract(text, "[0-9]+.*"))
data <- dtweets %>%
select(created_at, Favorites = favorite_count, Retweets = retweet_count) %>%
gather(type, count, -created_at) %>%
arrange(created_at) %>%
left_join(dtext, by = "created_at")
top_annotations <- dtops %>%
mutate(created_at = datetime_to_timestamp(created_at)) %>%
select(x = created_at, y = favorite_count, text) %>%
df_to_annotations_labels()
data %>%
hc_xy(
type = "line",
x = "created_at" ,
y = "count",
group = "type",
override_x_text = list(text = ""),
color_palette = "grDevices::PiYG",
use_table_tooltip = T,
override_y_text = list("Count"),
override_y_label = list(
formatter = JS(
"function(){
return '' + Highcharts.numberFormat(this.value, 0, '.', ',') + ' tweets';
}"
)
),
override_x_label = list(
staggerLines = 1,
formatter = JS(
"function () { return Highcharts.dateFormat('%B', this.value); }"
),
tickPositioner = JS(
"function () {
var positions = [],
tick = Math.floor(this.dataMin),
increment = 1000 * 3600 * 24 * 91.5; // 3 months
for (tick; tick <= this.dataMax; tick += increment) {
positions.push(tick);
}
if (positions.indexOf(this.dataMax) == -1) positions.push(this.dataMax);
return positions;
}"
)
),
zoom_type = NULL,
annotations = list(
labelOptions = list(
shape = "connector",
align = "right",
justify = FALSE,
crop = TRUE,
style = list(fontSize = "0.8em", textOutline = "1px white")
),
labels = top_annotations
)
)
##
## Using continuous color scheme grDevices::PiYG
##
## <colors>
## #90005DFF #315D00FF
tbl_2007 <-
gapminder %>% filter(year == 2007)
tbl_2007 %>%
hc_xy(
type = "scatter",
name = "country",
group = "continent",
x = "gdpPercap",
color_type = "discrete",
color_palette = "lisa::Jean_MichelBasquiat_1",
y = "lifeExp",
theme_name = "smpl",
title = "2007 Life Expectancy by Per Capita Income",
override_model_groups = T,
override_legend_location = "proximate",
fits = c("RF", "LOESS")
)
## <colors>
## #C11432FF #009ADAFF #66A64FFF #FDD10AFF #070707FF
##
## Using discrete color scheme lisa::Jean_MichelBasquiat_1
##
## [2021-10-05 16:25:16 s.RF] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 142 x 1
## Training outcome: 142 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [ Parameters ]
## n.trees: 1000
## mtry: NULL
##
## [2021-10-05 16:25:16 s.RF] Training Random Forest Regression with 1000 trees...
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 100 | 63.67 43.99 |
## 200 | 64.46 44.54 |
## 300 | 62.58 43.24 |
## 400 | 62.38 43.10 |
## 500 | 61.98 42.82 |
## 600 | 61.95 42.80 |
## 700 | 62.02 42.85 |
## 800 | 62.14 42.93 |
## 900 | 62.21 42.98 |
## 1000 | 62.35 43.08 |
##
## [ RF Regression Training Summary ]
## MSE = 15.53 (89.27%)
## RMSE = 3.94 (67.24%)
## MAE = 2.69 (74.23%)
## r = 0.95 (p = 8.5e-71)
## rho = 0.95 (p = 0.00)
## R sq = 0.89
##
## [2021-10-05 16:25:17 s.RF] Run completed in 0.01 minutes (Real: 0.49; User: 0.46; System: 0.03)
## [2021-10-05 16:25:17 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 142 x 1
## Training outcome: 142 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-05 16:25:17 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 48.67 (66.37%)
## RMSE = 6.98 (42.01%)
## MAE = 4.81 (54.03%)
## r = 0.81 (p = 6.1e-35)
## rho = 0.86 (p = 0.00)
## R sq = 0.66
##
## [2021-10-05 16:25:17 s.LOESS] Run completed in 1.1e-03 minutes (Real: 0.07; User: 0.05; System: 0.01)
Lets transform the x axis and add some descriptive text.
text <- "The graph shows us the relationship between
<b> GDP per capita </b> and <b> Life expectancy </ b> for countries in 2007.
It is observed that the large number of countries with low life expectancy belong
to the African continent, of which the majority has low GDP per capita. <br>
For <b> GDP per capita </b> a scale <i> logarithmic </i> due
to the asymmetry of this variable."
tbl_2007 %>%
hc_xy(
type = "scatter",
name = "country",
group = "continent",
x = "gdpPercap",
color_type = "discrete",
color_palette = "lisa::Jean_MichelBasquiat_1",
y = "lifeExp",
size = "pop",
theme_name = "smpl",
transformations = "log_x",
caption = text,
title = "2007 Life Expectancy by Per Capita Income",
override_model_groups = T,
override_legend_location = "proximate",
fits = c("RF", "LOESS")
)
## <colors>
## #C11432FF #009ADAFF #66A64FFF #FDD10AFF #070707FF
##
## Using discrete color scheme lisa::Jean_MichelBasquiat_1
##
## [2021-10-05 16:25:17 s.RF] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 142 x 1
## Training outcome: 142 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [ Parameters ]
## n.trees: 1000
## mtry: NULL
##
## [2021-10-05 16:25:17 s.RF] Training Random Forest Regression with 1000 trees...
## | Out-of-bag |
## Tree | MSE %Var(y) |
## 100 | 64.76 44.74 |
## 200 | 63.75 44.05 |
## 300 | 63.97 44.20 |
## 400 | 63.5 43.88 |
## 500 | 63.6 43.94 |
## 600 | 63.18 43.65 |
## 700 | 63.51 43.88 |
## 800 | 63.2 43.67 |
## 900 | 63.2 43.67 |
## 1000 | 62.77 43.37 |
##
## [ RF Regression Training Summary ]
## MSE = 15.69 (89.16%)
## RMSE = 3.96 (67.07%)
## MAE = 2.70 (74.18%)
## r = 0.95 (p = 1.9e-70)
## rho = 0.95 (p = 0.00)
## R sq = 0.89
##
## [2021-10-05 16:25:18 s.RF] Run completed in 3e-03 minutes (Real: 0.18; User: 0.16; System: 0.02)
## [2021-10-05 16:25:18 s.LOESS] Hello, alexbresler
##
## [ Regression Input Summary ]
## Training features: 142 x 1
## Training outcome: 142 x 1
## Testing features: Not available
## Testing outcome: Not available
##
## [2021-10-05 16:25:18 s.LOESS] Training LOESS model...
##
## [ LOESS Regression Training Summary ]
## MSE = 48.67 (66.37%)
## RMSE = 6.98 (42.01%)
## MAE = 4.81 (54.03%)
## r = 0.81 (p = 6.1e-35)
## rho = 0.86 (p = 0.00)
## R sq = 0.66
##
## [2021-10-05 16:25:18 s.LOESS] Run completed in 1e-03 minutes (Real: 0.06; User: 0.05; System: 0.01)
Plot it
tbl_diamonds_cut %>%
hc_xy(
name = "cut",
y = "n",
size = "percent",
type = "pie",
label_parameters = list(format = "{point.name}<br>({point.size})", enabled = T),
innerSize = "80%"
)
Add a version with chart in chart!!
tbl_diamonds_cut %>%
hc_xy(
name = "cut",
y = "n",
size = "percent",
type = "pie",
label_parameters = list(format = "{point.name}<br>({point.size})", enabled = T),
innerSize = "80%"
) %>%
hc_tooltip(
useHTML = TRUE,
style = list(fontSize = "30px"),
headerFormat = "",
pointFormat = "<div style='text-align: center;'>Cut <b>{point.name}</b><br>{point.y} Cases<br>{point.percent} of total</div>",
positioner = JS(
"function () {
/* one of the most important parts! */
xp = this.chart.chartWidth/2 - this.label.width/2
yp = this.chart.chartHeight/2 - this.label.height/2
return { x: xp, y: yp };
}"
),
shadow = FALSE,
borderWidth = 0,
backgroundColor = "transparent",
hideDelay = 1000
)
This one is in Spainish
clima <-
datos::clima %>%
mutate(
fecha = lubridate::ymd(paste(anio, mes, dia, paste = "-")),
mes_fecha = fct_inorder(months(fecha))
)
hc_xy(
data = clima,
x = "temperatura",
group = "mes_fecha",
type = "ridgeline",
theme_name = "better_unica",
zIndex = 12:1,
lineWidth = 3,
title = "Ridgelines",
color_palette = "pals::jet",
opacity = 1,
disable_x = F,
disable_y = T,
invert_chart = T
)
##
## Using continuous color scheme pals::jet
##
## <colors>
## #00007FFF #0000DCFF #0039FFFF #0096FFFF #00F3FFFF #50FFADFF #ADFF50FF #FFF300FF #FF9600FF #FF3900FF #DC0000FF #7F0000FF
movies <- ggplot2movies::movies
films <-
movies %>%
select(year, Action:Short) %>%
gather(category, count, -year) %>%
group_by(year, category) %>%
summarise(count = sum(count))
Basic area
hc_xy(
data = films,
type = "area",
x = "year",
y = "count",
group = "category",
color_palette = "viridis::inferno",
color_type = "continuous",
opacity = .5,
override_y_label = list(
formatter = JS(
"function(){
return '' + Highcharts.numberFormat(this.value, 0, '.', ',') + ' films';
}"
)
),
stacking = "normal",
override_series = list(
stacking = list(enbled = TRUE),
marker = list(symbol = "circle")
),
override_tooltip = F
) %>%
hc_tooltip(table = TRUE, sort = TRUE)
##
## Using continuous color scheme viridis::inferno
##
## <colors>
## #000004FF #330A5FFF #781C6DFF #BB3754FF #ED6925FF #FCB519FF #FCFFA4FF
Lets add some plot lines and descriptive text
events <- tibble(
year = c(1930, 1941, 1990),
text = c(
"Beginning was golden in Hollywood.",
"Start of TV",
"Beginning of the rise of independent cinema.")
)
data_plot_line_x <- events %>%
transmute(value = year,
label = map(text, ~ list(text = .x))) %>%
mutate(color = "#666", width = 2, zIndex = 5)
movie_caption <- "Data from <b> IMDB </b> through the better_unicamovies package. <br>Most events were obtained from flash reading in <b> Wikipedia </b>"
Viz it!
hc_xy(
data = films,
type = "area",
x = "year",
y = "count",
group = "category",
color_palette = "viridis::inferno",
color_type = "continuous",
opacity = 1,
theme_name = "clean_unica",
caption = movie_caption,
title = "Popularity of Movies by Type Over Time",
override_y_label = list(
formatter = JS(
"function(){
return '' + Highcharts.numberFormat(this.value, 0, '.', ',') + ' films';
}"
)
),
stacking = "normal",
data_x_lines = data_plot_line_x,
override_series = list(
stacking = list(enbled = TRUE),
marker = list(symbol = "circle")
),
override_tooltip = F
) %>%
hc_tooltip(table = TRUE, sort = TRUE)
##
## Using continuous color scheme viridis::inferno
##
## <colors>
## #000004FF #330A5FFF #781C6DFF #BB3754FF #ED6925FF #FCB519FF #FCFFA4FF
MGP count with overriden category.
tbl_mpg_count <- ggplot2::mpg %>%
count(class, cyl) %>%
arrange(-n) %>%
mutate(
class = fct_inorder(class),
class_n = as.numeric(class) - 1,
cyl = factor(cyl, c(4,5,6,8))
)
Visualize it
tbl_mpg_count %>% hc_xy(
x = "class_n",
y = "n",
group = "cyl",
type = "column",
stacking = "normal",
color_type = "continuous",
color_palette = "pals::jet",
override_x_categories = levels(tbl_mpg_count %>% pull(class))
)
##
## Using continuous color scheme pals::jet
##
## <colors>
## #00007FFF #00D4FFFF #FFD400FF #7F0000FF
tourfrance <-
read_csv(
"https://raw.githubusercontent.com/jbkunst/blog/master/data/tour_france_state_8.txt"
)
Basic area chart.
hc_tour_area <-
tourfrance %>% mutate(color = "navy") %>%
hc_xy(
type = "area",
opacity = .5,
x = "distance",
y = "elevation",
subtitle = "Example obtained from HighchartsJS documentation",
override_x_label = list(format = "{value} km"),
override_x_text = list(text = "Distance"),
override_y_label = list(format = "{value} m"),
override_y_text = list(text = "Elevation"),
title = "Tour de Francia 2017, Stage 8: <i>Dole - Station des Rousses</i>",
disable_legend = T
) %>%
hc_tooltip(headerFormat = "Distance: {point.x:.1f} km<br>",
pointFormat = "{point.y} m a. s. l.")
Lets see it
hc_tour_area
Lets add some annotations.
df1 <- read_csv(
'"x","y", "text"
27.98,255, "Arbois"
45.5,611,"Montrond"
63,651,"Mont-sur-Monnet"
84,789,"Bonlieu"
129.5,382,"Chassal"
159,443,"Saint-Claude"'
)
df2 <- read_csv(
'"x","y","text"
101.44,1026,"Col de la Joux"
138.5,748,"Côte de Viry"
176.4,1202,"Montée de la Combe<br>de Laisia Les Molunes"'
)
df3 <- read_csv(
'"x","y","text"
96.2,783,"6.1 km climb<br>4.6% on avg."
134.5,540,"7.6 km climb<br>5.2% on avg."
172.2,925,"11.7 km climb<br>6.4% on avg."
'
)
df1_p <- df_to_annotations_labels(df1)
df2_p <- df_to_annotations_labels(df2)
df3_p <- df_to_annotations_labels(df3)
Now we plot it.
hc_tour_area %>%
hc_annotations(
list(
labelOptions = list(
backgroundColor = 'rgba(255,255,255,0.5)',
verticalAlign = "top",
y = 15
),
labels = df1_p
),
list(labels = df2_p),
list(
labelOptions = list(
shape = "connector",
align = "right",
justify = FALSE,
crop = TRUE,
style = list(fontSize = "0.8em", textOutline = "1px white")
),
labels = df3_p
)
)
Gapminder
tbl_2007 %>% hc_xy(
group = c("continent", "country"),
size = "pop",
type = "treemap",
is_sunburst = T,
title = "2007 Gapminder",
tree_labels = list(
list(
level = 1,
borderWidth = 0,
borderColor = "transparent",
colorByPoint = TRUE,
dataLabels = list(enabled = TRUE)
),
list(
level = 2,
borderWidth = 0,
borderColor = "transparent",
colorVariation = list(key = "brightness", to = 0.50),
dataLabels = list(enabled = TRUE)
)
)
)
Gapminder
tbl_2007 %>%
mutate(world = "world") %>%
hc_xy(
group = c("world", "continent", "country"),
size = "pop",
use_new_treemap = T,
type = "treemap",
is_sunburst = T,
tree_labels = list(
list(
level = 1,
borderWidth = 0,
color = "transparent",
borderColor = "transparent",
# colorByPoint = TRUE,
dataLabels = list(enabled = TRUE)
),
list(
level = 2,
borderWidth = 0,
borderColor = "transparent",
colorByPoint = TRUE,
dataLabels = list(enabled = TRUE)
),
list(
level = 3,
borderWidth = 0,
borderColor = "transparent",
colorVariation = list(key = "brightness", to = 0.50),
dataLabels = list(enabled = TRUE)
)
)
)
##
## Using continuous color scheme scico::vikO
##
## <colors>
## #4E193DFF