library(asbviz)
library(tidyverse)
library(glue)
library(highcharter)
library(purrr)
library(forecast)
library(janitor)
library(nbastatR)
library(dplyr)
library(quantmod)
library(modelR2)
library(gapminder)
library(grDevices)
library(forecast)
library(rvest)
library(idbr)
library(purrr)
library(datos)       # datos
library(ggplot2)     # más gráficos
library(tidyr)
library(forcats) # para usar la función fct_inorder
library(scales)
library(ggridges)
library(lubridate)
library(stringr)
library(tidytext)
library(treemap)
library(economiccomplexity)
library(igraph)
library(Matrix)
library(ggraph)
library(countrycode)
library(tsbox)
library(survival)

Highcharts is a SVG-based, multi-platform charting library. My wrapper makes it easier to create out of the box visualizations

Josh Kunst Tutorial

Column

Basic count with proximate legend


tbl_count_class <- 
  ggplot2::mpg %>% 
  count(class)
 
tbl_count_class %>% 
  hc_xy(
    x = "class",
    y = "n",
    type = "column",
    invert_chart = T,
    override_legend_location = "proximate"
  )

Add colors

tbl_count_class <- tbl_count_class %>% 
  mutate(class = fct_inorder(class)) %>% 
  mutate(color = '#ffff00')

Visaulize the chart

hc_xy(
    data = tbl_count_class,
    x = "class",
    y = "n",
    type = "column",
    title =  "Count of car types in the data miles",
disable_legend = T,
invert_chart = T,
theme_name = "better_unica"
  )

Line

Gapminder


tbl_continents <- 
  gapminder::gapminder

continents <- tbl_continents %>%
  group_by(year, continent) %>%
  summarise(
    weighted_per_capita = weighted.mean(gdpPercap, pop),
    weighted_life_exp = weighted.mean(lifeExp, pop)
  ) %>% 
  ungroup()

Plot the the weighted average lifespans with lags.

continents %>%
  ungroup() %>%
  hc_xy(
    type = "line",
    group = "continent",
    x = "year",
    color_type = "continuous",
    y = "weighted_life_exp",
    theme_name = "better_unica",
    lag_periods = 1,
  )
## 
## Using continuous color scheme scico::bilbao
## 
## <colors>
## #FFFFFFFF #C0BBA5FF #AB8B67FF #974D4BFF #4C0000FF 
## 
## Using continuous color scheme grDevices::RdYlGn
## 
## <colors>
## #A51122FF #EB9C00FF #FEFDBEFF #85BC47FF #006228FF

Annotated Line Progress Bar

tbl_tweets <- 
  read_csv("https://gist.githubusercontent.com/abresler/09b36b29afd9fd50c93ee920c215e504/raw/33fc7227de38b3562b1408cdc404c2304e899161/dtweets.csv")

dtweets <- tbl_tweets %>%
  filter(between(year(created_at), 2020, 2020)) %>%
  mutate(created_at = as.Date(created_at)) %>%
  select(created_at, text, favorite_count, retweet_count) %>%
  arrange(created_at)

dtweets <- dtweets %>%
  distinct(created_at, .keep_all = TRUE)

dtops <- dtweets %>%
  top_n(5, favorite_count)

dtext <- dtweets %>%
  select(created_at, text) %>%
  mutate(text = str_extract(text, "[0-9]+.*"))

data <- dtweets %>%
  select(created_at, Favorites = favorite_count, Retweets = retweet_count) %>%
  gather(type, count, -created_at) %>%
  arrange(created_at) %>%
  left_join(dtext, by = "created_at")

top_annotations <- dtops %>%
  mutate(created_at = datetime_to_timestamp(created_at)) %>%
  select(x = created_at, y = favorite_count, text) %>%
  df_to_annotations_labels()

data %>%
  hc_xy(
    type = "line",
    x = "created_at" ,
    y = "count",
    group = "type",
    override_x_text = list(text = ""),
    color_palette = "grDevices::PiYG",
    use_table_tooltip = T,
    override_y_text = list("Count"),
    override_y_label = list(
      formatter = JS(
        "function(){
                       return '' + Highcharts.numberFormat(this.value, 0, '.', ',') + ' tweets';
                       }"
      )
    ),
    override_x_label = list(
      staggerLines = 1,
      formatter = JS(
        "function () { return Highcharts.dateFormat('%B', this.value); }"
      ),
      tickPositioner = JS(
        "function () {
                var positions = [],
                    tick = Math.floor(this.dataMin),
                    increment = 1000 * 3600 * 24 * 91.5; // 3 months

                for (tick; tick <= this.dataMax; tick += increment) {
                    positions.push(tick);
                }
                if (positions.indexOf(this.dataMax) == -1) positions.push(this.dataMax);
                return positions;
            }"
      )
    ),
    zoom_type = NULL,
    annotations = list(
      labelOptions = list(
        shape = "connector",
        align = "right",
        justify = FALSE,
        crop = TRUE,
        style = list(fontSize = "0.8em", textOutline = "1px white")
      ),
      labels = top_annotations
    )
  )
## 
## Using continuous color scheme grDevices::PiYG
## 
## <colors>
## #90005DFF #315D00FF

Scatter

tbl_2007 <- 
  gapminder %>% filter(year  == 2007)

tbl_2007 %>%
  hc_xy(
    type = "scatter",
    name = "country",
    group = "continent",
    x = "gdpPercap",
    color_type = "discrete",
    color_palette = "lisa::Jean_MichelBasquiat_1",
    y = "lifeExp",
    theme_name = "smpl",
    title = "2007 Life Expectancy by Per Capita Income",
    override_model_groups = T,
    override_legend_location = "proximate",
    fits = c("RF", "LOESS")
  )
## <colors>
## #C11432FF #009ADAFF #66A64FFF #FDD10AFF #070707FF 
## 
## Using discrete color scheme lisa::Jean_MichelBasquiat_1
## 
## [2021-10-05 16:25:16 s.RF] Hello, alexbresler 
## 
## [ Regression Input Summary ]
##    Training features: 142 x 1 
##     Training outcome: 142 x 1 
##     Testing features: Not available
##      Testing outcome: Not available
## 
## [ Parameters ]
##        n.trees: 1000 
##           mtry: NULL 
## 
## [2021-10-05 16:25:16 s.RF] Training Random Forest Regression with 1000 trees... 
##      |      Out-of-bag   |
## Tree |      MSE  %Var(y) |
##  100 |    63.67    43.99 |
##  200 |    64.46    44.54 |
##  300 |    62.58    43.24 |
##  400 |    62.38    43.10 |
##  500 |    61.98    42.82 |
##  600 |    61.95    42.80 |
##  700 |    62.02    42.85 |
##  800 |    62.14    42.93 |
##  900 |    62.21    42.98 |
## 1000 |    62.35    43.08 |
## 
## [ RF Regression Training Summary ]
##     MSE = 15.53 (89.27%)
##    RMSE = 3.94 (67.24%)
##     MAE = 2.69 (74.23%)
##       r = 0.95 (p = 8.5e-71)
##     rho = 0.95 (p = 0.00)
##    R sq = 0.89
## 
## [2021-10-05 16:25:17 s.RF] Run completed in 0.01 minutes (Real: 0.49; User: 0.46; System: 0.03) 
## [2021-10-05 16:25:17 s.LOESS] Hello, alexbresler 
## 
## [ Regression Input Summary ]
##    Training features: 142 x 1 
##     Training outcome: 142 x 1 
##     Testing features: Not available
##      Testing outcome: Not available
## 
## [2021-10-05 16:25:17 s.LOESS] Training LOESS model... 
## 
## [ LOESS Regression Training Summary ]
##     MSE = 48.67 (66.37%)
##    RMSE = 6.98 (42.01%)
##     MAE = 4.81 (54.03%)
##       r = 0.81 (p = 6.1e-35)
##     rho = 0.86 (p = 0.00)
##    R sq = 0.66
## 
## [2021-10-05 16:25:17 s.LOESS] Run completed in 1.1e-03 minutes (Real: 0.07; User: 0.05; System: 0.01)

Lets transform the x axis and add some descriptive text.

text <- "The graph shows us the relationship between
<b> GDP per capita </b> and <b> Life expectancy </ b> for countries in 2007.
It is observed that the large number of countries with low life expectancy belong
to the African continent, of which the majority has low GDP per capita. <br>
For <b> GDP per capita </b> a scale <i> logarithmic </i> due
to the asymmetry of this variable."


tbl_2007 %>%
  hc_xy(
    type = "scatter",
    name = "country",
    group = "continent",
    x = "gdpPercap",
    color_type = "discrete",
    color_palette = "lisa::Jean_MichelBasquiat_1",
    y = "lifeExp",
    size = "pop",
    theme_name = "smpl",
    transformations = "log_x",
    caption = text,
    title = "2007 Life Expectancy by Per Capita Income",
    override_model_groups = T,
    override_legend_location = "proximate",
    fits = c("RF", "LOESS")
  )
## <colors>
## #C11432FF #009ADAFF #66A64FFF #FDD10AFF #070707FF 
## 
## Using discrete color scheme lisa::Jean_MichelBasquiat_1
## 
## [2021-10-05 16:25:17 s.RF] Hello, alexbresler 
## 
## [ Regression Input Summary ]
##    Training features: 142 x 1 
##     Training outcome: 142 x 1 
##     Testing features: Not available
##      Testing outcome: Not available
## 
## [ Parameters ]
##        n.trees: 1000 
##           mtry: NULL 
## 
## [2021-10-05 16:25:17 s.RF] Training Random Forest Regression with 1000 trees... 
##      |      Out-of-bag   |
## Tree |      MSE  %Var(y) |
##  100 |    64.76    44.74 |
##  200 |    63.75    44.05 |
##  300 |    63.97    44.20 |
##  400 |     63.5    43.88 |
##  500 |     63.6    43.94 |
##  600 |    63.18    43.65 |
##  700 |    63.51    43.88 |
##  800 |     63.2    43.67 |
##  900 |     63.2    43.67 |
## 1000 |    62.77    43.37 |
## 
## [ RF Regression Training Summary ]
##     MSE = 15.69 (89.16%)
##    RMSE = 3.96 (67.07%)
##     MAE = 2.70 (74.18%)
##       r = 0.95 (p = 1.9e-70)
##     rho = 0.95 (p = 0.00)
##    R sq = 0.89
## 
## [2021-10-05 16:25:18 s.RF] Run completed in 3e-03 minutes (Real: 0.18; User: 0.16; System: 0.02) 
## [2021-10-05 16:25:18 s.LOESS] Hello, alexbresler 
## 
## [ Regression Input Summary ]
##    Training features: 142 x 1 
##     Training outcome: 142 x 1 
##     Testing features: Not available
##      Testing outcome: Not available
## 
## [2021-10-05 16:25:18 s.LOESS] Training LOESS model... 
## 
## [ LOESS Regression Training Summary ]
##     MSE = 48.67 (66.37%)
##    RMSE = 6.98 (42.01%)
##     MAE = 4.81 (54.03%)
##       r = 0.81 (p = 6.1e-35)
##     rho = 0.86 (p = 0.00)
##    R sq = 0.66
## 
## [2021-10-05 16:25:18 s.LOESS] Run completed in 1e-03 minutes (Real: 0.06; User: 0.05; System: 0.01)

Donut

tbl_diamonds_cut <- 
  ggplot2::diamonds %>% count(cut) %>% 
  mutate(percent = percent(n / sum(n)))

Plot it

tbl_diamonds_cut %>%
  hc_xy(
    name = "cut",
    y = "n",
    size = "percent",
    type = "pie",
    label_parameters = list(format = "{point.name}<br>({point.size})", enabled = T),
    innerSize = "80%"
  )

Add a version with chart in chart!!

tbl_diamonds_cut %>%
  hc_xy(
    name = "cut",
    y = "n",
    size = "percent",
    type = "pie",
    label_parameters = list(format = "{point.name}<br>({point.size})", enabled = T),
    innerSize = "80%"
  ) %>%
  hc_tooltip(
    useHTML = TRUE,
    style = list(fontSize = "30px"),
    headerFormat = "",
    pointFormat = "<div style='text-align: center;'>Cut <b>{point.name}</b><br>{point.y} Cases<br>{point.percent} of total</div>",
    positioner = JS(
      "function () {

        /* one of the most important parts! */
        xp =  this.chart.chartWidth/2 - this.label.width/2
        yp =  this.chart.chartHeight/2 - this.label.height/2

        return { x: xp, y: yp };

      }"
    ),
    shadow = FALSE,
    borderWidth = 0,
    backgroundColor = "transparent",
    hideDelay = 1000
  )

Ridgeline

This one is in Spainish

clima <- 
  datos::clima %>% 
  mutate(
    fecha = lubridate::ymd(paste(anio, mes, dia, paste = "-")),
    mes_fecha = fct_inorder(months(fecha))
  )

hc_xy(
    data = clima,
    x = "temperatura",
    group = "mes_fecha",
    type = "ridgeline",
    theme_name = "better_unica",
    zIndex = 12:1,
    lineWidth = 3,
    title = "Ridgelines",
    color_palette = "pals::jet",
    opacity = 1,
    disable_x = F,
    disable_y = T,
    invert_chart = T
)
## 
## Using continuous color scheme pals::jet
## 
## <colors>
## #00007FFF #0000DCFF #0039FFFF #0096FFFF #00F3FFFF #50FFADFF #ADFF50FF #FFF300FF #FF9600FF #FF3900FF #DC0000FF #7F0000FF

Movies Area and Streamgraph

movies <- ggplot2movies::movies

films <- 
  movies %>%
  select(year, Action:Short) %>%
  gather(category, count, -year) %>%
  group_by(year, category) %>%
  summarise(count = sum(count))

Basic area

hc_xy(
    data = films,
    type = "area",
    x = "year",
    y = "count",
    group = "category",
    color_palette  = "viridis::inferno",
    color_type = "continuous",
    opacity = .5,
    override_y_label = list(
      formatter = JS(
        "function(){
                       return '' + Highcharts.numberFormat(this.value, 0, '.', ',') + ' films';
                       }"
      )
    ),
    stacking = "normal",
    override_series =  list(
      stacking = list(enbled = TRUE),
      marker = list(symbol = "circle")
    ),
    override_tooltip = F
  ) %>%
  hc_tooltip(table = TRUE, sort = TRUE)
## 
## Using continuous color scheme viridis::inferno
## 
## <colors>
## #000004FF #330A5FFF #781C6DFF #BB3754FF #ED6925FF #FCB519FF #FCFFA4FF

Lets add some plot lines and descriptive text

events <- tibble(
  year = c(1930, 1941, 1990),
  text = c(
    "Beginning was golden in Hollywood.",
    "Start of TV",
    "Beginning of the rise of independent cinema.")
)

data_plot_line_x <- events %>%
  transmute(value  = year,
            label = map(text, ~ list(text = .x))) %>% 
  mutate(color = "#666", width = 2, zIndex = 5)

movie_caption <- "Data from <b> IMDB </b> through the better_unicamovies package. <br>Most events were obtained from flash reading in <b> Wikipedia </b>"

Viz it!

hc_xy(
  data = films,
  type = "area",
  x = "year",
  y = "count",
  group = "category",
  color_palette  = "viridis::inferno",
  color_type = "continuous",
  opacity = 1,
  theme_name = "clean_unica",
  caption = movie_caption,
  title = "Popularity of Movies by Type Over Time",
  override_y_label = list(
    formatter = JS(
      "function(){
                       return '' + Highcharts.numberFormat(this.value, 0, '.', ',') + ' films';
                       }"
    )
  ),
  stacking = "normal",
  data_x_lines = data_plot_line_x,
  override_series =  list(
    stacking = list(enbled = TRUE),
    marker = list(symbol = "circle")
  ),
  override_tooltip = F
) %>%
  hc_tooltip(table = TRUE, sort = TRUE)
## 
## Using continuous color scheme viridis::inferno
## 
## <colors>
## #000004FF #330A5FFF #781C6DFF #BB3754FF #ED6925FF #FCB519FF #FCFFA4FF

Tour de France

tourfrance <-
  read_csv(
    "https://raw.githubusercontent.com/jbkunst/blog/master/data/tour_france_state_8.txt"
  )

Basic area chart.

hc_tour_area <- 
  tourfrance %>% mutate(color = "navy") %>%
  hc_xy(
    type = "area",
    opacity = .5,
    x = "distance",
    y = "elevation",
    subtitle = "Example obtained from HighchartsJS documentation",
    override_x_label  = list(format = "{value} km"),
    override_x_text = list(text = "Distance"),
    override_y_label  = list(format = "{value} m"),
    override_y_text = list(text = "Elevation"),
    title = "Tour de Francia 2017, Stage 8: <i>Dole - Station des Rousses</i>",
    disable_legend = T
  ) %>%
  hc_tooltip(headerFormat = "Distance: {point.x:.1f} km<br>",
             pointFormat = "{point.y} m a. s. l.")

Lets see it

hc_tour_area

Lets add some annotations.


df1 <- read_csv(
  '"x","y", "text"
27.98,255, "Arbois"
45.5,611,"Montrond"
63,651,"Mont-sur-Monnet"
84,789,"Bonlieu"
129.5,382,"Chassal"
159,443,"Saint-Claude"'
)

df2 <- read_csv(
  '"x","y","text"
101.44,1026,"Col de la Joux"
138.5,748,"Côte de Viry"
176.4,1202,"Montée de la Combe<br>de Laisia Les Molunes"'
)

df3 <- read_csv(
  '"x","y","text"
96.2,783,"6.1 km climb<br>4.6% on avg."
134.5,540,"7.6 km climb<br>5.2% on avg."
172.2,925,"11.7 km climb<br>6.4% on avg."
'
)

df1_p <- df_to_annotations_labels(df1)

df2_p <- df_to_annotations_labels(df2)

df3_p <- df_to_annotations_labels(df3)

Now we plot it.

hc_tour_area %>%   
  hc_annotations(
    list(
      labelOptions = list(
        backgroundColor = 'rgba(255,255,255,0.5)',
        verticalAlign = "top",
        y = 15
      ),
      labels = df1_p
    ),
    list(labels = df2_p),
    list(
      labelOptions = list(
        shape = "connector",
        align = "right",
        justify = FALSE,
        crop = TRUE,
        style = list(fontSize = "0.8em", textOutline = "1px white")
      ),
      labels = df3_p
    )
)

Treemap

Gapminder

tbl_2007 %>% hc_xy(
  group = c("continent", "country"),
  size = "pop",
  type = "treemap",
  is_sunburst = T,
  title = "2007 Gapminder",
  tree_labels = list(
    list(
      level = 1,
      borderWidth = 0,
      borderColor = "transparent",
      colorByPoint = TRUE,
      dataLabels = list(enabled = TRUE)
    ),
    list(
      level = 2,
      borderWidth = 0,
      borderColor = "transparent",
      colorVariation = list(key = "brightness", to = 0.50),
      dataLabels = list(enabled = TRUE)
    )
  )
)

Sunburst

Gapminder

tbl_2007 %>% 
  mutate(world = "world") %>% 
  hc_xy(
  group = c("world", "continent", "country"),
  size = "pop",
  use_new_treemap = T,
  type = "treemap",
  is_sunburst = T,
  tree_labels =  list(
    list(
      level = 1,
      borderWidth = 0,
      color = "transparent",
      borderColor = "transparent",
      # colorByPoint = TRUE,
      dataLabels = list(enabled = TRUE)
    ),
    list(
      level = 2,
      borderWidth = 0,
      borderColor = "transparent",
      colorByPoint = TRUE,
      dataLabels = list(enabled = TRUE)
    ),
    list(
      level = 3,
      borderWidth = 0,
      borderColor = "transparent",
      colorVariation = list(key = "brightness", to = 0.50),
      dataLabels = list(enabled = TRUE)
    )
  )
)
## 
## Using continuous color scheme scico::vikO
## 
## <colors>
## #4E193DFF

hc_kunst

Alex Bresler

2021-10-05

Josh Kunst Tutorial

Column

Line

Annotated Line Progress Bar

Scatter

Donut

Ridgeline

Movies Area and Streamgraph

Categories

Tour de France

Treemap

Sunburst