May 3, 2024

The typical way

# Sepal width per species: a summary pointrange (mean_cl_boot) is drawn first,
# then the raw observations on top as semi-transparent quasirandom points.
# Species get a fixed purple palette and the legend is hidden.
ggplot(iris, aes(x=Species, y=Sepal.Width, colour=Species)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
    geom_quasirandom(alpha=0.3) +
    scale_colour_manual(
        values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5")
    ) +
    theme(legend.position="none")

The typical way

# Same figure as for sepal width, now with Sepal.Length on the y axis:
# summary pointrange (mean_cl_boot) plus semi-transparent raw points,
# fixed species palette, no legend.
ggplot(iris, aes(x=Species, y=Sepal.Length, colour=Species)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
    geom_quasirandom(alpha=0.3) +
    scale_colour_manual(
        values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5")
    ) +
    theme(legend.position="none")

The typical way

# Same figure once more, now with Petal.Width on the y axis:
# summary pointrange (mean_cl_boot) plus semi-transparent raw points,
# fixed species palette, no legend.
ggplot(iris, aes(x=Species, y=Petal.Width, colour=Species)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
    geom_quasirandom(alpha=0.3) +
    scale_colour_manual(
        values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5")
    ) +
    theme(legend.position="none")

The typical way sucks

# The copy-paste approach in full: three statements that are identical except
# for the column mapped to y (Sepal.Width, Sepal.Length, Petal.Width).

# Panel 1: Sepal.Width
ggplot(iris, aes(x=Species, y=Sepal.Width, colour=Species)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
    geom_quasirandom(alpha=0.3) +
    scale_colour_manual(
        values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5")
    ) +
    theme(legend.position="none")

# Panel 2: Sepal.Length
ggplot(iris, aes(x=Species, y=Sepal.Length, colour=Species)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
    geom_quasirandom(alpha=0.3) +
    scale_colour_manual(
        values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5")
    ) +
    theme(legend.position="none")

# Panel 3: Petal.Width
ggplot(iris, aes(x=Species, y=Petal.Width, colour=Species)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
    geom_quasirandom(alpha=0.3) +
    scale_colour_manual(
        values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5")
    ) +
    theme(legend.position="none")

Using a function?

# First attempt at wrapping the species plot in a function, passing the
# y column as a string.
#
# data: data frame with a Species column.
# yvar: used directly inside aes(). Because aes() resolves `yvar` to this
#       function argument, y is mapped to whatever value was passed (here the
#       constant string "Petal.Width"), not to the data column of that name.
plot_species = function(data, yvar) {
    species_colours = c(
        setosa = "#842EBD",
        versicolor = "#CA26E2",
        virginica = "#EB75F5"
    )
    ggplot(data, aes(x=Species, y=yvar, colour=Species)) +
        stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
        geom_quasirandom(alpha=0.3) +
        scale_colour_manual(values=species_colours) +
        theme(legend.position="none")
}

plot_species(iris, "Petal.Width")

Using a function?

# Second attempt: same function, but called with a bare column name.
#
# data: data frame with a Species column.
# yvar: used directly inside aes(). A bare name such as Petal.Width is
#       evaluated as an ordinary argument outside the data frame, where no
#       such object exists -- which produces the error printed below.
plot_species = function(data, yvar) {
    species_colours = c(
        setosa = "#842EBD",
        versicolor = "#CA26E2",
        virginica = "#EB75F5"
    )
    ggplot(data, aes(x=Species, y=yvar, colour=Species)) +
        stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
        geom_quasirandom(alpha=0.3) +
        scale_colour_manual(values=species_colours) +
        theme(legend.position="none")
}

plot_species(iris, Petal.Width)
## Error in `stat_summary()`:
## ! Problem while computing aesthetics.
## ℹ Error occurred in the 1st layer.
## Caused by error in `FUN()`:
## ! object 'Petal.Width' not found

Using a function with {{var}}

# Species plot for an arbitrary y column.
#
# data: data frame with a Species column.
# yvar: unquoted column name; the {{ }} (embrace) operator forwards it into
#       aes() so it is looked up as a column of `data`.
# Returns a ggplot: summary pointrange (mean_cl_boot) with the raw
# observations on top, fixed species palette, legend hidden.
plot_species = function(data, yvar) {
    species_colours = c(
        setosa = "#842EBD",
        versicolor = "#CA26E2",
        virginica = "#EB75F5"
    )
    ggplot(data, aes(x=Species, y={{yvar}}, colour=Species)) +
        stat_summary(geom="pointrange", fun.data=mean_cl_boot) +
        geom_quasirandom(alpha=0.3) +
        scale_colour_manual(values=species_colours) +
        theme(legend.position="none")
}

# Needs library(patchwork): `+` between complete ggplots combines them into
# one multi-panel figure.
plot_species(iris, Petal.Width) +
    plot_species(iris, Petal.Length) +
    plot_species(iris, Petal.Width)

Using a function with {{var}}

See more on Hadley’s blog post

# Mean of one column within each level of a grouping column.
#
# data: data frame (or tibble).
# var:  unquoted column to average; forwarded with {{ }}.
# by:   unquoted grouping column; forwarded with {{ }}.
# Returns one row per group. The summary column keeps the name of `var`
# (that is what the "{{var}}" := left-hand side does). Missing values in
# `var` are removed before averaging.
mean_by = function(data, var, by) {
    data %>%
        group_by({{ by }}) %>%
        summarise("{{var}}" := mean({{ var }}, na.rm = TRUE))
}

iris %>% mean_by(Petal.Width, by=Species)
## # A tibble: 3 × 2
##   Species    Petal.Width
##   <fct>            <dbl>
## 1 setosa           0.246
## 2 versicolor       1.33 
## 3 virginica        2.03 

Arbitrary arguments with ...

# Scatter plot with a linear-model smoother.
#
# data: data frame to plot.
# ...:  aesthetic mappings, forwarded unchanged to aes() (e.g. x and y
#       columns positionally, plus optional named ones such as color=).
# Returns a ggplot with semi-transparent points and an "lm" smooth.
plot_relationship = function(data, ...) {
    ggplot(data, aes(...)) +
        geom_point(alpha=0.5) +
        stat_smooth(method="lm", alpha=0.2)
}

# Three panels combined with `+`; the last one also colours by species.
plot_relationship(iris, Petal.Width, Petal.Length) +
    plot_relationship(iris, Petal.Width, Sepal.Width) +
    plot_relationship(iris, Petal.Width, Sepal.Width, color=Species)

Arbitrary arguments with ...

# Proportion of rows for each combination of the given expressions.
#
# data: data frame; may already be grouped.
# ...:  columns or expressions, forwarded to count().
# Returns the count() result with the count column `n` replaced by
# `prop = n / sum(n)` (.keep="unused" drops `n`). In the grouped example
# below, the proportions add up to 1 within each species.
props = function(data, ...) {
    counts = count(data, ...)
    mutate(counts, prop = n / sum(n), .keep="unused")
}

# Within each species: share of flowers by whether sepal length exceeds 5
# and whether sepal width exceeds 3.
props(group_by(iris, Species), Sepal.Length > 5, Sepal.Width > 3)
## # A tibble: 9 × 4
## # Groups:   Species [3]
##   Species    `Sepal.Length > 5` `Sepal.Width > 3`  prop
##   <fct>      <lgl>              <lgl>             <dbl>
## 1 setosa     FALSE              FALSE              0.16
## 2 setosa     FALSE              TRUE               0.4 
## 3 setosa     TRUE               TRUE               0.44
## 4 versicolor FALSE              FALSE              0.06
## 5 versicolor TRUE               FALSE              0.78
## 6 versicolor TRUE               TRUE               0.16
## 7 virginica  FALSE              FALSE              0.02
## 8 virginica  TRUE               FALSE              0.64
## 9 virginica  TRUE               TRUE               0.34

# Remove rows with missing or extreme values in the selected columns.
#
# data: data frame.
# ...:  columns to check; forwarded to drop_na() and to c(...) inside if_all().
# q_lo: lower quantile bound (default 0).
# q_hi: upper quantile bound (default 0.95).
# Rows with NA in any selected column are dropped first, so the quantiles are
# computed on complete values. A row is then kept only if every selected
# column lies between that column's q_lo and q_hi quantiles.
drop_extreme = function(data, ..., q_lo=0, q_hi=0.95) {
    complete_rows = drop_na(data, ...)
    filter(
        complete_rows,
        if_all(c(...), ~ between(.x, quantile(.x, q_lo), quantile(.x, q_hi)))
    )
}

# Append one implausible observation (Petal.Width of 30) to iris and plot the
# relationship with it included.
bad_iris = add_row(iris, Petal.Length=0., Petal.Width=30)
bad_plot = plot_relationship(bad_iris, Petal.Width, Petal.Length)

# Same plot after keeping only rows whose petal measurements fall between
# the 2.5% and 97.5% quantiles.
good_plot = plot_relationship(
    drop_extreme(bad_iris, Petal.Width, Petal.Length, q_lo=.025, q_hi=.975),
    Petal.Width, Petal.Length
)

# Show both versions together.
bad_plot + good_plot

A more composable approach to plotting: Variables are a thing

# Reusable plot components stored as ordinary variables.

# Species palette, applied to both the fill and colour aesthetics.
iris_pal = scale_colour_manual(
    values=c(setosa="#842EBD", versicolor="#CA26E2", virginica="#EB75F5"),
    aesthetics=c("fill", "colour")
)

# Theme tweak that hides the legend.
no_legend = theme(legend.position="none")

# Several layers bundled in a list so they can be added to a plot in one go:
# semi-transparent points plus an "lm" smoother.
point_lm = list(
    geom_point(alpha=0.5),
    stat_smooth(method="lm", alpha=0.2)
)

# Two panels written as one `+` chain. `%>%` binds tighter than `+`, so each
# `iris %>% ggplot(...)` is a complete plot object before anything is added.
# NOTE(review): adding one ggplot to another needs a plot-composition package
# (presumably patchwork, loaded elsewhere -- confirm); the components after
# the second ggplot() are intended for that second panel.
iris %>% ggplot(aes(Petal.Width, Petal.Length)) + point_lm +
iris %>% ggplot(aes(Petal.Width, Petal.Length, color=Species)) +
    point_lm + iris_pal + no_legend

¿Por qué no los dos?

# Bar chart of means with error bars, returned as a list of two layers.
#
# ...:       extra arguments for the bar layer only (e.g. width, alpha).
# err_width: width of the error bars (default .1).
# Returns list(bar layer using fun=mean, errorbar layer using mean_cl_boot).
psych_bars = function(..., err_width=.1) {
    bars = stat_summary(geom="bar", fun=mean, ...)
    error_bars = stat_summary(
        geom="errorbar", fun.data=mean_cl_boot, width=err_width
    )
    list(bars, error_bars)
}

# Mean petal width per species as translucent bars with error bars,
# using the shared palette and no legend.
ggplot(iris, aes(x=Species, y=Petal.Width, fill=Species)) +
    psych_bars(width=.7, err_width=0.2, alpha=0.5) +
    iris_pal +
    no_legend

# Binned summary of y along x, returned as a list of two layers.
# `robust` is defined in the accompanying source code; it drops points with
# too few observations (threshold `min_n`).
#
# bins:  number of bins, passed to stat_summary_bin().
# min_n: passed to robust() as the minimum number of observations (default 5).
# ...:   extra arguments forwarded to both layers.
# Returns list(mean_cl_boot summary with the default geom, mean drawn as a line).
bin_points_line = function(bins, min_n=5, ...) {
    binned_summary = stat_summary_bin(
        fun.data=robust(mean_cl_boot, min_n), bins=bins, ...
    )
    binned_line = stat_summary_bin(
        fun=robust(mean, min_n), bins=bins, ..., geom="line"
    )
    list(binned_summary, binned_line)
}

# Petal length summarised in 7 bins of petal width.
ggplot(iris, aes(x=Petal.Width, y=Petal.Length)) +
    bin_points_line(bins=7)

# Linear-model smoother layer.
# alpha: transparency passed to stat_smooth() (default 0.2).
# ...:   further arguments forwarded to stat_smooth().
linear_fit = function(alpha=0.2, ...) {
    stat_smooth(
        method="lm",
        alpha=alpha,
        ...
    )
}
# Logistic-regression smoother layer: a glm fit with a binomial family.
# alpha: transparency passed to stat_smooth() (default 0.2).
# ...:   further arguments forwarded to stat_smooth().
logistic_fit = function(alpha=0.2, ...) {
    stat_smooth(
        method="glm",
        method.args=list(family="binomial"),
        alpha=alpha,
        ...
    )
}
# GAM smoother layer using the formula y ~ s(x, bs = "cs", k = k).
# k:   passed as the `k` argument of s() (default -1; presumably the
#      "use the library default" value -- confirm against mgcv).
# ...: further arguments forwarded to stat_smooth().
gam_fit = function(k=-1, ...) {
    smooth_formula = y ~ s(x, bs = "cs", k=k)
    stat_smooth(method="gam", formula=smooth_formula, ...)
}

# Base plot reused for each of the three panels below.
p = iris %>% ggplot(aes(Petal.Width, Petal.Length))
# One `+` chain read left to right: each `p` after the first is meant to start
# a new panel, and the components that follow it belong to that panel.
# NOTE(review): this relies on a plot-composition package (presumably
# patchwork, loaded elsewhere -- confirm).
# NOTE(review): `bin_points` is not defined in the code shown here (only
# `bin_points_line` is) -- confirm it exists in the full source.
p + bin_points(7) + linear_fit() +
p + bin_points(7) + gam_fit() +
p + bin_points(7) + gam_fit(k=3, color="dodgerblue")

Bonus: Automatically formatting plot labels

See the source code for the definition of pretty_labels.

# Scatter of sepal width against petal width, coloured by species, with a
# linear fit and the shared palette.
p = ggplot(iris, aes(x=Petal.Width, y=Sepal.Width, colour=Species)) +
    geom_point(alpha=0.3) +
    linear_fit() +
    iris_pal

# The same plot without and with pretty_labels applied, shown together.
p + (p + pretty_labels)

Bonus: Simplifying regression syntax

See the source code for the definition of regress.

# Regress sepal width on petal width for the versicolor flowers only.
# `regress` is defined in the accompanying source code; the output below
# shows that with these arguments it fits a model via lm().
iris %>%
    filter(Species == "versicolor") %>%
    regress(Sepal.Width ~ Petal.Width)
## 
## Call:
## lm(formula = form, data = data)
## 
## Coefficients:
## (Intercept)  Petal.Width  
##       1.373        1.054

Bonus: Simplifying regression syntax

See the source code for the definition of regress.

# Logistic regression: is a flower versicolor, given sepal and petal width?
# The output below shows that logistic=TRUE fits a binomial glm().
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
iris %>%
    regress(Species == "versicolor" ~ Sepal.Width + Petal.Width, logistic=TRUE)
## 
## Call:  glm(formula = form, family = binomial, data = data)
## 
## Coefficients:
## (Intercept)  Sepal.Width  Petal.Width  
##      9.3324      -3.3044      -0.1512  
## 
## Degrees of Freedom: 149 Total (i.e. Null);  147 Residual
## Null Deviance:       191 
## Residual Deviance: 151.7     AIC: 157.7

Bonus: Simplifying regression syntax

See the source code for the definition of regress.

# Same logistic model, now with a made-up grouping variable `island`
# (values 1..15, repeated ten times) passed as group_var. The output below
# shows that this fits a binomial mixed model (glmerMod).
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
iris %>%
    mutate(island = rep(1:15, 10)) %>%
    regress(
        Species == "versicolor" ~ Sepal.Width + Petal.Width,
        logistic=TRUE,
        group_var="island"
    )
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: Species == "versicolor" ~ Sepal.Width + Petal.Width + 1 + (Sepal.Width +      Petal.Width + 1 || island)
##    Data: data
##      AIC      BIC   logLik deviance df.resid 
## 163.6684 181.7322 -75.8342 151.6684      144 
## Random effects:
##  Groups   Name        Std.Dev.
##  island   (Intercept) 0       
##  island.1 Sepal.Width 0       
##  island.2 Petal.Width 0       
## Number of obs: 150, groups:  island, 15
## Fixed Effects:
## (Intercept)  Sepal.Width  Petal.Width  
##      9.3324      -3.3044      -0.1512  
## optimizer (Nelder_Mead) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings