Skip to content
Incomplete sheet

This sheet is incomplete and could use some attention. Please submit code snippet suggestions as an issue or PR here.

ggplot2

A declarative approach to creating plots from tidy data.

See https://ggplot2.tidyverse.org/

Details This sheet is a work in progress.

Code

library(ggplot2)
library(scales)
data(iris)
data(diamonds)
data(economics)

# some snippets use the Orthodont data from the nlme package
library(nlme)
data(Orthodont)

Theming

For themes, see https://ggplot2-book.org/themes#sec-themes

Action Code Details
Set default theme
theme_set(theme_minimal())
Set viridis as default color
# The continuous options accept the string 'viridis'. The discrete options only
# accept colour vectors or a scale function, so pass the viridis scale itself.
options(
    ggplot2.discrete.colour = scale_color_viridis_d,
    ggplot2.discrete.fill = scale_fill_viridis_d,
    ggplot2.continuous.colour = 'viridis',
    ggplot2.continuous.fill = 'viridis'
)
Set viridis option as default color
# Each option is set to a function that forwards its arguments to the matching
# viridis scale and fixes the palette variant via option = 'turbo'.
options(
    ggplot2.discrete.colour = function(...) scale_color_viridis_d(..., option = 'turbo'),
    ggplot2.discrete.fill = function(...) scale_fill_viridis_d(..., option = 'turbo'),
    ggplot2.continuous.colour = function(...) scale_color_viridis_c(..., option = 'turbo'),
    ggplot2.continuous.fill = function(...) scale_fill_viridis_c(..., option = 'turbo')
)

Plots

Distribution plots

Boxplots

Action Code Details
Boxplot, oriented horizontally
ggplot(iris, aes(x = Sepal.Length)) +
    geom_boxplot()
Boxplot, hide outliers
ggplot(iris, aes(x = Sepal.Length)) +
    geom_boxplot(outlier.shape = NA)
Boxplot, oriented vertically
ggplot(iris, aes(y = Sepal.Length)) +
    geom_boxplot()
Boxplot by group
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
    geom_boxplot()
Boxplot by two grouping factors
ggplot(diamonds, aes(x = price, y = cut, fill = clarity)) +
    geom_boxplot()

Histograms

Action Code Details
Plot histogram for discrete data (frequency per category)
ggplot(iris, aes(x = Species)) +
    geom_bar()
Plot normalized histogram (AUC = 1) for discrete data
ggplot(iris, aes(x = Species)) +
    geom_bar(aes(y = after_stat(count) / sum(after_stat(count))))
Plot histogram centered at integer values
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(binwidth = 1, boundary = -.5)
Plot histogram for continuous data
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram()
Plot histogram as a stack of dots (dot plot)
ggplot(iris, aes(x = Sepal.Length)) +
    geom_dotplot()
Plot histogram with bin-width
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(binwidth = 1.0)
Plot histogram with a given number of bins
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(bins = 5)
Plot histogram and kernel density, for bin width w
# Multiply the density's count by the bin width w so the curve is on the same
# scale as the histogram counts.
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(binwidth = w) +
    geom_density(aes(y = after_stat(count) * w))
Plot normalized histogram (AUC = 1)
# after_stat(density) replaces the deprecated ..density.. notation
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(aes(y = after_stat(density)))
Plot normalized histogram (AUC = 1) and kernel density
# Histogram on the density scale, so it lines up with the kernel density curve
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(aes(y = after_stat(density))) +
    geom_density()
Plot proportional histogram as percentage
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(aes(y = after_stat(count) / sum(after_stat(count)))) +
    scale_y_continuous(labels = scales::percent) +
    labs(y = 'Proportion')
Facetted histogram chart
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram() +
    facet_wrap(~ Species)
Facetted histogram chart, normalized per facet
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(aes(y = after_stat(width * density))) +
    facet_wrap(~ Species) +
    scale_y_continuous(labels = scales::percent) +
    labs(y = 'Proportion')
Facetted histogram chart, normalized per facet
# Divide each bin count by the total count of its own facet: tapply() sums the
# counts per PANEL, and indexing that result by PANEL looks up the total that
# belongs to each bin.
ggplot(iris, aes(x = Sepal.Length)) +
    geom_histogram(aes(
        y = after_stat(count) /
            tapply(after_stat(count), after_stat(PANEL), sum)[after_stat(PANEL)])
    ) +
    facet_wrap(~ Species)
Yikes

Kernel density plots

Action Code Details
Plot kernel density (AUC = 1)
ggplot(iris, aes(x = Sepal.Length)) +
    geom_density()
Plot kernel density rescaled with peak at 1.0
# after_stat(ndensity) replaces the deprecated ..ndensity.. notation
ggplot(iris, aes(x = Sepal.Length)) +
    geom_density(aes(y = after_stat(ndensity)))
Plot kernel density rescaled with peak at 1.0
# after_stat(scaled) replaces the deprecated ..scaled.. notation
ggplot(iris, aes(x = Sepal.Length)) +
    geom_density(aes(y = after_stat(scaled)))
Plot kernel density by group
ggplot(iris, aes(x = Sepal.Length, color = Species)) +
    geom_density()
Facetted kernel density chart
ggplot(iris, aes(x = Sepal.Length)) +
    geom_density() + facet_wrap(~ Species)
Violin plot
ggplot(iris, aes(x = Sepal.Length, y = '')) +
    geom_violin()
geom_violin() needs both x and y, so map the unused axis to a constant.
Violin plot with grouping factor
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
    geom_violin()
Violin plot with grouping factor, oriented vertically
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
    geom_violin()

Empirical cumulative density plots

Action Code Details
Plot stepped cumulative density
ggplot(iris, aes(x = Sepal.Length)) +
    stat_ecdf()
Plot stepped cumulative density for available interval only
ggplot(iris, aes(x = Sepal.Length)) +
    stat_ecdf(pad = FALSE)
Plot stepped inversed / complementary cumulative density
# 1 - ECDF gives the complementary curve; after_stat(y) replaces ..y..
ggplot(iris, aes(x = Sepal.Length)) +
    geom_step(aes(y = 1 - after_stat(y)), stat = 'ecdf')
Plot stepped cumulative density by group
ggplot(iris, aes(x = Sepal.Length, color = Species)) +
    stat_ecdf()
Facetted plot of stepped cumulative density
ggplot(iris, aes(x = Sepal.Length)) +
    stat_ecdf() + facet_wrap(~ Species)
Plot interpolated cumulative density
ggplot(iris, aes(x = Sepal.Length)) +
    geom_line(stat='ecdf')
Plot cumulative density as barplot
ggplot(iris, aes(x = Sepal.Length)) +
    stat_ecdf(geom='bar')
Ugly, as bars are equal width
Plot step-wise cumulative density with density rug
ggplot(iris, aes(x = Sepal.Length)) +
    stat_ecdf() + geom_rug()

Bivariate plots

Plots involving two continuous variables.

Action Code Details
Add horizontal line
p + geom_hline(yintercept = 0)
Add vertical line
p + geom_vline(xintercept = 0)
Add intercept-slope (ab) line
p + geom_abline(intercept = 0, slope = 1)
Smooth data
ggplot(economics, aes(x = date, y = unemploy)) +
    geom_line() +
    geom_smooth()
Smooth with intercept-slope model
ggplot(economics, aes(x = date, y = unemploy)) +
    geom_line() +
    geom_smooth(method = 'lm', formula = y ~ x)
Smooth data only at the specified x-axis locations
ggplot(economics, aes(x = date, y = unemploy)) +
    geom_line() +
    geom_smooth(
        xseq = seq(0, 5000, by = 100)
    )
Not a great example since this is easier for a numeric x-axis.
Quantile regression
ggplot(economics, aes(x = date, y = unemploy)) +
    geom_line() +
    geom_quantile()

Scatter plots

Action Code Details
Scatter plot
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
    geom_point()
Scatter plot without overlap (jitter plot)
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
    geom_jitter()
Scatter plot with bigger points for overlaps, with legend for counts
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
    geom_count()
Scatter plot with bigger points for overlaps, with legend for proportional size
# after_stat(prop) replaces the deprecated ..prop.. notation
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
    geom_count(aes(size = after_stat(prop)))
Scatter plot with grouping factor
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
    geom_point()
Facetted scatter plot
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
    geom_point() +
    facet_wrap(~ Species)

Line plots

Action Code Details
Line plot
ggplot(economics, aes(x = date, y = unemploy)) + geom_line()
Line plot, with separate lines per group
ggplot(Orthodont, aes(x = age, y = distance, group = Subject)) +
    geom_line()
Line plot involving duplicate observations per x, as confidence region
?
Facetted line plot
ggplot(Orthodont, aes(x = age, y = distance, group = Subject)) +
    geom_line() +
    facet_wrap(~ Sex)

Facetting

Action Code Details
Use labeling function for facet titles
p + facet_grid(~ Group, labeller = label_parsed)

Aesthetic options

Action Code Details
Define mapping programmatically
var <- 'Sepal.Length'
# .data[[var]] looks the column up by the name stored in the string
aes(x = .data[[var]])
Replaces the deprecated aes_string()

Plot configuration / esthetics

Action Code Details
Set title
p + labs(title = 'My title')
Set subtitle
p + labs(subtitle = 'My subtitle')
Set caption
p + labs(caption = 'Based on iris data')
Flip axes
p + coord_flip()
1:1 aspect ratio of axes
p + coord_equal()
Set range of axes
p + xlim(c(0, 10)) + ylim(c(5, 15))
Extend range to include data points
p + expand_limits(x = c(0, 1, 5), y = 1)
Set axis ticks, with corresponding tick labels
p + scale_x_continuous(breaks = 1:3, labels = LETTERS[1:3])
Base-10 log scale axis
p + scale_x_log10()
Base 10 log scale axis with labels
p + scale_x_log10(labels = scales::label_log())
Set axis label
p + xlab('X-axis') + ylab('Y-axis')
Show labels at 45 degree angle
# hjust = 1 right-aligns the rotated labels so they end at their tick marks
p + theme(axis.text.x = element_text(angle = 45, hjust = 1))
Label with subscript
# plotmath: [2] sets the subscript, * joins the pieces without a space
p + labs(x = expression('Pressure support (cmH'[2] * 'O)'))
Strange syntax
Set title and axis labels
p + labs(title = 'My title', x = 'X-axis', y = 'Y-axis')
Format axis with percentage labels
p + scale_x_continuous(labels=scales::percent)
Format axis with comma as thousands separator
p + scale_x_continuous(labels=scales::comma)
Broken axis
?
Not possible unless manually drawing as two stacked plots
Hide legend of a specific scale
p + scale_fill_discrete(guide = 'none')
Hide all legends
# Removes every legend of the plot via the theme
p + theme(legend.position = 'none')
It is more readable to hide legends explicitly, either per scale (guide = 'none') or per layer (show.legend = FALSE)
Hide all legends
p + guides(fill = 'none', color = 'none', linetype = 'none', shape = 'none')
Legend without lines
p + guides(
    fill = guide_legend(override.aes = list(linetype = 0)),
    color = guide_legend(override.aes = list(linetype = 0))
)
Multirow legend
p + guides(fill = guide_legend(nrow = 2, byrow = TRUE))
Makes the fill legend multirow
Draw grid in front of geoms
# Read the major break positions of the first panel from the built plot and
# redraw them as lines on top of the existing layers.
# Requires ggplot2 >= 3.3.0, where the breaks live in layout$panel_params.
panel <- ggplot_build(p)$layout$panel_params[[1]]
# Breaks outside the axis limits are reported as NA; drop them
x_intercept <- panel$x$breaks[!is.na(panel$x$breaks)]
y_intercept <- panel$y$breaks[!is.na(panel$y$breaks)]

p + geom_vline(xintercept = x_intercept, color = '#f0f0f0') +
    geom_hline(yintercept = y_intercept, color = '#f0f0f0')
Swap data of plot
p %+% newdata
Plot using a subset of the (new) data
# mpg$fl holds fuel-type letter codes; 'p' keeps only the premium-fuel rows
p %+% subset(mpg, fl == 'p')