A declarative approach to creating plots from tidy data.
| Action |
Code |
Details |
|
Set default theme
|
theme_set(theme_minimal())
|
|
|
Set viridis as default color
|
options(
ggplot2.discrete.colour = 'viridis',
ggplot2.discrete.fill = 'viridis',
ggplot2.continuous.colour = 'viridis',
ggplot2.continuous.fill = 'viridis'
)
|
|
|
Set a specific viridis palette option (here 'turbo') as default color
|
options(
ggplot2.discrete.colour = function(...) scale_color_viridis_d(..., option = 'turbo'),
ggplot2.discrete.fill = function(...) scale_fill_viridis_d(..., option = 'turbo'),
ggplot2.continuous.colour = function(...) scale_color_viridis_c(..., option = 'turbo'),
ggplot2.continuous.fill = function(...) scale_fill_viridis_c(..., option = 'turbo')
)
|
|
| Action |
Code |
Details |
|
Boxplot, oriented horizontally
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_boxplot()
|
|
|
Boxplot, hide outliers
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_boxplot(outlier.shape = NA)
|
|
|
Boxplot, oriented vertically
|
ggplot(iris, aes(y = Sepal.Length)) +
geom_boxplot()
|
|
|
Boxplot by group
|
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
geom_boxplot()
|
|
|
Boxplot by two grouping factors
|
ggplot(diamonds, aes(x = price, y = cut, fill = clarity)) +
geom_boxplot()
|
|
| Action |
Code |
Details |
|
Plot histogram for discrete data (frequency per category)
|
ggplot(iris, aes(x = Species)) +
geom_bar()
|
|
|
Plot normalized histogram (bar heights sum to 1) for discrete data
|
ggplot(iris, aes(x = Species)) +
geom_bar(aes(y = after_stat(count) / sum(after_stat(count))))
|
|
|
Plot histogram centered at integer values
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(binwidth = 1, boundary = -.5)
|
|
|
Plot histogram for continuous data
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram()
|
|
|
Plot histogram as a stack of dots (dot plot)
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_dotplot()
|
|
|
Plot histogram with a given bin width
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(binwidth = 1.0)
|
|
|
Plot histogram with a given number of bins
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(bins = 5)
|
|
|
Plot histogram and kernel density, for bin width w
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(binwidth = w) +
geom_density(aes(y = ..count.. * w))
|
|
|
Plot normalized histogram (AUC = 1)
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = ..density..))
|
|
|
Plot normalized histogram (AUC = 1) and kernel density
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = ..density..)) +
geom_density()
|
|
|
Plot proportional histogram as percentage
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = after_stat(count) / sum(after_stat(count)))) +
scale_y_continuous(labels = scales::percent) +
labs(y = 'Proportion')
|
|
|
Facetted histogram chart
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram() +
facet_wrap(~ Species)
|
|
|
Facetted histogram chart, normalized per facet
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(y = after_stat(width * density))) +
facet_wrap(~ Species) +
scale_y_continuous(labels = scales::percent) +
labs(y = 'Proportion')
|
|
|
Facetted histogram chart, normalized per facet
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_histogram(aes(
y = after_stat(count) /
tapply(after_stat(count), after_stat(PANEL), sum)[after_stat(PANEL)])
) +
facet_wrap(~ Species)
|
Yikes: the counts have to be normalized per panel by hand |
| Action |
Code |
Details |
|
Plot kernel density (AUC = 1)
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_density()
|
|
|
Plot kernel density rescaled with peak at 1.0
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_density(aes(y = ..ndensity..))
|
|
|
Plot kernel density rescaled with peak at 1.0
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_density(aes(y = ..scaled..))
|
|
|
Plot kernel density by group
|
ggplot(iris, aes(x = Sepal.Length, color = Species)) +
geom_density()
|
|
|
Facetted kernel density chart
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_density() + facet_wrap(~ Species)
|
|
|
Violin plot
|
|
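Doesn't seem to be possible without a grouping aesthetic; a constant dummy group such as aes(x = Sepal.Length, y = '') may work as a workaround. |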
|
Violin plot with grouping factor
|
ggplot(iris, aes(x = Sepal.Length, y = Species)) +
geom_violin()
|
|
|
Violin plot with grouping factor, oriented vertically
|
ggplot(iris, aes(x = Species, y = Sepal.Length)) +
geom_violin()
|
|
| Action |
Code |
Details |
|
Plot stepped cumulative density
|
ggplot(iris, aes(x = Sepal.Length)) +
stat_ecdf()
|
|
|
Plot stepped cumulative density for available interval only
|
ggplot(iris, aes(x = Sepal.Length)) +
stat_ecdf(pad = FALSE)
|
|
|
Plot stepped inverse / complementary cumulative density
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_step(aes(y = 1 - ..y..), stat='ecdf')
|
|
|
Plot stepped cumulative density by group
|
ggplot(iris, aes(x = Sepal.Length, color = Species)) +
stat_ecdf()
|
|
|
Facetted plot of stepped cumulative density
|
ggplot(iris, aes(x = Sepal.Length)) +
stat_ecdf() + facet_wrap(~ Species)
|
|
|
Plot interpolated cumulative density
|
ggplot(iris, aes(x = Sepal.Length)) +
geom_line(stat='ecdf')
|
|
|
Plot cumulative density as barplot
|
ggplot(iris, aes(x = Sepal.Length)) +
stat_ecdf(geom='bar')
|
Ugly, as bars are equal width |
|
Plot step-wise cumulative density with density rug
|
ggplot(iris, aes(x = Sepal.Length)) +
stat_ecdf() + geom_rug()
|
|
Plots involving two continuous variables.
| Action |
Code |
Details |
|
Add horizontal line
|
p + geom_hline(yintercept = 0)
|
|
|
Add vertical line
|
p + geom_vline(xintercept = 0)
|
|
|
Add intercept-slope (ab) line
|
p + geom_abline(intercept = 0, slope = 1)
|
|
|
Smooth data
|
ggplot(economics, aes(x = date, y = unemploy)) +
geom_line() +
geom_smooth()
|
|
|
Smooth with intercept-slope model
|
ggplot(economics, aes(x = date, y = unemploy)) +
geom_line() +
geom_smooth(method = 'lm', formula = y ~ x)
|
|
|
Smooth data only at the specified x-axis locations
|
ggplot(economics, aes(x = date, y = unemploy)) +
geom_line() +
geom_smooth(
xseq = seq(0, 5000, by = 100)
)
|
Not a great example since this is easier for a numeric x-axis. |
|
Quantile regression
|
ggplot(economics, aes(x = date, y = unemploy)) +
geom_line() +
geom_quantile()
|
|
| Action |
Code |
Details |
|
Scatter plot
|
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point()
|
|
|
Scatter plot without overlap (jitter plot)
|
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_jitter()
|
|
|
Scatter plot with bigger points for overlaps, with legend for counts
|
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_count()
|
|
|
Scatter plot with bigger points for overlaps, with legend for proportional size
|
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_count(aes(size = ..prop..))
|
|
|
Scatter plot with grouping factor
|
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, color = Species)) +
geom_point()
|
|
|
Facetted scatter plot
|
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width)) +
geom_point() +
facet_wrap(~ Species)
|
|
| Action |
Code |
Details |
|
Line plot
|
ggplot(economics, aes(x = date, y = unemploy)) + geom_line()
|
|
|
Line plot, with separate lines per group
|
ggplot(Orthodont, aes(x = age, y = distance, group = Subject)) +
geom_line()
|
|
|
Line plot involving duplicate observations per x, as confidence region
|
|
|
|
Facetted line plot
|
ggplot(Orthodont, aes(x = age, y = distance, group = Subject)) +
geom_line() +
facet_wrap(~ Sex)
|
|
| Action |
Code |
Details |
|
Set title
|
p + labs(title = 'My title')
|
|
|
Set subtitle
|
p + labs(subtitle = 'My subtitle')
|
|
|
Set caption
|
p + labs(caption = 'Based on iris data')
|
|
|
Flip axes
|
|
|
|
1:1 aspect ratio of axes
|
|
|
|
Set range of axes
|
p + xlim(c(0, 10)) + ylim(c(5, 15))
|
|
|
Extend range to include data points
|
p + expand_limits(x = c(0, 1, 5), y = 1)
|
|
|
Set axis ticks, with corresponding tick labels
|
p + scale_x_continuous(breaks = 1:3, labels = LETTERS[1:3])
|
|
|
Base-10 log scale axis
|
|
|
|
Base-10 log scale axis with labels
|
p + scale_x_log10(labels = scales::label_log())
|
|
|
Set axis label
|
p + xlab('X-axis') + ylab('Y-axis')
|
|
|
Show labels at 45 degree angle
|
theme(axis.text.x = element_text(angle = 45))
|
|
|
Label with subscript
|
p + labs(x = expression('Pressure support (cmH'[2] * 'O)'))
|
Strange syntax |
|
Set title and axis labels
|
p + labs(title = 'My title', x = 'X-axis', y = 'Y-axis')
|
|
|
Format axis with percentage labels
|
p + scale_x_continuous(labels=scales::percent)
|
|
|
Format axis with comma as thousands separator
|
p + scale_x_continuous(labels=scales::comma)
|
|
|
Broken axis
|
|
Not possible unless manually drawing as two stacked plots |
|
Hide legend of a specific scale
|
p + scale_fill_discrete(guide = 'none')
|
|
|
Hide all legends
|
theme(legend.position = 'none')
|
It is more readable to specify per scale that the legend should be hidden, see show.legend |
|
Hide all legends
|
p + guides(fill = 'none', color = 'none', linetype = 'none', shape = 'none')
|
|
|
Legend without lines
|
p + guides(
fill = guide_legend(override.aes = list(linetype = 0)),
color = guide_legend(override.aes = list(linetype = 0))
)
|
|
|
Multirow legend
|
p + guides(fill = guide_legend(nrow = 2, byrow = TRUE))
|
Makes the fill legend multirow |
|
Draw grid in front of geoms
|
x_intercept = ggplot_build(p)$layout$panel_ranges[[1]]$x.major_source
y_intercept = ggplot_build(p)$layout$panel_ranges[[1]]$y.major_source
p + geom_vline(xintercept = x_intercept, color = '#f0f0f0') +
geom_hline(yintercept = y_intercept, color = '#f0f0f0')
|
|
|
Swap data of plot
|
|
|
|
Plot using a subset of the (new) data
|
p %+% subset(mpg, fl == '2')
|
|