Hi all,

Here are some notes from today’s R users group on creating publication quality plots. We’re focusing on plotting using the base R graphics (so not ggplot).

My goal is to

- share some of my favourite plotting tricks
- provide you with a solid worked example showing how to use them.

This post does not try to give a complete coverage of the topics discussed, just a

working example.

Please download the data from this link and save in your current working directory: gapminder-FiveYearData.csv. This the gapminder data on gdp per capita through time.

We want to make a plot, maybe a little like Han’s Rosling’s plot from his great TED talk.

First, here is the not so nice version using R’s defaults:

```
data <- read.csv('gapminder-FiveYearData.csv', stringsAsFactors=FALSE)
df <- data[data$year==2002, ]
plot(df$gdpPercap, df$lifeExp)
```

Now let’s make something a bit nicer. We’re going to:

- log-scale the axes
- put in a label
- make nicely formatted power expressions as labels
- make points a little transparent and coloured by continent
- size the points by country’s population size

You’ll want to read in some handy utility functions, included at the end of this post – copy them and save into a file called `plot-utils.R`

.

```
source("plot-utils.R")
```

This is a file of helpful functions I copy into many of my projects.

Using some of these function, we can make a nicer plot:

```
plot(df$gdpPercap, df$lifeExp, log="xy", type='n', ann=FALSE, axes=FALSE,
xlim=c(10^2, 10^5), ylim = c(30, 90))
box()
points(df$gdpPercap, df$lifeExp, col=make.transparent(col.cat(df$continent), 0.8), pch=16,
cex = linear.rescale(sqrt(df$pop)))
mtext("GDP per capita ($ p.a.)", 1, line=3)
mtext("Life expectancy (yr)", 2, line=3)
axis.log10(1)
axis(2, las=1)
label("2002", cex=2)
```

Note that part of the trick to build the plot up in layers.

Next we want to wrap this into a function, so that we can make a similar plot

for any year:

```
my_plot <- function(data, year) {
df <- data[data$year==year, ]
plot(df$gdpPercap, df$lifeExp, log="xy", type='n', ann=FALSE, axes=FALSE,
xlim=c(10^2, 10^5), ylim = c(30, 90))
box()
points(df$gdpPercap, df$lifeExp, col=make.transparent(col.cat(df$continent), 0.8), pch=16,
cex = linear.rescale(sqrt(df$pop)))
mtext("GDP per capita ($ p.a.)", 1, line=3)
mtext("Life expectancy (yr)", 2, line=3)
axis.log10(1)
axis(2, las=1)
label(year, cex=2)
}
```

Now we can make a plot for any old year

```
my_plot(data, 1987)
```

Finally, save to pdf use the handy `to.pdf`

function (for more on this function see Rich FitzJohn’s blog post on the topic)

```
to.pdf(
my_plot(data, 1987),
"plot.pdf", width= 6, height=6)
```

Now we have things set up so nicely it’s also possible to generate a series of plots, one for each year:

```
for(y in unique(data$year)) {
to.pdf(
my_plot(data, y),
sprintf("plot%s.pdf", y),
width= 6, height=6)
}
```

## Now here’s my list of handy plotting utility functions

Save these into a file called `plot-utils.R`

.

```
# Returns up to 80 unique, nice colors, generated using http://tools.medialab.sciences-po.fr/iwanthue/
# Starts repeating after 80
niceColors<-function(n=80){
cols<-rep(c("#75954F","#D455E9","#E34423","#4CAAE1","#451431","#5DE737","#DC9B94","#DC3788","#E0A732","#67D4C1","#5F75E2","#1A3125","#65E689","#A8313C","#8D6F96","#5F3819","#D8CFE4","#BDE640","#DAD799","#D981DD","#61AD34","#B8784B","#892870","#445662","#493670","#3CA374","#E56C7F","#5F978F","#BAE684","#DB732A","#7148A8","#867927","#918C68","#98A730","#DDA5D2","#456C9C","#2B5024","#E4D742","#D3CAB6","#946661","#9B66E3","#AA3BA2","#A98FE1","#9AD3E8","#5F8FE0","#DF3565","#D5AC81","#6AE4AE","#652326","#575640","#2D6659","#26294A","#DA66AB","#E24849","#4A58A3","#9F3A59","#71E764","#CF7A99","#3B7A24","#AA9FA9","#DD39C0","#604458","#C7C568","#98A6DA","#DDAB5F","#96341B","#AED9A8","#55DBE7","#57B15C","#B9E0D5","#638294","#D16F5E","#504E1A","#342724","#64916A","#975EA8","#9D641E","#59A2BB","#7A3660","#64C32A"),
ceiling(n/80))
cols[1:n]
}
##' Adds a text label at a fixed specified percentage inside plot boundaries.
##' To get the label outside the boundary change px or py to be negative.
##'
##' @title Adds a text label at a fixed specified percentage inside plot boundaries.
##' @param px position in x dimension
##' @param py position in y dimension
##' @param ... other arguments to pass through to \code{axis} function
label <- function(text, px=0.03, py=NULL, ..., adj=c(0, 1)) {
if (is.null(py)) {
fin <- par("fin")
r <- fin[[1]] / fin[[2]]
if (r > 1) { # x is longer.
py <- 1 - px
px <- (1 - py) / r
} else {
py <- 1 - px * r
}
}
usr <- par("usr")
x <- usr[1] + px*(usr[2] - usr[1])
y <- usr[3] + py*(usr[4] - usr[3])
## NOTE: base 10 log:
if (par("xlog")) {
x <- 10^x
}
if (par("ylog")) {
y <- 10^y
}
text(x, y, text, adj=adj, ...)
}
##' Make a given color transparent
##' @param col Base colour
##' @param opacity Desired opacity
##' @author Rich FitzJohn
make.transparent <- function(col, opacity=.5) {
alpha <- opacity
if (length(alpha) > 1 && any(is.na(alpha))) {
n <- max(length(col), length(alpha))
alpha <- rep(alpha, length.out=n)
col <- rep(col, length.out=n)
ok <- !is.na(alpha)
ret <- rep(NA, length(col))
ret[ok] <- make_transparent(col[ok], alpha[ok])
ret
} else {
tmp <- col2rgb(col)/255
rgb(tmp[1,], tmp[2,], tmp[3,], alpha=alpha)
}
}
##' Save context of \code{expr} to the specified device. For more information see
##' http://nicercode.github.io/blog/2013-07-09-figure-functions/
##' @param expr Expression creating plot
##' @param dev Plotting device, e.g. \code{pdf} or \code{png}
##' @param ... Other arguments to pass through to device
##' @author Rich FitzJohn
to.dev <- function(expr, dev, filename, ..., verbose=TRUE) {
if ( verbose )
cat(sprintf("Creating %s\n", filename))
dev(filename, ...)
on.exit(dev.off())
eval.parent(substitute(expr))
}
to.pdf <- function(expr, filename, ...) {
to.dev(expr, pdf, filename, ...)
}
to.png <- function(expr, filename, ...) {
to.dev(expr, png, filename, ...)
}
##' Add log-scaled axes to current plot
##' If provided, uses specified values of \code{at}
##' Otherwise generates suitable values.
##' @param side Side of plot to add axis
##' @param label Add labels?
##' @param wholenumbers Only use whole numbers?
##' @param labelEnds Include label ends?
##' @param las text orientation
##' @author Daniel Falster
axis.log10 <- function(side=1, add.labels=TRUE, wholenumbers=TRUE,
labelEnds=TRUE, las=1) {
#get range on axis
if(side ==1 | side ==3) {
r <- par("usr")[1:2] #upper and lower limits of x-axis
logged <- par("xlog")
} else {
r <- par("usr")[3:4] #upper and lower limits of y-axis
logged <- par("xlog")
}
# make pretty intervals
at <- pretty(r)
# drop ends if desirable
if(!labelEnds)
at <- at[at > r[1] & at < r[2]]
# restrict to whole numbers if desirable
if(wholenumbers){
at<-at[is.wholenumber(at)]
}
labels <- do.call(expression, lapply(at, function(i) bquote(10^.(i))))
at <- 10^at
# make labels
if(add.labels) {
axis(side, at=at, labels=labels, las=las)
} else {
axis(side, at=at, labels=FALSE, las=las)
}
}
is.wholenumber <- function(x, tol = .Machine$double.eps^0.5) {
abs(x - round(x)) < tol
}
##' Return a vector of colours based on values of \code{x}
##' @param x Categorical data
##' @param v Range of rescaled data (min, max)
##' @author Daniel Falster
col.cat <- function(x=NULL){
if(is.null(x)) {
return(NULL)
}
v <- unique(x)
ret <- niceColors(length(v))
names(ret) <- as.character(v)
ret[as.character(x)]
}
##' Linear rescale x between range specified by \code{r.out}
##' @param x Data to be rescaled
##' @param r.out Range of rescaled data (min, max)
##' @author Daniel Falster
linear.rescale <- function(x, r.out= c(0.2, 10)) {
p <- (x - min(x)) / (max(x) - min(x))
r.out[[1]] + p * (r.out[[2]] - r.out[[1]])
}
```