First I’ll load some packages.

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(ggridges)
library(ggthemes)

Load the data

The code below downloads the weather data for today’s example. Note that I cache this chunk because it takes a while to run (also note that I loaded the other packages I need in a separate chunk – caching will keep the data but won’t reload any packages from the cached chunk).

library(rnoaa)

# Pull 2016 daily precipitation and min/max temperature for three stations,
# attach a readable station name, and rescale the temperature columns.
weather = 
  meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2016-01-01",
    date_max = "2016-12-31"
  ) %>%
  # Translate each station id into a human-readable label.
  mutate(
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    )
  ) %>%
  # Divide both temperatures by 10 (presumably the raw values are tenths of a
  # degree C -- TODO confirm against the rnoaa documentation).
  mutate(
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Move the identifying columns to the front; keep everything else after.
  select(name, id, everything())

# Print the tibble as a quick sanity check.
weather
## # A tibble: 1,098 x 6
##              name          id       date  prcp  tmax  tmin
##             <chr>       <chr>     <date> <dbl> <dbl> <dbl>
##  1 CentralPark_NY USW00094728 2016-01-01     0   5.6   1.1
##  2 CentralPark_NY USW00094728 2016-01-02     0   4.4   0.0
##  3 CentralPark_NY USW00094728 2016-01-03     0   7.2   1.7
##  4 CentralPark_NY USW00094728 2016-01-04     0   2.2  -9.9
##  5 CentralPark_NY USW00094728 2016-01-05     0  -1.6 -11.6
##  6 CentralPark_NY USW00094728 2016-01-06     0   5.0  -3.8
##  7 CentralPark_NY USW00094728 2016-01-07     0   7.8  -0.5
##  8 CentralPark_NY USW00094728 2016-01-08     0   7.8  -0.5
##  9 CentralPark_NY USW00094728 2016-01-09     0   8.3   4.4
## 10 CentralPark_NY USW00094728 2016-01-10   457  15.0   4.4
## # ... with 1,088 more rows

Make some plots

I’m going to work through some simple plots first.

# Save a base plot: data and x/y mappings only, with no geometry added yet.
p = ggplot(data = weather, mapping = aes(x = tmin, y = tmax))
p

# Adding a point layer to the saved object yields a scatterplot.
p + geom_point()

# The same scatterplot written as one expression.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax)) +
  geom_point()

# The same scatterplot again, this time piping the data into ggplot().
weather %>%
  ggplot(data = ., mapping = aes(x = tmin, y = tmax)) +
  geom_point()

Adding aesthetics

Next I’m going to add some additional aesthetics to my simple plots.

# Color mapped inside the point layer only.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax)) + 
  geom_point(mapping = aes(color = name))

# Color mapped in ggplot(), so it applies to every layer added afterwards.
ggplot(
  data = weather,
  mapping = aes(x = tmin, y = tmax, color = name)
) + 
  geom_point()

# Same global mapping, with semi-transparent points.
ggplot(
  data = weather,
  mapping = aes(x = tmin, y = tmax, color = name)
) + 
  geom_point(alpha = 0.5)

As part of this I’ll start to add extra geometries.

# Color is mapped globally, so both the points and the smooth are drawn
# per station; the confidence band is turned off.
ggplot(
  data = weather,
  mapping = aes(x = tmin, y = tmax, color = name)
) + 
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# Color is mapped only in the smooth layer: the points carry no color
# mapping, while the smooth curves are colored by station.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax)) + 
  geom_point(alpha = 0.5) +
  geom_smooth(mapping = aes(color = name), se = FALSE)
## `geom_smooth()` using method = 'loess'

I can try faceting as a way to make plots more readable.

# One panel per station (laid out in columns), each with points and a smooth.
ggplot(
  data = weather,
  mapping = aes(x = tmin, y = tmax, color = name)
) + 
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) + 
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess'

# Maximum temperature over time, one panel per station; point size follows
# precipitation.
ggplot(
  data = weather,
  mapping = aes(x = date, y = tmax, color = name, size = prcp)
) + 
  geom_point(alpha = 0.5) + 
  geom_smooth(se = FALSE) + 
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing missing values (geom_point).

First learning assessment; code below comes from Shuang.

# Waikiki only: convert both temperatures from Celsius to Fahrenheit, then
# plot max against min with a fitted straight line (no confidence band).
weather %>%
  filter(name == "Waikiki_HA") %>%
  mutate(
    fmax = tmax * 9 / 5 + 32,
    fmin = tmin * 9 / 5 + 32
  ) %>%
  ggplot(mapping = aes(x = fmin, y = fmax)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)

Second learning assessment; in the first example we set an aesthetic to a specific value, but in the second, ggplot colors the points according to a variable that takes the value "blue" for all observations.

# color is given outside aes(), so it is a fixed setting for the layer.
ggplot(data = weather) +
  geom_point(
    mapping = aes(x = tmax, y = tmin),
    color = "blue",
    alpha = 0.4
  )

# color is given inside aes(), so "blue" is treated as a data value to map
# rather than as a literal color.
ggplot(data = weather) +
  geom_point(mapping = aes(x = tmax, y = tmin, color = "blue"))

Univariate plots

Bivariate plots (e.g. scatterplots) are great, but we also need to plot individual distributions.

# Histogram of maximum temperature using the default binning.
ggplot(data = weather, mapping = aes(x = tmax)) + 
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram filled by station, bars placed side by side, 2-unit bins.
ggplot(data = weather, mapping = aes(x = tmax, fill = name)) + 
  geom_histogram(binwidth = 2, position = "dodge")

# Overlaid density curves per station: semi-transparent fill, blue outline,
# and a bandwidth adjustment of one half.
ggplot(data = weather, mapping = aes(x = tmax, fill = name)) + 
  geom_density(adjust = 0.5, alpha = 0.4, color = "blue")

# Boxplot of maximum temperature for each station.
ggplot(data = weather, mapping = aes(x = name, y = tmax)) + 
  geom_boxplot()

# Violin plot per station with the median marked as a large blue point.
# NOTE(review): newer ggplot2 releases rename `fun.y` to `fun`; it is kept
# as-is here -- confirm before upgrading.
ggplot(data = weather, mapping = aes(x = name, y = tmax)) + 
  geom_violin(mapping = aes(fill = name), color = "blue", alpha = 0.5) + 
  stat_summary(fun.y = median, geom = "point", color = "blue", size = 4)

# Ridge plot: one density of maximum temperature per station.
ggplot(data = weather, mapping = aes(x = tmax, y = name)) + 
  geom_density_ridges(scale = 0.85)
## Picking joint bandwidth of 1.68

Themes etc

Finally I’m going to play around with various themes to improve the quality of my graphics.

# Scatterplot of max vs. min temperature, colored by station, with a title,
# axis labels, and a caption; black-and-white theme with the legend moved
# below the plot.
ggplot(weather, aes(x = tmin, y = tmax)) + 
  geom_point(aes(color = name), alpha = .5) + 
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    # Spelling fixed: the label previously read "Maxiumum".
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package"
  ) + 
  # theme_bw() must come before theme(): a complete theme resets any
  # earlier theme() tweaks such as the legend position.
  theme_bw() + 
  theme(legend.position = "bottom")

# Violin plot with the stations placed in a hand-picked order.
# NOTE(review): newer ggplot2 releases rename `fun.y` to `fun`; it is kept
# as-is here -- confirm before upgrading.
weather %>%
  mutate(
    name = forcats::fct_relevel(
      name,
      c("Waikiki_HA", "CentralPark_NY", "Waterhole_WA")
    )
  ) %>% 
  ggplot(mapping = aes(x = name, y = tmax)) + 
  geom_violin(mapping = aes(fill = name), color = "blue", alpha = 0.5) + 
  stat_summary(fun.y = median, geom = "point", color = "blue", size = 4) + 
  theme(legend.position = "bottom")

# Violin plot with the stations ordered by their median maximum temperature.
weather %>%
  # tmax may contain missing values (an earlier plot in this document reports
  # dropped rows). Without na.rm = TRUE the median of any station with a
  # missing tmax would be NA and that station would be sorted last regardless
  # of its temperatures; na.rm is forwarded to median() via `...`.
  mutate(
    name = forcats::fct_reorder(name, tmax, .fun = median, na.rm = TRUE)
  ) %>% 
  ggplot(aes(x = name, y = tmax)) + 
  geom_violin(aes(fill = name), color = "blue", alpha = .5) + 
  # NOTE(review): newer ggplot2 releases rename `fun.y` to `fun`; it is kept
  # as-is here -- confirm before upgrading.
  stat_summary(fun.y = median, geom = "point", color = "blue", size = 4) + 
  theme(legend.position = "bottom")