First I’ll load some packages.
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(ggridges)
library(ggthemes)
The code below downloads the weather data for today’s example. Note that I cache this chunk because it takes a while to run (also note that I loaded the other packages I need in a separate chunk – caching will keep the data but won’t reload any packages from the cached chunk).
library(rnoaa)
# Pull 2016 daily precipitation and min/max temperature records for three
# monitoring stations, attach a readable station name, and rescale the
# temperature columns.
weather =
  meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2016-01-01",
    date_max = "2016-12-31"
  ) %>%
  mutate(
    # Translate each station id into a human-readable label.
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # NOTE(review): presumably the raw values are tenths of a degree C, so
    # dividing by 10 yields degrees C -- confirm against the data source docs.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  # Put the label and id columns first; keep every other column after them.
  select(name, id, everything())

# Print the tidied data.
weather
## # A tibble: 1,098 x 6
## name id date prcp tmax tmin
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 CentralPark_NY USW00094728 2016-01-01 0 5.6 1.1
## 2 CentralPark_NY USW00094728 2016-01-02 0 4.4 0.0
## 3 CentralPark_NY USW00094728 2016-01-03 0 7.2 1.7
## 4 CentralPark_NY USW00094728 2016-01-04 0 2.2 -9.9
## 5 CentralPark_NY USW00094728 2016-01-05 0 -1.6 -11.6
## 6 CentralPark_NY USW00094728 2016-01-06 0 5.0 -3.8
## 7 CentralPark_NY USW00094728 2016-01-07 0 7.8 -0.5
## 8 CentralPark_NY USW00094728 2016-01-08 0 7.8 -0.5
## 9 CentralPark_NY USW00094728 2016-01-09 0 8.3 4.4
## 10 CentralPark_NY USW00094728 2016-01-10 457 15.0 4.4
## # ... with 1,088 more rows
I’m going to work through some simple plots first.
# Three equivalent ways of building the same tmin-vs-tmax scatterplot.

# 1. Store the plot object (data + aesthetic mapping, no layers yet), print it
#    on its own, then print it again with a point layer added.
p = ggplot(data = weather, mapping = aes(x = tmin, y = tmax))
p
p + geom_point()

# 2. Build and print the plot in a single expression.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax)) +
  geom_point()

# 3. Pipe the data into ggplot(); the pipe supplies `weather` as the first
#    argument.
weather %>%
  ggplot(aes(x = tmin, y = tmax)) +
  geom_point()
Next I’m going to add some additional aesthetics to my simple plots.
# Color mapped to station name inside the point layer only.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax)) +
  geom_point(mapping = aes(color = name))

# Color mapped to station name in the plot-level aesthetics instead.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point()

# Same plot-level mapping, with a fixed transparency applied to every point.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5)
As part of this I’ll start to add extra geometries.
# Plot-level color mapping: both the points and the smooth layer use it.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'loess'

# Color mapped only in the smooth layer; the point layer has no color mapping.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax)) +
  geom_point(alpha = 0.5) +
  geom_smooth(mapping = aes(color = name), se = FALSE)
## `geom_smooth()` using method = 'loess'
I can try faceting as a way to make plots more readable.
# tmin vs tmax with one panel per station, laid out side by side in columns.
ggplot(data = weather, mapping = aes(x = tmin, y = tmax, color = name)) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess'

# tmax over time, again one panel per station; size is mapped to prcp.
ggplot(
  data = weather,
  mapping = aes(x = date, y = tmax, color = name, size = prcp)
) +
  geom_point(alpha = 0.5) +
  geom_smooth(se = FALSE) +
  facet_grid(. ~ name)
## `geom_smooth()` using method = 'loess'
## Warning: Removed 2 rows containing missing values (geom_point).
First learning assessment; code below comes from Shuang.
# Keep only the Waikiki rows, add Fahrenheit versions of tmax and tmin, and
# plot them against each other with a linear-model fit line on top.
weather %>%
  filter(name == "Waikiki_HA") %>%
  mutate(
    fmax = tmax * 9 / 5 + 32,
    fmin = tmin * 9 / 5 + 32
  ) %>%
  ggplot(mapping = aes(x = fmin, y = fmax)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE)
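Second learning assessment; in the first example we set the color aesthetic to a specific value outside of aes(),
so every point is drawn in blue. In the second example color = "blue" appears inside aes(), so ggplot
colors points according to a variable that takes the value "blue" for all observations.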
# Color and alpha given as fixed values, outside aes().
ggplot(data = weather) +
  geom_point(
    mapping = aes(x = tmax, y = tmin),
    color = "blue",
    alpha = 0.4
  )

# Color given inside aes(), so "blue" is treated as data to be mapped.
ggplot(data = weather) +
  geom_point(mapping = aes(x = tmax, y = tmin, color = "blue"))
Bivariate plots (e.g. scatterplots) are great, but we also need to plot individual distributions.
# Histogram of tmax using the default binning.
ggplot(data = weather, mapping = aes(x = tmax)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram filled by station, bars placed side by side, bins 2 units wide.
ggplot(data = weather, mapping = aes(x = tmax, fill = name)) +
  geom_histogram(position = "dodge", binwidth = 2)

# Density curves filled by station, semi-transparent, with a blue outline.
ggplot(data = weather, mapping = aes(x = tmax, fill = name)) +
  geom_density(alpha = 0.4, adjust = 0.5, color = "blue")
# Boxplot of tmax for each station.
ggplot(weather, aes(x = name, y = tmax)) +
  geom_boxplot()

# Violin plot of tmax for each station, with the per-station median overlaid
# as a large point.
# `fun` replaces the `fun.y` argument, which is deprecated as of
# ggplot2 3.3.0; this form requires ggplot2 >= 3.3.0.
ggplot(weather, aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  stat_summary(fun = median, geom = "point", color = "blue", size = 4)

# Ridge plot (ggridges): one tmax density per station, stacked along the y axis.
ggplot(weather, aes(x = tmax, y = name)) +
  geom_density_ridges(scale = .85)
## Picking joint bandwidth of 1.68
Finally I’m going to play around with various themes to improve the quality of my graphics.
# Scatterplot of tmin vs tmax colored by station, with a title, axis labels
# and caption, the black-and-white theme, and the legend moved below the plot.
ggplot(weather, aes(x = tmin, y = tmax)) +
  geom_point(aes(color = name), alpha = .5) +
  labs(
    title = "Temperature plot",
    x = "Minimum daily temperature (C)",
    # Fixed the misspelling "Maxiumum" in the y-axis label.
    y = "Maximum daily temperature (C)",
    caption = "Data from the rnoaa package"
  ) +
  # theme_bw() comes first: a complete theme applied after theme() would
  # reset the legend position.
  theme_bw() +
  theme(legend.position = "bottom")
# Violin plots of tmax by station with the station factor put in a chosen
# order. In both plots `fun` replaces the `fun.y` argument, which is
# deprecated as of ggplot2 3.3.0; this form requires ggplot2 >= 3.3.0.

# Station order given by hand.
weather %>%
  mutate(
    name = forcats::fct_relevel(
      name,
      c("Waikiki_HA", "CentralPark_NY", "Waterhole_WA")
    )
  ) %>%
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  stat_summary(fun = median, geom = "point", color = "blue", size = 4) +
  theme(legend.position = "bottom")

# Station order derived from the tmax values via fct_reorder().
weather %>%
  mutate(name = forcats::fct_reorder(name, tmax)) %>%
  ggplot(aes(x = name, y = tmax)) +
  geom_violin(aes(fill = name), color = "blue", alpha = .5) +
  stat_summary(fun = median, geom = "point", color = "blue", size = 4) +
  theme(legend.position = "bottom")