library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(readr)
library(readxl)
library(haven)
This document looks at tools for importing data tables.
Load the Litters dataset using a relative path, and clean the names using janitor::clean_names()
.
litters_data = readr::read_csv(file = "./data/FAS_litters.csv")
## Parsed with column specification:
## cols(
## Group = col_character(),
## `Litter Number` = col_character(),
## `GD0 weight` = col_double(),
## `GD18 weight` = col_double(),
## `GD of Birth` = col_integer(),
## `Pups born alive` = col_integer(),
## `Pups dead @ birth` = col_integer(),
## `Pups survive` = col_integer()
## )
litters_data = janitor::clean_names(litters_data)
names(litters_data)
## [1] "group" "litter_number" "gd0_weight" "gd18_weight"
## [5] "gd_of_birth" "pups_born_alive" "pups_dead_birth" "pups_survive"
Load the Litters dataset using an absolute path, and clean the names using janitor::clean_names()
. The code chunk uses error = TRUE
so that this file will knit on other systems; otherwise the file won’t be found and the error will halt knitting.
litters_data = readr::read_csv(file = "/Users/jeffgoldsmith/Desktop/data_import/data/FAS_litters.csv")
## Parsed with column specification:
## cols(
## Group = col_character(),
## `Litter Number` = col_character(),
## `GD0 weight` = col_double(),
## `GD18 weight` = col_double(),
## `GD of Birth` = col_integer(),
## `Pups born alive` = col_integer(),
## `Pups dead @ birth` = col_integer(),
## `Pups survive` = col_integer()
## )
litters_data = janitor::clean_names(litters_data)
names(litters_data)
## [1] "group" "litter_number" "gd0_weight" "gd18_weight"
## [5] "gd_of_birth" "pups_born_alive" "pups_dead_birth" "pups_survive"
Load data and parse columns.
litters_data = read_csv(file = "./data/FAS_litters.csv",
col_types = "ccddiiii"
)
litters_data
## # A tibble: 49 x 8
## Group `Litter Number` `GD0 weight` `GD18 weight` `GD of Birth`
## <chr> <chr> <dbl> <dbl> <int>
## 1 Con7 #85 19.7 34.7 20
## 2 Con7 #1/2/95/2 27.0 42.0 19
## 3 Con7 #5/5/3/83/3-3 26.0 41.4 19
## 4 Con7 #5/4/2/95/2 28.5 44.1 19
## 5 Con7 #4/2/95/3-3 NA NA 20
## 6 Con7 #2/2/95/3-2 NA NA 20
## 7 Con7 #1/5/3/83/3-3/2 NA NA 20
## 8 Con8 #3/83/3-3 NA NA 20
## 9 Con8 #2/95/3 NA NA 20
## 10 Con8 #3/5/2/2/95 28.5 NA 20
## # ... with 39 more rows, and 3 more variables: `Pups born alive` <int>,
## # `Pups dead @ birth` <int>, `Pups survive` <int>
Here I’m still using a relative path.
library(readxl)
mlb11_data = read_excel("data/mlb11.xlsx", n_max = 20)
head(mlb11_data, 5)
## # A tibble: 5 x 12
## team runs at_bats hits homeruns bat_avg strikeouts
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Texas Rangers 855 5659 1599 210 0.283 930
## 2 Boston Red Sox 875 5710 1600 203 0.280 1108
## 3 Detroit Tigers 787 5563 1540 169 0.277 1143
## 4 Kansas City Royals 730 5672 1560 129 0.275 1006
## 5 St. Louis Cardinals 762 5532 1513 162 0.273 978
## # ... with 5 more variables: stolen_bases <dbl>, wins <dbl>,
## # new_onbase <dbl>, new_slug <dbl>, new_obs <dbl>
Finally I’m going to load a SAS database.
library(haven)
pulse_data = haven::read_sas("./data/public_pulse_data.sas7bdat")
names(pulse_data)
## [1] "ID" "age" "Sex" "BDIScore_BL"
## [5] "BDIScore_01m" "BDIScore_06m" "BDIScore_12m"
names(janitor::clean_names(pulse_data))
## [1] "id" "age" "sex" "bdiscore_bl"
## [5] "bdiscore_01m" "bdiscore_06m" "bdiscore_12m"
head(pulse_data, 5)
## # A tibble: 5 x 7
## ID age Sex BDIScore_BL BDIScore_01m BDIScore_06m BDIScore_12m
## <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 10003 48.03836 male 7 1 2 0
## 2 10015 72.47123 male 6 NA NA NA
## 3 10022 58.50959 male 14 3 8 NA
## 4 10026 72.72877 male 20 6 18 16
## 5 10035 60.40274 male 4 0 1 2
litters_data_base = read.csv("./data/FAS_litters.csv")
Tibbles will give limited output with printed. Traditional dataframes don’t behave the same way.
litters_data
## # A tibble: 49 x 8
## Group `Litter Number` `GD0 weight` `GD18 weight` `GD of Birth`
## <chr> <chr> <dbl> <dbl> <int>
## 1 Con7 #85 19.7 34.7 20
## 2 Con7 #1/2/95/2 27.0 42.0 19
## 3 Con7 #5/5/3/83/3-3 26.0 41.4 19
## 4 Con7 #5/4/2/95/2 28.5 44.1 19
## 5 Con7 #4/2/95/3-3 NA NA 20
## 6 Con7 #2/2/95/3-2 NA NA 20
## 7 Con7 #1/5/3/83/3-3/2 NA NA 20
## 8 Con8 #3/83/3-3 NA NA 20
## 9 Con8 #2/95/3 NA NA 20
## 10 Con8 #3/5/2/2/95 28.5 NA 20
## # ... with 39 more rows, and 3 more variables: `Pups born alive` <int>,
## # `Pups dead @ birth` <int>, `Pups survive` <int>
litters_data_base
## Group Litter.Number GD0.weight GD18.weight GD.of.Birth
## 1 Con7 #85 19.7 34.7 20
## 2 Con7 #1/2/95/2 27.0 42.0 19
## 3 Con7 #5/5/3/83/3-3 26.0 41.4 19
## 4 Con7 #5/4/2/95/2 28.5 44.1 19
## 5 Con7 #4/2/95/3-3 NA NA 20
## 6 Con7 #2/2/95/3-2 NA NA 20
## 7 Con7 #1/5/3/83/3-3/2 NA NA 20
## 8 Con8 #3/83/3-3 NA NA 20
## 9 Con8 #2/95/3 NA NA 20
## 10 Con8 #3/5/2/2/95 28.5 NA 20
## 11 Con8 #5/4/3/83/3 28.0 NA 19
## 12 Con8 #1/6/2/2/95-2 NA NA 20
## 13 Con8 #3/5/3/83/3-3-2 NA NA 20
## 14 Con8 #2/2/95/2 NA NA 19
## 15 Con8 #3/6/2/2/95-3 NA NA 20
## 16 Mod7 #59 17.0 33.4 19
## 17 Mod7 #103 21.4 42.1 19
## 18 Mod7 #1/82/3-2 NA NA 19
## 19 Mod7 #3/83/3-2 NA NA 19
## 20 Mod7 #2/95/2-2 NA NA 20
## 21 Mod7 #3/82/3-2 28.0 45.9 20
## 22 Mod7 #4/2/95/2 23.5 NA 19
## 23 Mod7 #5/3/83/5-2 22.6 37.0 19
## 24 Mod7 #8/110/3-2 NA NA 20
## 25 Mod7 #106 21.7 37.8 20
## 26 Mod7 #94/2 24.4 42.9 19
## 27 Mod7 #62 19.5 35.9 19
## 28 Low7 #84/2 24.3 40.8 20
## 29 Low7 #107 22.6 42.4 20
## 30 Low7 #85/2 22.2 38.5 20
## 31 Low7 #98 23.8 43.8 20
## 32 Low7 #102 22.6 43.3 20
## 33 Low7 #101 23.8 42.7 20
## 34 Low7 #111 25.5 44.6 20
## 35 Low7 #112 23.9 40.5 19
## 36 Mod8 #97 24.5 42.8 20
## 37 Mod8 #5/93 NA 41.1 20
## 38 Mod8 #5/93/2 NA NA 19
## 39 Mod8 #7/82-3-2 26.9 43.2 20
## 40 Mod8 #7/110/3-2 27.5 46.0 19
## 41 Mod8 #2/95/2 28.5 44.5 20
## 42 Mod8 #82/4 33.4 52.7 20
## 43 Low8 #53 21.8 37.2 20
## 44 Low8 #79 25.4 43.8 19
## 45 Low8 #100 20.0 39.2 20
## 46 Low8 #4/84 21.8 35.2 20
## 47 Low8 #108 25.6 47.5 20
## 48 Low8 #99 23.5 39.0 20
## 49 Low8 #110 25.5 42.7 20
## Pups.born.alive Pups.dead...birth Pups.survive
## 1 3 4 3
## 2 8 0 7
## 3 6 0 5
## 4 5 1 4
## 5 6 0 6
## 6 6 0 4
## 7 9 0 9
## 8 9 1 8
## 9 8 0 8
## 10 8 0 8
## 11 9 0 8
## 12 7 0 6
## 13 8 0 8
## 14 5 0 4
## 15 7 0 7
## 16 8 0 5
## 17 9 1 9
## 18 6 0 6
## 19 8 0 8
## 20 7 0 7
## 21 5 0 5
## 22 9 0 7
## 23 5 0 5
## 24 9 0 9
## 25 5 0 2
## 26 7 1 3
## 27 7 2 4
## 28 8 0 8
## 29 9 0 8
## 30 8 0 6
## 31 9 0 9
## 32 11 0 7
## 33 9 0 9
## 34 3 2 3
## 35 6 1 1
## 36 8 1 8
## 37 11 0 9
## 38 8 0 8
## 39 7 0 7
## 40 8 1 8
## 41 9 0 9
## 42 8 0 6
## 43 8 1 7
## 44 8 0 7
## 45 8 0 7
## 46 4 0 4
## 47 8 0 7
## 48 6 0 5
## 49 7 0 6