library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(readr)
library(readxl)
library(haven)

This document looks at tools for importing data tables.

CSV

Load the Litters dataset using a relative path, and clean the names using janitor::clean_names().

litters_data = readr::read_csv(file = "./data/FAS_litters.csv")
## Parsed with column specification:
## cols(
##   Group = col_character(),
##   `Litter Number` = col_character(),
##   `GD0 weight` = col_double(),
##   `GD18 weight` = col_double(),
##   `GD of Birth` = col_integer(),
##   `Pups born alive` = col_integer(),
##   `Pups dead @ birth` = col_integer(),
##   `Pups survive` = col_integer()
## )
litters_data = janitor::clean_names(litters_data)
names(litters_data)
## [1] "group"           "litter_number"   "gd0_weight"      "gd18_weight"    
## [5] "gd_of_birth"     "pups_born_alive" "pups_dead_birth" "pups_survive"

Load the Litters dataset using an absolute path, and clean the names using janitor::clean_names(). The code chunk uses error = TRUE so that this file will knit on other systems; otherwise the file won’t be found and the error will halt knitting.

litters_data = readr::read_csv(file = "/Users/jeffgoldsmith/Desktop/data_import/data/FAS_litters.csv")
## Parsed with column specification:
## cols(
##   Group = col_character(),
##   `Litter Number` = col_character(),
##   `GD0 weight` = col_double(),
##   `GD18 weight` = col_double(),
##   `GD of Birth` = col_integer(),
##   `Pups born alive` = col_integer(),
##   `Pups dead @ birth` = col_integer(),
##   `Pups survive` = col_integer()
## )
litters_data = janitor::clean_names(litters_data)
names(litters_data)
## [1] "group"           "litter_number"   "gd0_weight"      "gd18_weight"    
## [5] "gd_of_birth"     "pups_born_alive" "pups_dead_birth" "pups_survive"

Load data and parse columns.

litters_data = read_csv(file = "./data/FAS_litters.csv",
  col_types = "ccddiiii"
)
litters_data
## # A tibble: 49 x 8
##    Group `Litter Number` `GD0 weight` `GD18 weight` `GD of Birth`
##    <chr>           <chr>        <dbl>         <dbl>         <int>
##  1  Con7             #85         19.7          34.7            20
##  2  Con7       #1/2/95/2         27.0          42.0            19
##  3  Con7   #5/5/3/83/3-3         26.0          41.4            19
##  4  Con7     #5/4/2/95/2         28.5          44.1            19
##  5  Con7     #4/2/95/3-3           NA            NA            20
##  6  Con7     #2/2/95/3-2           NA            NA            20
##  7  Con7 #1/5/3/83/3-3/2           NA            NA            20
##  8  Con8       #3/83/3-3           NA            NA            20
##  9  Con8         #2/95/3           NA            NA            20
## 10  Con8     #3/5/2/2/95         28.5            NA            20
## # ... with 39 more rows, and 3 more variables: `Pups born alive` <int>,
## #   `Pups dead @ birth` <int>, `Pups survive` <int>

Read an Excel spreadsheet

Here I’m still using a relative path.

library(readxl)
mlb11_data = read_excel("data/mlb11.xlsx", n_max = 20)
head(mlb11_data, 5)
## # A tibble: 5 x 12
##                  team  runs at_bats  hits homeruns bat_avg strikeouts
##                 <chr> <dbl>   <dbl> <dbl>    <dbl>   <dbl>      <dbl>
## 1       Texas Rangers   855    5659  1599      210   0.283        930
## 2      Boston Red Sox   875    5710  1600      203   0.280       1108
## 3      Detroit Tigers   787    5563  1540      169   0.277       1143
## 4  Kansas City Royals   730    5672  1560      129   0.275       1006
## 5 St. Louis Cardinals   762    5532  1513      162   0.273        978
## # ... with 5 more variables: stolen_bases <dbl>, wins <dbl>,
## #   new_onbase <dbl>, new_slug <dbl>, new_obs <dbl>

Read a SAS database

Finally I’m going to load a SAS database.

library(haven)
pulse_data = haven::read_sas("./data/public_pulse_data.sas7bdat")
names(pulse_data)
## [1] "ID"           "age"          "Sex"          "BDIScore_BL" 
## [5] "BDIScore_01m" "BDIScore_06m" "BDIScore_12m"
names(janitor::clean_names(pulse_data))
## [1] "id"           "age"          "sex"          "bdiscore_bl" 
## [5] "bdiscore_01m" "bdiscore_06m" "bdiscore_12m"
head(pulse_data, 5)
## # A tibble: 5 x 7
##      ID      age   Sex BDIScore_BL BDIScore_01m BDIScore_06m BDIScore_12m
##   <dbl>    <dbl> <chr>       <dbl>        <dbl>        <dbl>        <dbl>
## 1 10003 48.03836  male           7            1            2            0
## 2 10015 72.47123  male           6           NA           NA           NA
## 3 10022 58.50959  male          14            3            8           NA
## 4 10026 72.72877  male          20            6           18           16
## 5 10035 60.40274  male           4            0            1            2

Compare with base R

litters_data_base = read.csv("./data/FAS_litters.csv")

Tibbles will give limited output with printed. Traditional dataframes don’t behave the same way.

litters_data
## # A tibble: 49 x 8
##    Group `Litter Number` `GD0 weight` `GD18 weight` `GD of Birth`
##    <chr>           <chr>        <dbl>         <dbl>         <int>
##  1  Con7             #85         19.7          34.7            20
##  2  Con7       #1/2/95/2         27.0          42.0            19
##  3  Con7   #5/5/3/83/3-3         26.0          41.4            19
##  4  Con7     #5/4/2/95/2         28.5          44.1            19
##  5  Con7     #4/2/95/3-3           NA            NA            20
##  6  Con7     #2/2/95/3-2           NA            NA            20
##  7  Con7 #1/5/3/83/3-3/2           NA            NA            20
##  8  Con8       #3/83/3-3           NA            NA            20
##  9  Con8         #2/95/3           NA            NA            20
## 10  Con8     #3/5/2/2/95         28.5            NA            20
## # ... with 39 more rows, and 3 more variables: `Pups born alive` <int>,
## #   `Pups dead @ birth` <int>, `Pups survive` <int>
litters_data_base
##    Group   Litter.Number GD0.weight GD18.weight GD.of.Birth
## 1   Con7             #85       19.7        34.7          20
## 2   Con7       #1/2/95/2       27.0        42.0          19
## 3   Con7   #5/5/3/83/3-3       26.0        41.4          19
## 4   Con7     #5/4/2/95/2       28.5        44.1          19
## 5   Con7     #4/2/95/3-3         NA          NA          20
## 6   Con7     #2/2/95/3-2         NA          NA          20
## 7   Con7 #1/5/3/83/3-3/2         NA          NA          20
## 8   Con8       #3/83/3-3         NA          NA          20
## 9   Con8         #2/95/3         NA          NA          20
## 10  Con8     #3/5/2/2/95       28.5          NA          20
## 11  Con8     #5/4/3/83/3       28.0          NA          19
## 12  Con8   #1/6/2/2/95-2         NA          NA          20
## 13  Con8 #3/5/3/83/3-3-2         NA          NA          20
## 14  Con8       #2/2/95/2         NA          NA          19
## 15  Con8   #3/6/2/2/95-3         NA          NA          20
## 16  Mod7             #59       17.0        33.4          19
## 17  Mod7            #103       21.4        42.1          19
## 18  Mod7       #1/82/3-2         NA          NA          19
## 19  Mod7       #3/83/3-2         NA          NA          19
## 20  Mod7       #2/95/2-2         NA          NA          20
## 21  Mod7       #3/82/3-2       28.0        45.9          20
## 22  Mod7       #4/2/95/2       23.5          NA          19
## 23  Mod7     #5/3/83/5-2       22.6        37.0          19
## 24  Mod7      #8/110/3-2         NA          NA          20
## 25  Mod7            #106       21.7        37.8          20
## 26  Mod7           #94/2       24.4        42.9          19
## 27  Mod7             #62       19.5        35.9          19
## 28  Low7           #84/2       24.3        40.8          20
## 29  Low7            #107       22.6        42.4          20
## 30  Low7           #85/2       22.2        38.5          20
## 31  Low7             #98       23.8        43.8          20
## 32  Low7            #102       22.6        43.3          20
## 33  Low7            #101       23.8        42.7          20
## 34  Low7            #111       25.5        44.6          20
## 35  Low7            #112       23.9        40.5          19
## 36  Mod8             #97       24.5        42.8          20
## 37  Mod8           #5/93         NA        41.1          20
## 38  Mod8         #5/93/2         NA          NA          19
## 39  Mod8       #7/82-3-2       26.9        43.2          20
## 40  Mod8      #7/110/3-2       27.5        46.0          19
## 41  Mod8         #2/95/2       28.5        44.5          20
## 42  Mod8           #82/4       33.4        52.7          20
## 43  Low8             #53       21.8        37.2          20
## 44  Low8             #79       25.4        43.8          19
## 45  Low8            #100       20.0        39.2          20
## 46  Low8           #4/84       21.8        35.2          20
## 47  Low8            #108       25.6        47.5          20
## 48  Low8             #99       23.5        39.0          20
## 49  Low8            #110       25.5        42.7          20
##    Pups.born.alive Pups.dead...birth Pups.survive
## 1                3                 4            3
## 2                8                 0            7
## 3                6                 0            5
## 4                5                 1            4
## 5                6                 0            6
## 6                6                 0            4
## 7                9                 0            9
## 8                9                 1            8
## 9                8                 0            8
## 10               8                 0            8
## 11               9                 0            8
## 12               7                 0            6
## 13               8                 0            8
## 14               5                 0            4
## 15               7                 0            7
## 16               8                 0            5
## 17               9                 1            9
## 18               6                 0            6
## 19               8                 0            8
## 20               7                 0            7
## 21               5                 0            5
## 22               9                 0            7
## 23               5                 0            5
## 24               9                 0            9
## 25               5                 0            2
## 26               7                 1            3
## 27               7                 2            4
## 28               8                 0            8
## 29               9                 0            8
## 30               8                 0            6
## 31               9                 0            9
## 32              11                 0            7
## 33               9                 0            9
## 34               3                 2            3
## 35               6                 1            1
## 36               8                 1            8
## 37              11                 0            9
## 38               8                 0            8
## 39               7                 0            7
## 40               8                 1            8
## 41               9                 0            9
## 42               8                 0            6
## 43               8                 1            7
## 44               8                 0            7
## 45               8                 0            7
## 46               4                 0            4
## 47               8                 0            7
## 48               6                 0            5
## 49               7                 0            6