Intermediate Importing Data in R
Filip Schouwenaars
Instructor, DataCamp
R Core Team
Less consistent
Very comprehensive
All kinds of foreign data formats
SAS, STATA, SPSS, Systat, Weka …
install.packages("foreign")
library(foreign)
Cannot import .sas7bdat
Only SAS libraries: .xport
Alternative for .sas7bdat files: the sas7bdat package
STATA 5 to 12
read.dta()
- read.dta()
read.dta(file,
convert.factors = TRUE,
convert.dates = TRUE,
missing.type = FALSE)
ontime <- read.dta("ontime.dta")
ontime
Airline March_1999 June_1999 August_1999
1 TWA 84.4 69.4 85.0
2 Southwest 80.3 77.0 80.4
3 Northwest 80.8 75.1 81.0
4 American 72.7 65.1 78.3
5 Delta 78.7 72.2 77.7
6 Continental 79.3 68.4 75.1
7 United 78.6 69.2 71.6
8 US Airways 73.6 68.9 70.1
9 Alaska 71.9 75.4 64.4
10 American West 76.5 70.3 62.5
ontime <- read.dta("ontime.dta")
str(ontime)
convert.factors is TRUE by default
'data.frame': 10 obs. of 4 variables:
$ Airline : Factor w/ 10 levels "Alaska",..: 8 7 6 2 5 4 ...
$ March_1999 : num 84.4 80.3 80.8 72.7 78.7 79.3 78.6 ...
$ June_1999 : num 69.4 77 75.1 65.1 72.2 68.4 69.2 68.9 ...
$ August_1999: num 85 80.4 81 78.3 77.7 75.1 71.6 70.1 ...
- attr(*, "datalabel")= chr "Written by R. "
- attr(*, "time.stamp")= chr ""
- attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g"
- attr(*, "types")= int 108 100 100 100
- attr(*, "val.labels")= chr "Airline" "" "" ""
- attr(*, "var.labels")= chr "Airline" "March_1999" ...
- attr(*, "version")= int 7
- attr(*, "label.table")=List of 1
..$ Airline: Named int 1 2 3 4 5 6 7 8 9 10
.. ..- attr(*, "names")= chr "Alaska" "American" ...
ontime <- read.dta("ontime.dta", convert.factors = FALSE)
str(ontime)
'data.frame': 10 obs. of 4 variables:
$ Airline : int 8 7 6 2 5 4 9 10 1 3
$ March_1999 : num 84.4 80.3 80.8 72.7 78.7 79.3 78.6 ...
$ June_1999 : num 69.4 77 75.1 65.1 72.2 68.4 69.2 68.9 ...
$ August_1999: num 85 80.4 81 78.3 77.7 75.1 71.6 70.1 ...
- attr(*, "datalabel")= chr "Written by R. "
- attr(*, "time.stamp")= chr ""
- attr(*, "formats")= chr "%9.0g" "%9.0g" "%9.0g" "%9.0g"
- attr(*, "types")= int 108 100 100 100
- attr(*, "val.labels")= chr "Airline" "" "" ""
- attr(*, "var.labels")= chr "Airline" "March_1999" ...
- attr(*, "version")= int 7
- attr(*, "label.table")=List of 1
..$ Airline: Named int 1 2 3 4 5 6 7 8 9 10
.. ..- attr(*, "names")= chr "Alaska" "American" ...
read.dta(file,
convert.factors = TRUE,
convert.dates = TRUE,
missing.type = FALSE)
convert.factors: convert labelled STATA values to R factors
convert.dates: convert STATA dates and times to Date and POSIXct
missing.type:
FALSE: convert all types of missing values to NA
TRUE: store how values are missing in attributes
read.spss()
read.spss(file,
use.value.labels = TRUE,
to.data.frame = FALSE)
use.value.labels: convert labelled SPSS values to R factors
to.data.frame: return data frame instead of a list
trim.factor.names
trim_values
use.missings
Intermediate Importing Data in R