-
Notifications
You must be signed in to change notification settings - Fork 4
/
impute.rrcovna.r
28 lines (22 loc) · 925 Bytes
/
impute.rrcovna.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
library(rrcovNA)
# Fix the RNG seed so repeated runs of this script are reproducible.
set.seed(42)

# Read the training data; empty strings are treated as missing values (NA).
train <- read.csv("data/train.csv", sep = ",", na.strings = "")

# Drop the columns that are not used for imputation.
train <- subset(train, select = -c(Cabin, Ticket, Name, Fare))

# Correct some column types.
train$Sex <- as.factor(train$Sex)
train$Embarked <- as.factor(train$Embarked)
# Pin the levels of Survived explicitly to 0 and 1.
# factor() is used because as.factor() has no `levels` argument; calling
# as.factor(x, levels = ...) stops with an "unused argument" error.
# NOTE(review): the factor's underlying integer codes are still 1 and 2 --
# confirm how impSeq()/impSeqRob() treat factor columns.
train$Survived <- factor(train$Survived, levels = c(0, 1))

# Inspect the prepared data before imputation.
summary(train)
str(train)

# Impute missing multivariate data using the sequential algorithm.
train.imp_seq <- impSeq(train)
summary(train.imp_seq)
str(train.imp_seq)

# Impute missing multivariate data using the robust sequential algorithm,
# passing alpha explicitly (0.9, noted by the original author as the default).
# The imputed data is taken from the `x` element of the returned result.
train.imp_seq_rob <- impSeqRob(train, alpha = 0.9)$x
summary(train.imp_seq_rob)
str(train.imp_seq_rob)