-
Notifications
You must be signed in to change notification settings - Fork 0
/
TVAR estimation.R
122 lines (96 loc) · 3.79 KB
/
TVAR estimation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# TVAR estimation code adapted from Madhyastha et al (2011)
# enter df with column "y" as dependent and column "x" as threshold variable
# to estimate the TVAR model from our paper, use default
# to estimate the models from Madhyastha et al (2011) use
# for bilinear model: alpha = 1, phi = 1, beta = 2
# for ojive model: alpha = 2, phi = 1, beta = 0
# for combi model: alpha = 2, phi = 1, beta = 2
estTAR <- function(data, alpha = 2, phi = 2, beta = 2, minObs = 10) {
dat <- data
dat$yL <- c(NA, dat[-nrow(dat), "y"]) # lag y variable
dat$xL <- c(NA, dat[-nrow(dat), "x"]) # lag x variable (thresholding variable)
dat <- na.omit(dat) # remove first observations with NAs
n <- nrow(dat) # total number of observations
dat$dummy <- rep(1, n) # dummy variable for the intercept
dat <- dat[order(dat$xL), ] # order the data based on x_{t-1}
minObs <- round(n / minObs) # minimum % of observations in regime 1
maxObs <- n - minObs # maximum % of observations in regime 1
nObs <- minObs:maxObs # all possible number of observations in regime 1
nIter <- length(nObs) # number of iterations
tau <- dat$xL[nObs] # get all possible threshold values (based on x_t-1)
sav <- vector(mode = "list", length = nIter) # empty list to store results
ll <- numeric(nIter) # empty vector to store log likelihood
# n <- nrow(dat) # total number of observations without NAs
# create formula for the model
# (dependent on which parameter is regime-switching)
dep <- "y"
f <- "~ 0"
if(alpha == 1) {
f_alpha <- "dummy"
c_alpha <- "alpha"
} else {
f_alpha <- "dummy_1 + dummy_2"
c_alpha <- c("alpha_1", "alpha_2")
}
if(phi == 1) {
f_phi <- "yL"
c_phi <- "phi"
} else {
f_phi <- "yL_1 + yL_2"
c_phi <- c("phi_1", "phi_2")
}
if(beta == 1) {
f_beta <- "xL"
c_beta <- "beta"
} else {
f_beta <- "xL_1 + xL_2"
c_beta <- c("beta_1", "beta_2")
}
# paste formula together
# check if xL should be in the formula
if(beta != 0)
f <- as.formula(paste(dep, f, "+", f_alpha, "+", f_phi, "+", f_beta)) else
f <- as.formula(paste(dep, f, "+", f_alpha, "+", f_phi))
for(i in 1:nIter) {
# number of observations in regime 1 and regime 2
n_1 <- nObs[i]
n_2 <- n - n_1
# which observations are in regimes
reg_1 <- dat$xL <= tau[i]
reg_2 <- dat$xL > tau[i]
# dummy variable for intercept per regime
dat$dummy_1 <- as.numeric(reg_1)
dat$dummy_2 <- as.numeric(reg_2)
# regime specific variable for lag y
dat$yL_1 <- reg_1 * dat$yL
dat$yL_2 <- reg_2 * dat$yL
# regime specific variable for lag x
dat$xL_1 <- reg_1 * dat$xL
dat$xL_2 <- reg_2 * dat$xL
# estimate model without intercept but with dummy variables above
res <- lm(f, data = dat)
# calculate sums of squares residuals per regime
ssr_1 <- sum(resid(res)[1:n_1] ^ 2)
ssr_2 <- sum(resid(res)[(n_1 + 1):n] ^ 2)
# calculate log likelihood based on the regimes
ll[i] <- n_1 * log(ssr_1 / n_1) + n_2 * log(ssr_2 / n_2)
# save all results
sav[[i]] <- res
}
# check which model fit best (lowest log likelihood)
j <- which.min(ll)
# extract estimated coefficients
coefs <- sav[[j]]$coefficients
if(beta == 0) {
names(coefs) <- c(c_alpha, c_phi)
} else {
names(coefs) <- c(c_alpha, c_phi, c_beta)
}
# variable indicating which regime is active per measurement occasion
reg <- as.numeric(data[, 2] < tau[j]) + 1
# save predicted values
pred <- numeric()
pred[as.numeric(names(sav[[j]]$fitted.values))] <- sav[[j]]$fitted.values
return(list(threshold = tau[j], coefficients = coefs, result = sav[[j]],
regime = reg, predicted = pred))
}