-
Notifications
You must be signed in to change notification settings - Fork 0
/
estimate_profiles_mplus.R
396 lines (361 loc) · 15.6 KB
/
estimate_profiles_mplus.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
#' Estimate parameters for profiles for a specific solution (requires purchasing and installing MPlus to use)
#' @details Creates an mplus model (.inp) and associated data file (.dat)
#' @param data_filename name of data file to prepare; defaults to d.dat
#' @param script_filename name of script to prepare; defaults to i.inp
#' @param output_filename name of the output; defaults to o.out
#' @param savedata_filename name of the output for the save data (with the original data conditional probabilities); defaults to o-mod.out
#' @param the_title title of the model; defaults to test
#' @param starts number of initial stage starts and number of final stage optimizations; defaults to c(20, 4); can be set to be more conservative to c(500, 50)
#' @param m_iterations number of iterations for the EM algorithm; defaults to 500
#' @param st_iterations the number of initial stage iterations; defaults to 10; can be set more to be more conservative to 50
#' @param convergence_criterion convergence criterion for the Quasi-Newton algorithm for continuous outcomes; defaults to 1E-6 (.000001); can be set more conservatively to 1E-7 (.0000001)
#' @param remove_tmp_files whether to remove data, script, and output files; defaults to TRUE
#' @param print_input_file whether to print the input file to the console
#' @param return_save_data whether to return the save data (with the original data and the posterior probabilities for the classes and the class assignment) as a data.frame along with the MPlus output; defaults to TRUE
#' @param optseed random seed for analysis
#' @param include_LMR whether to include the Lo-Mendell-Rubin likelihood-ratio test; defaults to TRUE
#' @param include_BLRT whether to include the bootstrapped LRT; defaults to FALSE because of the time this takes to run
#' @param n_processors = 1
#' @inheritParams estimate_profiles
#' @import dplyr
#' @import tidyr
#' @importFrom tibble tibble
#' @examples
#' \dontrun{
#' m <- estimate_profiles_mplus(iris,
#' Sepal.Length, Sepal.Width, Petal.Length, Petal.Width,
#' n_profiles = 2,
#' model = 1)
#' }
#' @return either a tibble or a ggplot2 plot of the BIC values for the explored models
#' @export
estimate_profiles_mplus <- function(df,
...,
n_profiles,
the_title = "test",
data_filename = "d.dat",
script_filename = "i.inp",
output_filename = "i.out",
savedata_filename = "d-mod.dat",
model = 1,
starts = c(20, 4),
m_iterations = 500,
st_iterations = 10,
convergence_criterion = 1E-6,
remove_tmp_files = TRUE,
print_input_file = FALSE,
return_save_data = TRUE,
optseed = NULL,
n_processors = 1,
include_LMR = TRUE,
include_BLRT = FALSE) {
message("Note that this and other functions that use MPlus are at the experimental stage! Please provide feedback at https://github.com/jrosen48/tidyLPA")
d <- select_ancillary_functions_mplus(df, ...)
x <- utils::capture.output(suppressWarnings(MplusAutomation::prepareMplusData(d, data_filename, inpfile = FALSE)))
unquoted_variable_name <- paste0(names(d), collapse = " ")
var_list <- list()
for (i in 1:length(names(d))) {
var_list[[i]] <- names(d)[i]
}
TITLE <- paste0("TITLE: ", the_title)
DATA <- paste0("DATA: File is ", data_filename, ";")
VARIABLE_line0 <- "VARIABLE:"
VARIABLE_line1 <- paste0("Names are ", unquoted_variable_name, ";")
VARIABLE_line2 <- paste0("Classes = c(", n_profiles, ");")
ANALYSIS_line0 <- "ANALYSIS:"
ANALYSIS_line1 <- "Type is mixture;"
ANALYSIS_line2 <- paste0("start = ", starts[1], " ", starts[2], ";")
ANALYSIS_line3 <- paste0("miterations = ", m_iterations, ";")
ANALYSIS_line4 <- paste0("stiterations = ", st_iterations, ";")
ANALYSIS_line5 <- paste0("convergence = ", convergence_criterion, ";")
if (is.null(optseed)) {
ANALYSIS_line6 <- NULL
} else {
ANALYSIS_line6 <- paste0("optseed = ", optseed, ";")
}
ANALYSIS_line7 <- paste0("processors = ", n_processors, ";")
MODEL_overall_line000 <- paste0("! model specified is: ", model)
MODEL_overall_line00 <- paste0("MODEL:")
MODEL_overall_line0 <- paste0("%overall%")
MODEL_overall_line1 <- paste0("[", unquoted_variable_name, "];")
MODEL_overall_line2 <- paste0(unquoted_variable_name, ";")
if (include_LMR == TRUE) {
OUTPUT_line0 <- "OUTPUT: TECH1 TECH11;"
if (include_BLRT == TRUE) {
OUTPUT_line0 <- "OUTPUT: TECH1 TECH11 TECH14;"
}
} else {
OUTPUT_line0 <- "OUTPUT: TECH1;"
if (include_BLRT == TRUE) {
OUTPUT_line0 <- "OUTPUT: TECH1 TECH14;"
}
}
SAVEDATA_line0 <- paste0("SAVEDATA: File is ", savedata_filename, ";")
SAVEDATA_line1 <- "SAVE = CPROBABILITIES;"
if (model == 1) {
overall_collector <- list()
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(overall_collector)
overall_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], "@0;")
}
}
}
the_index <- 0
class_collector <- list()
for (i in 1:n_profiles) {
if (the_index != 0) {
the_index <- the_index + 1
}
class_collector[[the_index + 1]] <- paste0("%c#", i, "%")
class_collector[[the_index + 2]] <- paste0("[", unquoted_variable_name, "];")
class_collector[[the_index + 3]] <- paste0(unquoted_variable_name, "(", 1, "-", length(var_list), ");")
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(class_collector)
class_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], "@0;")
}
}
}
}
} else if (model == 2) {
overall_collector <- list()
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(overall_collector)
overall_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], ";")
}
}
}
the_index <- 0
class_collector <- list()
for (i in 1:n_profiles) {
if (the_index != 0) {
the_index <- the_index + 1
}
class_collector[[the_index + 1]] <- paste0("%c#", i, "%")
class_collector[[the_index + 2]] <- paste0("[", unquoted_variable_name, "];")
class_collector[[the_index + 3]] <- paste0(unquoted_variable_name, "(", 1, "-", length(var_list), ");")
temp_index <- 0
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(class_collector)
class_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], "(", length(var_list) + temp_index + 1, ");")
temp_index <- (temp_index + 1)
}
}
}
}
} else if (model == 3) {
overall_collector <- list()
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(overall_collector)
overall_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], "@0;")
}
}
}
the_index <- 0
class_collector <- list()
for (i in 1:n_profiles) {
if (the_index != 0) {
the_index <- the_index + 1
}
class_collector[[the_index + 1]] <- paste0("%c#", i, "%")
class_collector[[the_index + 2]] <- paste0("[", unquoted_variable_name, "];")
class_collector[[the_index + 3]] <- paste0(unquoted_variable_name, ";")
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(class_collector)
class_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], "@0;")
}
}
}
}
} else if (model == 4) {
overall_collector <- list()
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(overall_collector)
overall_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], ";")
}
}
}
the_index <- 0
class_collector <- list()
for (i in 1:n_profiles) {
if (the_index != 0) {
the_index <- the_index + 1
}
class_collector[[the_index + 1]] <- paste0("%c#", i, "%")
class_collector[[the_index + 2]] <- paste0("[", unquoted_variable_name, "];")
class_collector[[the_index + 3]] <- paste0(unquoted_variable_name, ";")
temp_index <- 0
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(class_collector)
class_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], "(", temp_index + 1, ");")
temp_index <- (temp_index + 1)
}
}
}
}
} else if (model == 5) {
overall_collector <- list()
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(overall_collector)
overall_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], ";")
}
}
}
the_index <- 0
class_collector <- list()
for (i in 1:n_profiles) {
if (the_index != 0) {
the_index <- the_index + 1
}
class_collector[[the_index + 1]] <- paste0("%c#", i, "%")
class_collector[[the_index + 2]] <- paste0("[", unquoted_variable_name, "];")
class_collector[[the_index + 3]] <- paste0(unquoted_variable_name, "(", 1, "-", length(var_list), ");")
temp_index <- 0
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(class_collector)
class_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], ";")
temp_index <- (temp_index + 1)
}
}
}
}
} else if (model == 6) {
overall_collector <- list()
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(overall_collector)
overall_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], ";")
}
}
}
the_index <- 0
class_collector <- list()
for (i in 1:n_profiles) {
if (the_index != 0) {
the_index <- the_index + 1
}
class_collector[[the_index + 1]] <- paste0("%c#", i, "%")
class_collector[[the_index + 2]] <- paste0("[", unquoted_variable_name, "];")
class_collector[[the_index + 3]] <- paste0(unquoted_variable_name, ";")
temp_index <- 0
for (j in 1:length(var_list)) {
for (k in j:length(var_list)) {
if (var_list[[j]] != var_list[[k]]) {
the_index <- length(class_collector)
class_collector[[the_index + 1]] <- paste0(var_list[[j]], " WITH ", var_list[[k]], ";")
temp_index <- (temp_index + 1)
}
}
}
}
}
readr::write_lines(
c(
TITLE,
DATA,
VARIABLE_line0, VARIABLE_line1, VARIABLE_line2,
MODEL_overall_line00, MODEL_overall_line0, MODEL_overall_line1, MODEL_overall_line2,
overall_collector,
class_collector,
ANALYSIS_line0, ANALYSIS_line1, ANALYSIS_line2, ANALYSIS_line3, ANALYSIS_line4, ANALYSIS_line5, ANALYSIS_line6, ANALYSIS_line7,
OUTPUT_line0,
SAVEDATA_line0,
SAVEDATA_line1
),
script_filename
)
if (run_model = TRUE) {
x <- utils::capture.output(MplusAutomation::runModels(target = paste0(getwd(), "/", script_filename)))
capture <- utils::capture.output(m <- MplusAutomation::readModels(target = paste0(getwd(), "/", output_filename)))
if (check_warnings(m, "WARNING: THE BEST LOGLIKELIHOOD VALUE WAS NOT REPLICATED. THE") == "Warning: The best loglikelihood was not replicated") {
warning_status <- "Warning: LL not replicated"
} else {
warning_status <- ""
}
if (check_errors(m, "THE MODEL ESTIMATION DID NOT TERMINATE NORMALLY DUE TO AN INSUFFICIENT") == "Error: Convergence issue" |
check_errors(m, "THE LOGLIKELIHOOD DECREASED IN THE LAST EM ITERATION. CHANGE YOUR MODEL") == "Error: Convergence issue" |
check_errors(m, "THE MODEL ESTIMATION DID NOT TERMINATE NORMALLY. ESTIMATES CANNOT") == "Error: Convergence issue" |
check_errors(m, "THE MODEL ESTIMATION DID NOT TERMINATE NORMALLY DUE TO AN ERROR IN THE") == "Error: Convergence issue") {
error_status <- "Error: Convergence issue"
} else {
error_status <- ""
}
if (error_status == "Error: Convergence issue" | warning_status == "Warning: LL not replicated") {
message(stringr::str_trim(stringr::str_c(warning_status, " ", error_status)))
return(stringr::str_trim(stringr::str_c(warning_status, " ", error_status)))
} else {
message("LogLik is ", round(abs(as.vector(m$summaries$LL)), 3))
message("BIC is ", round(abs(as.vector(m$summaries$BIC)), 3))
message("Entropy is ", round(abs(as.vector(m$summaries$Entropy)), 3))
}
if (print_input_file == TRUE) {
print(readr::read_lines(script_filename))
message("Note: This function currently prints the script output. You can also use the argument remove_tmp_files = FALSE to create the inp file, which you can then view in R Studio by clicking on the file name in the Files pane.")
}
if (return_save_data == TRUE) {
x <- dplyr::tbl_df(m$savedata)
# x <- dplyr::tbl_df(MplusAutomation::getSavedata_Data(paste0(getwd(), "/", output_filename)))
if (remove_tmp_files == TRUE) {
file.remove(data_filename)
file.remove(script_filename)
file.remove(output_filename)
file.remove(savedata_filename)
file.remove("Mplus Run Models.log")
}
return(x)
} else {
if (remove_tmp_files == TRUE) {
file.remove(data_filename)
file.remove(script_filename)
file.remove(output_filename)
file.remove("Mplus Run Models.log")
}
return(m)
}
}
}
#' Extract summary statistics from an Mplus model
#' @details Extract log likelihood, BIC, and entropy statistics from an Mplus model
#' @param x an mplus model
#' @return a tibble with summary statistics
#' @export
extract_mplus_summary <- function(x) {
x$summaries[c("LL", "BIC", "Entropy")]
}
check_list <- function(x, check) {
x[1] == check
}
check_warnings <- function(x, check) {
if (any(purrr::map_lgl(x$warnings, check_list, check = check))) {
return(stringr::str_c("Warning: ", "The best loglikelihood was not replicated"))
} else {
return("No warning")
}
}
check_errors <- function(x, check) {
if (any(purrr::map_lgl(x$errors, check_list, check = check))) {
return(stringr::str_c("Error: ", "Convergence issue"))
} else {
return("No error")
}
}