diff --git a/.setup/build/betaSandwich.pdf b/.setup/build/betaSandwich.pdf index e133850..84c5ffb 100644 Binary files a/.setup/build/betaSandwich.pdf and b/.setup/build/betaSandwich.pdf differ diff --git a/.setup/build/betaSandwich_1.0.5.9000.tar.gz b/.setup/build/betaSandwich_1.0.5.9000.tar.gz index 513feac..43f6b65 100644 Binary files a/.setup/build/betaSandwich_1.0.5.9000.tar.gz and b/.setup/build/betaSandwich_1.0.5.9000.tar.gz differ diff --git a/.setup/latex/bib/bib.bib b/.setup/latex/bib/bib.bib deleted file mode 100644 index cd21073..0000000 --- a/.setup/latex/bib/bib.bib +++ /dev/null @@ -1,1450 +0,0 @@ -@Article{Craig-1936, - author = {Cecil C. Craig}, - date = {1936-03}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {On the frequency function of $xy$}, - doi = {10.1214/aoms/1177732541}, - number = {1}, - pages = {1--15}, - volume = {7}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Aroian-1947, - author = {Leo A. Aroian}, - date = {1947-06}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The probability function of the product of two normally distributed variables}, - doi = {10.1214/aoms/1177730442}, - number = {2}, - pages = {265--271}, - volume = {18}, - abstract = {Let $x$ and $y$ follow a normal bivariate probability function with means $\bar X, \bar Y$, standard deviations $\sigma_1, \sigma_2$, respectively, $r$ the coefficient of correlation, and $\rho_1 = \bar X/\sigma_1, \rho_2 = \bar Y/\sigma_2$. Professor C. C. Craig [1] has found the probability function of $z = xy/\sigma_1\sigma_2$ in closed form as the difference of two integrals. For purposes of numerical computation he has expanded this result in an infinite series involving powers of $z, \rho_1, \rho_2$, and Bessel functions of a certain type; in addition, he has determined the moments, semin-variants, and the moment generating function of $z$. However, for $\rho_1$ and $\rho_2$ large, as Craig points out, the series expansion converges very slowly. Even for $\rho_1$ and $\rho_2$ as small as 2, the expansion is unwieldy. We shall show that as $\rho_1$ and $\rho_2 \rightarrow \infty$, the probability function of $z$ approaches a normal curve and in case $r = 0$ the Type III function and the Gram-Charlier Type A series are excellent approximations to the $z$ distribution in the proper region. Numerical integration provides a substitute for the infinite series wherever the exact values of the probability function of $z$ are needed. Some extensions of the main theorem are given in section 5 and a practical problem involving the probability function of $z$ is solved.}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Cochran-1952, - author = {William G. Cochran}, - date = {1952-09}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The $\chi^{2}$ test of goodness of fit}, - doi = {10.1214/aoms/1177729380}, - number = {3}, - pages = {315--345}, - volume = {23}, - publisher = {Institute of Mathematical Statistics}, - abstract = {This paper contains an expository discussion of the chi square test of goodness of fit, intended for the student and user of statistical theory rather than for the expert. Part I describes the historical development of the distribution theory on which the test rests. Research bearing on the practical application of the test--in particular on the minimum expected number per class and the construction of classes--is discussed in Part II. 
Some varied opinions about the extent to which the test actually is useful to the scientist are presented in Part III. Part IV outlines a number of tests that have been proposed as substitutes for the chi square test (the $\omega^2$ test, the smooth test, the likelihood ratio test) and Part V a number of supplementary tests (the run test, tests based on low moments, subdivision of chi square into components).}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Goodman-1960, - author = {Leo A. Goodman}, - date = {1960-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {On the exact variance of products}, - doi = {10.1080/01621459.1960.10483369}, - number = {292}, - pages = {708--713}, - volume = {55}, - abstract = {A simple exact formula for the variance of the product of two random variables, say, x and y, is given as a function of the means and central product-moments of x and y. The usual approximate variance formula for xy is compared with this exact formula; e.g., we note, in the special case where x and y are independent, that the ``variance'' computed by the approximate formula is less than the exact variance, and that the accuracy of the approximation depends on the sum of the reciprocals of the squared coefficients of variation of x and y. The case where x and y need not be independent is also studied, and exact variance formulas are presented for several different ``product estimates.'' (The usefulness of exact formulas becomes apparent when the variances of these estimates are compared.) When x and y are independent, simple unbiased estimates of these exact variances are suggested; in the more general case, consistent estimates are presented.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Bradley-1978, - author = {James V. Bradley}, - date = {1978-11}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Robustness?}, - doi = {10.1111/j.2044-8317.1978.tb00581.x}, - number = {2}, - pages = {144--152}, - volume = {31}, - publisher = {Wiley}, - annotation = {robustness}, - abstract = {The actual behaviour of the probability of a Type I error under assumption violation is quite complex, depending upon a wide variety of interacting factors. Yet allegations of robustness tend to ignore its highly particularistic nature and neglect to mention important qualifying conditions. The result is often a vast overgeneralization which nevertheless is difficult to refute since a standard quantitative definition of what constitutes robustness does not exist. Yet under any halfway reasonable quantitative definition, many of the most prevalent claims of robustness would be demonstrably false. Therefore robustness is a highly questionable concept.}, -} - -@Article{Rubin-1976, - author = {Donald B. Rubin}, - date = {1976}, - journaltitle = {Biometrika}, - title = {Inference and missing data}, - doi = {10.1093/biomet/63.3.581}, - number = {3}, - pages = {581--592}, - volume = {63}, - publisher = {Oxford University Press ({OUP})}, - abstract = {When making sampling distribution inferences about the parameter of the data, $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are `missing at random' and the observed data are `observed at random', but these inferences are generally conditional on the observed pattern of missing data. 
When making direct-likelihood or Bayesian inferences about $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are missing at random and the parameter of the missing data process is `distinct' from $\theta$. These conditions are the weakest general conditions under which ignoring the process that causes missing data always leads to correct inferences.}, - publisher = {Oxford University Press ({OUP})}, -} - -@Article{Baron-Kenny-1986, - author = {Reuben M. Baron and David A. Kenny}, - date = {1986}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {The moderator-mediator variable distinction in social psychological research: Conceptual, strategic, and statistical considerations}, - doi = {10.1037/0022-3514.51.6.1173}, - number = {6}, - pages = {1173--1182}, - volume = {51}, - abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Browne-1984, - author = {Michael W. Browne}, - date = {1984-05}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Asymptotically distribution-free methods for the analysis of covariance structures}, - doi = {10.1111/j.2044-8317.1984.tb00789.x}, - number = {1}, - pages = {62--83}, - volume = {37}, - abstract = {Methods for obtaining tests of fit of structural models for covariance matrices and estimator standard error which are asymptotically distribution free are derived. Modifications to standard normal theory tests and standard errors which make them applicable to the wider class of elliptical distributions are provided. A random sampling experiment to investigate some of the proposed methods is described.}, - publisher = {Wiley}, -} - -@Article{Efron-1987, - author = {Bradley Efron}, - date = {1987-03}, - journaltitle = {Journal of the American Statistical Association}, - title = {Better bootstrap confidence intervals}, - doi = {10.1080/01621459.1987.10478410}, - number = {397}, - pages = {171--185}, - volume = {82}, - abstract = {We consider the problem of setting approximate confidence intervals for a single parameter $\theta$ in a multiparameter family. The standard approximate intervals based on maximum likelihood theory, $\hat{\theta} \pm \hat{\sigma} z^{\left( \alpha \right)}$, can be quite misleading. In practice, tricks based on transformations, bias corrections, and so forth, are often used to improve their accuracy. 
The bootstrap confidence intervals discussed in this article automatically incorporate such tricks without requiring the statistician to think them through for each new application, at the price of a considerable increase in computational effort. The new intervals incorporate an improvement over previously suggested methods, which results in second-order correctness in a wide variety of problems. In addition to parametric families, bootstrap intervals are also developed for nonparametric situations.}, - publisher = {Informa {UK} Limited}, - keywords = {resampling methods, approximate confidence intervals, transformations, nonparametric intervals, second-order theory, skewness corrections}, -} - -@Article{Efron-1988, - author = {Bradley Efron}, - date = {1988}, - journaltitle = {Psychological Bulletin}, - title = {Bootstrap confidence intervals: Good or bad?}, - doi = {10.1037/0033-2909.104.2.293}, - number = {2}, - pages = {293--296}, - volume = {104}, - abstract = {The bootstrap is a nonparametric technique for estimating standard errors and approximate confidence intervals. Rasmussen has used a simulation experiment to suggest that bootstrap confidence intervals perform very poorly in the estimation of a correlation coefficient. Part of Rasmussen's simulation is repeated. A careful look at the results shows the bootstrap intervals performing quite well. Some remarks are made concerning the virtues and defects of bootstrap intervals in general.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{James-Brett-1984, - author = {Lawrence R. James and Jeanne M. Brett}, - date = {1984}, - journaltitle = {Journal of Applied Psychology}, - title = {Mediators, moderators, and tests for mediation}, - doi = {10.1037/0021-9010.69.2.307}, - number = {2}, - pages = {307--321}, - volume = {69}, - abstract = {Discusses mediation relations in causal terms. Influences of an antecedent are transmitted to a consequence through an intervening mediator. Mediation relations may assume a number of functional forms, including nonadditive, nonlinear, and nonrecursive forms. Although mediation and moderation are distinguishable processes, with nonadditive forms (moderated mediation) a particular variable may be both a mediator and a moderator within a single set of functional relations. Current models for testing mediation relations in industrial and organizational psychology often involve an interplay between exploratory (correlational) statistical tests and causal inference. It is suggested that no middle ground exists between exploratory and confirmatory (causal) analysis and that attempts to explain how mediation processes occur require specified causal models.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Judd-Kenny-1981, - author = {Charles M. Judd and David A. Kenny}, - date = {1981-10}, - journaltitle = {Evaluation Review}, - title = {Process analysis}, - doi = {10.1177/0193841x8100500502}, - number = {5}, - pages = {602--619}, - volume = {5}, - abstract = {This article presents the rationale and procedures for conducting a process analysis in evaluation research. Such an analysis attempts to identify the process that mediates the effects of some treatment, by estimating the parameters of a causal chain between the treatment and some outcome variable. Two different procedures for estimating mediation are discussed. 
In addition we present procedures for examining whether a treatment exerts its effects, in part, by altering the mediating process that produces the outcome. Finally, the benefits of process analysis in evaluation research are underlined.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Micceri-1989, - author = {Theodore Micceri}, - date = {1989}, - journaltitle = {Psychological Bulletin}, - title = {The unicorn, the normal curve, and other improbable creatures}, - doi = {10.1037/0033-2909.105.1.156}, - number = {1}, - pages = {156--166}, - volume = {105}, - abstract = {An investigation of the distributional characteristics of 440 large-sample achievement and psychometric measures found all to be significantly nonnormal at the alpha .01 significance level. Several classes of contamination were found, including tail weights from the uniform to the double exponential, exponential-level asymmetry, severe digit preferences, multimodalities, and modes external to the mean/median interval. Thus, the underlying tenets of normality-assuming statistics appear fallacious for these commonly used types of data. However, findings here also fail to support the types of distributions used in most prior robustness research suggesting the failure of such statistics under nonnormal conditions. A reevaluation of the statistical robustness literature appears appropriate in light of these findings.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Sobel-1982, - author = {Michael E. Sobel}, - date = {1982}, - journaltitle = {Sociological Methodology}, - title = {Asymptotic confidence intervals for indirect effects in structural equation models}, - doi = {10.2307/270723}, - pages = {290}, - volume = {13}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1986, - author = {Michael E. Sobel}, - date = {1986}, - journaltitle = {Sociological Methodology}, - title = {Some new results on indirect effects and their standard errors in covariance structure models}, - doi = {10.2307/270922}, - pages = {159}, - volume = {16}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1987, - author = {Michael E. Sobel}, - date = {1987-08}, - journaltitle = {Sociological Methods {\&} Research}, - title = {Direct and indirect effects in linear structural equation models}, - doi = {10.1177/0049124187016001006}, - number = {1}, - pages = {155--176}, - volume = {16}, - abstract = {This article discusses total indirect effects in linear structural equation models. First, I define these effects. Second, I show how the delta method may be used to obtain the standard errors of the sample estimates of these effects and test hypotheses about the magnitudes of the indirect effects. To keep matters simple, I focus throughout on a particularly simple linear structural equation system; for a treatment of the general case, see Sobel (1986). To illustrate the ideas and results, a detailed example is presented.}, - publisher = {{SAGE} Publications}, -} - -@Article{Venzon-Moolgavkar-1988, - author = {D. J. Venzon and S. H. Moolgavkar}, - date = {1988}, - journaltitle = {Applied Statistics}, - title = {A method for computing profile-likelihood-based confidence intervals}, - doi = {10.2307/2347496}, - number = {1}, - pages = {87}, - volume = {37}, - abstract = {The method of constructing confidence regions based on the generalised likelihood ratio statistic is well known for parameter vectors.
A similar construction of a confidence interval for a single entry of a vector can be implemented by repeatedly maximising over the other parameters. We present an algorithm for finding these confidence interval endpoints that requires less computation. It employs a modified Newton-Raphson iteration to solve a system of equations that defines the endpoints.}, - publisher = {{JSTOR}}, - keywords = {confidence intervals, profile likelihood}, -} - -@Article{White-1980, - author = {Halbert White}, - date = {1980-05}, - journaltitle = {Econometrica}, - title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity}, - doi = {10.2307/1912934}, - number = {4}, - pages = {817--838}, - volume = {48}, - abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.}, - publisher = {{JSTOR}}, -} - -@Book{Cohen-1988, - author = {Jacob Cohen}, - date = {1988}, - title = {Statistical power analysis for the behavioral sciences}, - doi = {10.4324/9780203771587}, - edition = {2}, - isbn = {9780203771587}, - publisher = {Routledge}, - library = {HA29 .C66 1988}, - keywords = {Social sciences--Statistical methods, Probabilities, Statistical power analysis}, - addendum = {https://lccn.loc.gov/88012110}, - abstract = {Statistical Power Analysis is a nontechnical guide to power analysis in research planning that provides users of applied statistics with the tools they need for more effective analysis. The Second Edition includes: \begin{itemize} \item a chapter covering power analysis in set correlation and multivariate methods; \item a chapter considering effect size, psychometric reliability, and the efficacy of ``qualifying'' dependent variables and; \item expanded power and sample size tables for multiple regression/correlation. \end{itemize}}, -} - -@Book{NationalResearchCouncil-1982, - author = {{National Research Council}}, - date = {1982-01}, - title = {An assessment of research-doctorate programs in the {United States}: Social and behavioral sciences}, - doi = {10.17226/9781}, - location = {Washington, D.C.}, - publisher = {National Academies Press}, - annotation = {data}, -} - -@Book{Rubin-1987, - author = {Donald B. Rubin}, - date = {1987-06}, - title = {Multiple imputation for nonresponse in surveys}, - doi = {10.1002/9780470316696}, - isbn = {9780470316696}, - location = {New York}, - publisher = {John Wiley {\&} Sons, Inc.}, - library = {HA31.2 .R83 1987}, - keywords = {Multiple imputation (Statistics), Nonresponse (Statistics), Social surveys--Response rate}, - addendum = {https://lccn.loc.gov/86028935}, - annotation = {Lib-Missing-Data-Books}, - abstract = {Demonstrates how nonresponse in sample surveys and censuses can be handled by replacing each missing value with two or more multiple imputations. Clearly illustrates the advantages of modern computing to such handle surveys, and demonstrates the benefit of this statistical technique for researchers who must analyze them. 
Also presents the background for Bayesian and frequentist theory. After establishing that only standard complete-data methods are needed to analyze a multiply-imputed set, the text evaluates procedures in general circumstances, outlining specific procedures for creating imputations in both the ignorable and nonignorable cases. Examples and exercises reinforce ideas, and the interplay of Bayesian and frequentist ideas presents a unified picture of modern statistics.}, -} - -@Article{Bollen-Stine-1990, - author = {Kenneth A. Bollen and Robert Stine}, - date = {1990}, - journaltitle = {Sociological Methodology}, - title = {Direct and indirect effects: Classical and bootstrap estimates of variability}, - doi = {10.2307/271084}, - pages = {115}, - volume = {20}, - abstract = {The decomposition of effects in structural equation models has been of considerable interest to social scientists. Finite-sample or asymptotic results for the sampling distribution of estimators of direct effects are widely available. Statistical inferences about indirect effects have relied exclusively on asymptotic methods which assume that the limiting distribution of the estimator is normal, with a standard error derived from the delta method. We examine bootstrap procedures as another way to generate standard errors and confidence intervals and to estimate the sampling distributions of estimators of direct and indirect effects. We illustrate the classical and the bootstrap methods with three empirical examples. We find that in a moderately large sample, the bootstrap distribution of an estimator is close to that assumed with the classical and delta methods but that in small samples, there are some differences. Bootstrap methods provide a check on the classical and delta methods when the latter are applied under less than ideal conditions.}, - publisher = {{JSTOR}}, -} - -@Article{Li-Raghunathan-Rubin-1991, - author = {K. H. Li and Trivellore Eachambadi Raghunathan and Donald B. Rubin}, - date = {1991-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {Large-sample significance levels from multiply imputed data using moment-based statistics and an {$F$} reference distribution}, - doi = {10.1080/01621459.1991.10475152}, - number = {416}, - pages = {1065--1073}, - volume = {86}, - abstract = {We present a procedure for computing significance levels from data sets whose missing values have been multiply imputed data. This procedure uses moment-based statistics, $m \leq 3$ repeated imputations, and an F reference distribution. When $m = \infty$, we show first that our procedure is essentially the same as the ideal procedure in cases of practical importance and, second, that its deviations from the ideal are basically a function of the coefficient of variation of the canonical ratios of complete to observed information. For small $m$ our procedure's performance is largely governed by this coefficient of variation and the mean of these ratios. Using simulation techniques with small $m$, we compare our procedure's actual and nominal large-sample significance levels and conclude that it is essentially calibrated and thus represents a definite improvement over previously available procedures. 
Furthermore, we compare the large-sample power of the procedure as a function of $m$ and other factors, such as the dimensionality of the estimand and fraction of missing information, to provide guidance on the choice of the number of imputations; generally, we find the loss of power due to small $m$ to be quite modest in cases likely to occur in practice.}, - publisher = {Informa {UK} Limited}, - keywords = {imputation, missing data, nonresponse, tests of significance}, - annotation = {missing, missing-mi}, -} - -@InBook{Arbuckle-1996, - author = {James L. Arbuckle}, - booktitle = {Advanced structural equation modeling}, - date = {1996}, - title = {Full information estimation in the presence of incomplete data}, - doi = {10.4324/9781315827414}, - editor = {George A. Marcoulides and Randall E. Schumacker}, -} - -@Book{Davison-Hinkley-1997, - author = {Anthony Christopher Davison and David Victor Hinkley}, - publisher = {Cambridge University Press}, - title = {Bootstrap methods and their application}, - series = {Cambridge Series in Statistical and Probabilistic Mathematics}, - date = {1997}, - location = {Cambridge and New York, NY, USA }, - doi = {10.1017/CBO9780511802843}, - isbn = {9780521573917}, - library = {QA276.8 .D38 1997}, - keywords = {Bootstrap (Statistics)}, - addendum = {https://lccn.loc.gov/96030064}, - abstract = {Bootstrap methods are computer-intensive methods of statistical analysis, which use simulation to calculate standard errors, confidence intervals, and significance tests. The methods apply for any level of modelling, and so can be used for fully parametric, semiparametric, and completely nonparametric analysis. This 1997 book gives a broad and up-to-date coverage of bootstrap methods, with numerous applied examples, developed in a coherent way with the necessary theoretical basis. Applications include stratified data; finite populations; censored and missing data; linear, nonlinear, and smooth regression models; classification; time series and spatial problems. Special features of the book include: extensive discussion of significance tests and confidence intervals; material on various diagnostic methods; and methods for efficient computation, including improved Monte Carlo simulation. Each chapter includes both practical and theoretical exercises. S-Plus programs for implementing the methods described in the text are available from the supporting website.}, - annotation = {bootstrap}, -} - -@Book{Efron-Tibshirani-1993, - author = {Bradley Efron and Robert J. Tibshirani}, - publisher = {Chapman \& Hall}, - title = {An introduction to the bootstrap}, - series = {Monographs on statistics and applied probability ; 57}, - date = {1993}, - location = {New York}, - doi = {10.1201/9780429246593}, - isbn = {9780412042317}, - library = {QA276.8 .E3745 1993}, - addendum = {https://lccn.loc.gov/93004489}, - abstract = {Statistics is a subject of many uses and surprisingly few effective practitioners. The traditional road to statistical knowledge is blocked, for most, by a formidable wall of mathematics. The approach in An Introduction to the Bootstrap avoids that wall. It arms scientists and engineers, as well as statisticians, with the computational techniques they need to analyze and understand complicated data sets.}, - keywords = {Bootstrap (Statistics)}, -} - -@Book{Schafer-1997, - author = {Joseph L. 
Schafer}, - date = {1997-08}, - title = {Analysis of incomplete multivariate data}, - doi = {10.1201/9780367803025}, - isbn = {9780367803025}, - abstract = {The last two decades have seen enormous developments in statistical methods for incomplete data. The EM algorithm and its extensions, multiple imputation, and Markov Chain Monte Carlo provide a set of flexible and reliable tools for inference in large classes of missing-data problems. Yet, in practical terms, those developments have had surprisingly little impact on the way most data analysts handle missing values on a routine basis. - Analysis of Incomplete Multivariate Data helps bridge the gap between theory and practice, making these missing-data tools accessible to a broad audience. It presents a unified, Bayesian approach to the analysis of incomplete multivariate data, covering datasets in which the variables are continuous, categorical, or both. The focus is applied, where necessary, to help readers thoroughly understand the statistical properties of those methods, and the behavior of the accompanying algorithms. - All techniques are illustrated with real data examples, with extended discussion and practical advice. All of the algorithms described in this book have been implemented by the author for general use in the statistical languages S and S Plus. The software is available free of charge on the Internet.}, - publisher = {Chapman and Hall/CRC}, -} - -@Article{Bauer-Preacher-Gil-2006, - author = {Daniel J. Bauer and Kristopher J. Preacher and Karen M. Gil}, - date = {2006}, - journaltitle = {Psychological Methods}, - title = {Conceptualizing and testing random indirect effects and moderated mediation in multilevel models: New procedures and recommendations}, - doi = {10.1037/1082-989x.11.2.142}, - number = {2}, - pages = {142--163}, - volume = {11}, - abstract = {The authors propose new procedures for evaluating direct, indirect, and total effects in multilevel models when all relevant variables are measured at Level 1 and all effects are random. Formulas are provided for the mean and variance of the indirect and total effects and for the sampling variances of the average indirect and total effects. Simulations show that the estimates are unbiased under most conditions. Confidence intervals based on a normal approximation or a simulated sampling distribution perform well when the random effects are normally distributed but less so when they are nonnormally distributed. These methods are further developed to address hypotheses of moderated mediation in the multilevel context. An example demonstrates the feasibility and usefulness of the proposed methods.}, - publisher = {American Psychological Association ({APA})}, - keywords = {multilevel model, hierarchical linear model, indirect effect, mediation, moderated mediation}, -} - -@Article{Cheung-2009a, - author = {Mike W.-L. Cheung}, - date = {2009-05}, - journaltitle = {Behavior Research Methods}, - title = {Comparison of methods for constructing confidence intervals of standardized indirect effects}, - doi = {10.3758/brm.41.2.425}, - number = {2}, - pages = {425--438}, - volume = {41}, - abstract = {Mediation models are often used as a means to explain the psychological mechanisms between an independent and a dependent variable in the behavioral and social sciences. A major limitation of the unstandardized indirect effect calculated from raw scores is that it cannot be interpreted as an effect-size measure.
In contrast, the standardized indirect effect calculated from standardized scores can be a good candidate as a measure of effect size because it is scale invariant. In the present article, 11 methods for constructing the confidence intervals (CIs) of the standardized indirect effects were evaluated via a computer simulation. These included six Wald CIs, three bootstrap CIs, one likelihood-based CI, and the PRODCLIN CI. The results consistently showed that the percentile bootstrap, the bias-corrected bootstrap, and the likelihood-based approaches had the best coverage probability. Mplus, LISREL, and Mx syntax were included to facilitate the use of these preferred methods in applied settings. Future issues on the use of the standardized indirect effects are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, coverage probability, structural equation modeling approach}, -} - -@Article{Cheung-2009b, - author = {Mike W.-L. Cheung}, - date = {2009-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Constructing approximate confidence intervals for parameters with structural equation models}, - doi = {10.1080/10705510902751291}, - number = {2}, - pages = {267--294}, - volume = {16}, - abstract = {Confidence intervals (CIs) for parameters are usually constructed based on the estimated standard errors. These are known as Wald CIs. This article argues that likelihood-based CIs (CIs based on likelihood ratio statistics) are often preferred to Wald CIs. It shows how the likelihood-based CIs and the Wald CIs for many statistics and psychometric indexes can be constructed with the use of phantom variables (Rindskopf, 1984) in some of the current structural equation modeling (SEM) packages. The procedures to form CIs for the differences in correlation coefficients, squared multiple correlations, indirect effects, coefficient alphas, and reliability estimates are illustrated. A simulation study on the Pearson correlation is used to demonstrate the advantages of the likelihood-based CI over the Wald CI. Issues arising from this SEM approach and extensions of this approach are discussed.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Cheung-Lau-2007, - author = {Gordon W. Cheung and Rebecca S. Lau}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Testing mediation and suppression effects of latent variables}, - doi = {10.1177/1094428107300343}, - number = {2}, - pages = {296--325}, - volume = {11}, - abstract = {Because of the importance of mediation studies, researchers have been continuously searching for the best statistical test for mediation effect. The approaches that have been most commonly employed include those that use zero-order and partial correlation, hierarchical regression models, and structural equation modeling (SEM). This study extends MacKinnon and colleagues (MacKinnon, Lockwood, Hoffmann, West, \& Sheets, 2002; MacKinnon, Lockwood, \& Williams, 2004, MacKinnon, Warsi, \& Dwyer, 1995) works by conducting a simulation that examines the distribution of mediation and suppression effects of latent variables with SEM, and the properties of confidence intervals developed from eight different methods. Results show that SEM provides unbiased estimates of mediation and suppression effects, and that the bias-corrected bootstrap confidence intervals perform best in testing for mediation and suppression effects. 
Steps to implement the recommended procedures with Amos are presented.}, - publisher = {{SAGE} Publications}, - keywords = {mediating effects, suppression effects, structural equation modeling}, -} - -@Article{CribariNeto-Souza-Vasconcellos-2007, - author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, - date = {2007-08}, - journaltitle = {Communications in Statistics - Theory and Methods}, - title = {Inference under heteroskedasticity and leveraged data}, - doi = {10.1080/03610920601126589}, - number = {10}, - pages = {1877--1888}, - volume = {36}, - abstract = {We evaluate the finite-sample behavior of different heteroskedasticity-consistent covariance matrix estimators, under both constant and unequal error variances. We consider the estimator proposed by Halbert White (HC0), and also its variants known as HC2, HC3, and HC4; the latter was recently proposed by Cribari-Neto (2004). We propose a new covariance matrix estimator: HC5. It is the first consistent estimator to explicitly take into account the effect that the maximal leverage has on the associated inference. Our numerical results show that quasi-$t$ inference based on HC5 is typically more reliable than inference based on other covariance matrix estimators.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Fritz-MacKinnon-2007, - author = {Matthew S. Fritz and David P. MacKinnon}, - date = {2007-03}, - journaltitle = {Psychological Science}, - title = {Required sample size to detect the mediated effect}, - doi = {10.1111/j.1467-9280.2007.01882.x}, - number = {3}, - pages = {233--239}, - volume = {18}, - abstract = {Mediation models are widely used, and there are many tests of the mediated effect. One of the most common questions that researchers have when planning mediation studies is, ``How many subjects do I need to achieve adequate power when testing for mediation?'' This article presents the necessary sample sizes for six of the most common and the most recommended tests of mediation for various combinations of parameters, to provide a guide for researchers when designing studies or applying for grants.}, - publisher = {{SAGE} Publications}, - keywords = {bootstrap, collinearity, mediation analysis, power, tolerance}, -} - -@Article{Graham-Olchowski-Gilreath-2007, - author = {John W. Graham and Allison E. Olchowski and Tamika D. Gilreath}, - date = {2007-06}, - journaltitle = {Prevention Science}, - title = {How many imputations are really needed? Some practical clarifications of multiple imputation theory}, - doi = {10.1007/s11121-007-0070-9}, - number = {3}, - pages = {206--213}, - volume = {8}, - abstract = {Multiple imputation (MI) and full information maximum likelihood (FIML) are the two most common approaches to missing data analysis. In theory, MI and FIML are equivalent when identical models are tested using the same variables, and when m, the number of imputations performed with MI, approaches infinity. However, it is important to know how many imputations are necessary before MI and FIML are sufficiently equivalent in ways that are important to prevention scientists. MI theory suggests that small values of m, even on the order of three to five imputations, yield excellent results. Previous guidelines for sufficient m are based on relative efficiency, which involves the fraction of missing information ($\gamma$) for the parameter being estimated, and m.
In the present study, we used a Monte Carlo simulation to test MI models across several scenarios in which $\gamma$ and m were varied. Standard errors and p-values for the regression coefficient of interest varied as a function of m, but not at the same rate as relative efficiency. Most importantly, statistical power for small effect sizes diminished as m became smaller, and the rate of this power falloff was much greater than predicted by changes in relative efficiency. Based on our findings, we recommend that researchers using MI should perform many more imputations than previously considered sufficient. These recommendations are based on $\gamma$, and take into consideration one's tolerance for a preventable power falloff (compared to FIML) due to using too few imputations.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {multiple imputation, number of imputations, full information maximum likelihood, missing data, statistical power}, -} - -@Article{MacKinnon-Fritz-Williams-etal-2007, - author = {David P. MacKinnon and Matthew S. Fritz and Jason Williams and Chondra M. Lockwood}, - date = {2007-08}, - journaltitle = {Behavior Research Methods}, - title = {Distribution of the product confidence limits for the indirect effect: Program {PRODCLIN}}, - doi = {10.3758/bf03193007}, - number = {3}, - pages = {384--389}, - volume = {39}, - abstract = {This article describes a program, PRODCLIN (distribution of the PRODuct Confidence Limits for INdirect effects), written for SAS, SPSS, and R, that computes confidence limits for the product of two normal random variables. The program is important because it can be used to obtain more accurate confidence limits for the indirect effect, as demonstrated in several recent articles (MacKinnon, Lockwood, \& Williams, 2004; Pituch, Whittaker, \& Stapleton, 2005). Tests of the significance of and confidence limits for indirect effects based on the distribution of the product method have more accurate Type I error rates and more power than other, more commonly used tests. Values for the two paths involved in the indirect effect and their standard errors are entered in the PRODCLIN program, and distribution of the product confidence limits are computed. Several examples are used to illustrate the PRODCLIN program. The PRODCLIN programs in rich text format may be downloaded from www.psychonomic.org/archive.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{MacKinnon-Lockwood-Hoffman-etal-2002, - author = {David P. MacKinnon and Chondra M. Lockwood and Jeanne M. Hoffman and Stephen G. West and Virgil Sheets}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {A comparison of methods to test mediation and other intervening variable effects}, - doi = {10.1037/1082-989x.7.1.83}, - number = {1}, - pages = {83--104}, - volume = {7}, - abstract = {A Monte Carlo study compared 14 methods to test the statistical significance of the intervening variable effect. An intervening variable (mediator) transmits the effect of an independent variable to a dependent variable. The commonly used R. M. Baron and D. A. Kenny (1986) approach has low statistical power. Two methods based on the distribution of the product and 2 difference-in-coefficients methods have the most accurate Type I error rates and greatest statistical power except in 1 important case in which Type I error rates are too high.
The best balance of Type I error and statistical power across all cases is the test of the joint significance of the two effects comprising the intervening variable effect.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{MacKinnon-Lockwood-Williams-2004, - author = {David P. MacKinnon and Chondra M. Lockwood and Jason Williams}, - date = {2004-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {Confidence limits for the indirect effect: Distribution of the product and resampling methods}, - doi = {10.1207/s15327906mbr3901_4}, - number = {1}, - pages = {99--128}, - volume = {39}, - abstract = {The most commonly used method to test an indirect effect is to divide the estimate of the indirect effect by its standard error and compare the resulting z statistic with a critical value from the standard normal distribution. Confidence limits for the indirect effect are also typically based on critical values from the standard normal distribution. This article uses a simulation study to demonstrate that confidence limits are imbalanced because the distribution of the indirect effect is normal only in special cases. Two alternatives for improving the performance of confidence limits for the indirect effect are evaluated: (a) a method based on the distribution of the product of two normal random variables, and (b) resampling methods. In Study 1, confidence limits based on the distribution of the product are more accurate than methods based on an assumed normal distribution but confidence limits are still imbalanced. Study 2 demonstrates that more accurate confidence limits are obtained using resampling methods, with the bias-corrected bootstrap the best method overall.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, -} - -@Article{Peugh-Enders-2004, - author = {James L. Peugh and Craig K. Enders}, - date = {2004-12}, - journaltitle = {Review of Educational Research}, - title = {Missing data in educational research: A review of reporting practices and suggestions for improvement}, - doi = {10.3102/00346543074004525}, - number = {4}, - pages = {525--556}, - volume = {74}, - publisher = {American Educational Research Association ({AERA})}, - abstract = {Missing data analyses have received considerable recent attention in the methodological literature, and two ``modern'' methods, multiple imputation and maximum likelihood estimation, are recommended. The goals of this article are to (a) provide an overview of missing-data theory, maximum likelihood estimation, and multiple imputation; (b) conduct a methodological review of missing-data reporting practices in 23 applied research journals; and (c) provide a demonstration of multiple imputation and maximum likelihood estimation using the Longitudinal Study of American Youth data. The results indicated that explicit discussions of missing data increased substantially between 1999 and 2003, but the use of maximum likelihood estimation or multiple imputation was rare; the studies relied almost exclusively on listwise and pairwise deletion.}, - keywords = {EM algorithm, maximum likelihood estimation, missing data, multiple imputation, NORM}, -} - -@Article{Preacher-Hayes-2004, - author = {Kristopher J. Preacher and Andrew F. 
Hayes}, - date = {2004-11}, - journaltitle = {Behavior Research Methods, Instruments, \& Computers}, - title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, - doi = {10.3758/bf03206553}, - number = {4}, - pages = {717--731}, - volume = {36}, - abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. Electronic copies of these macros may be downloaded from the Psychonomic Society’s Web archive at www.psychonomic.org/archive/.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {life satisfaction, indirect effect, mediation analysis, cognitive therapy, Sobel test}, -} - -@Article{Preacher-Hayes-2008, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2008-08}, - journaltitle = {Behavior Research Methods}, - title = {Asymptotic and resampling strategies for assessing and comparing indirect effects in multiple mediator models}, - doi = {10.3758/brm.40.3.879}, - number = {3}, - pages = {879--891}, - volume = {40}, - abstract = {Hypotheses involving mediation are common in the behavioral sciences. Mediation exists when a predictor affects a dependent variable indirectly through at least one intervening variable, or mediator. Methods to assess mediation involving multiple simultaneous mediators have received little attention in the methodological literature despite a clear need. We provide an overview of simple and multiple mediation and explore three approaches that can be used to investigate indirect processes, as well as methods for contrasting two or more mediators within a single model. We present an illustrative example, assessing and contrasting potential mediators of the relationship between the helpfulness of socialization agents and job satisfaction. We also provide SAS and SPSS macros, as well as Mplus and LISREL syntax, to facilitate the use of these methods in applications.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {indirect effect, structural equation modeling, residual covariance, total indirect effect, multiple mediator model}, -} - -@Article{Raghunathan-Lepkowski-Hoewyk-etal-2001, - author = {Trivellore E. Raghunathan and James M. 
Lepkowski and John Van Hoewyk and Peter Solenberger}, - date = {2001}, - journaltitle = {Survey Methodology}, - title = {A multivariate technique for multiply imputing missing values using a sequence of regression models}, - number = {1}, - pages = {85--95}, - volume = {27}, - abstract = {This article describes and evaluates a procedure for imputing missing values for a relatively complex data structure when the data are missing at random. The imputations are obtained by fitting a sequence of regression models and drawing values from the corresponding predictive distributions. The types of regression models used are linear, logistic, Poisson, generalized logit or a mixture of these depending on the type of variable being imputed. Two additional common features in the imputation process are incorporated: restriction to a relevant subpopulation for some variables and logical bounds or constraints for the imputed values. The restrictions involve subsetting the sample individuals that satisfy certain criteria while fitting the regression models. The bounds involve drawing values from a truncated predictive distribution. The development of this method was partly motivated by the analysis of two data sets which are used as illustrations. The sequential regression procedure is applied to perform multiple imputation analysis for the two applied problems. The sampling properties of inferences from multiply imputed data sets created using the sequential regression method are evaluated through simulated data sets.}, - keywords = {item nonresponse, missing at random, multiple imputation, nonignorable missing mechanism, regression, sampling properties and simulations}, -} - -@Article{Schafer-Graham-2002, - author = {Joseph L. Schafer and John W. Graham}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Missing data: Our view of the state of the art}, - doi = {10.1037/1082-989x.7.2.147}, - number = {2}, - pages = {147--177}, - volume = {7}, - abstract = {Statistical procedures for missing data have vastly improved, yet misconception and unsound practice still abound. The authors frame the missing-data problem, review methods, offer advice, and raise issues that remain unresolved. They clear up common misunderstandings regarding the missing at random (MAR) concept. They summarize the evidence against older procedures and, with few exceptions, discourage their use. They present, in both technical and practical language, 2 general approaches that come highly recommended: maximum likelihood (ML) and Bayesian multiple imputation (MI). Newer developments are discussed, including some for dealing with missing data that are not MAR. Although not yet in the mainstream, these procedures may eventually extend the ML and MI methods that currently represent the state of the art.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Serlin-2000, - author = {Ronald C. Serlin}, - date = {2000}, - journaltitle = {Psychological Methods}, - title = {Testing for robustness in {Monte Carlo} studies}, - doi = {10.1037/1082-989x.5.2.230}, - number = {2}, - pages = {230--240}, - volume = {5}, - abstract = {Monte Carlo studies provide the information needed to help researchers select appropriate analytical procedures under design conditions in which the underlying assumptions of the procedures are not met. 
In Monte Carlo studies, the 2 errors that one could commit involve (a) concluding that a statistical procedure is robust when it is not or (b) concluding that it is not robust when it is. In previous attempts to apply standard statistical design principles to Monte Carlo studies, the less severe of these errors has been wrongly designated the Type I error. In this article, a method is presented for controlling the appropriate Type I error rate; the determination of the number of iterations required in a Monte Carlo study to achieve desired power is described; and a confidence interval for a test's true Type I error rate is derived. A robustness criterion is also proposed that is a compromise between W. G. Cochran's (1952) and J. V. Bradley's (1978) criteria.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Shrout-Bolger-2002, - author = {Patrick E. Shrout and Niall Bolger}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Mediation in experimental and nonexperimental studies: New procedures and recommendations}, - doi = {10.1037/1082-989x.7.4.422}, - number = {4}, - pages = {422--445}, - volume = {7}, - publisher = {American Psychological Association ({APA})}, - abstract = {Mediation is said to occur when a causal effect of some variable $X$ on an outcome $Y$ is explained by some intervening variable $M$. The authors recommend that with small to moderate samples, bootstrap methods (B. Efron \& R. Tibshirani, 1993) be used to assess mediation. Bootstrap tests are powerful because they detect that the sampling distribution of the mediated effect is skewed away from 0. They argue that R. M. Baron and D. A. Kenny's (1986) recommendation of first testing the $X \to Y$ association for statistical significance should not be a requirement when there is a priori belief that the effect size is small or suppression is a possibility. Empirical examples and computer setups for bootstrap analyses are provided.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Taylor-MacKinnon-Tein-2007, - author = {Aaron B. Taylor and David P. MacKinnon and Jenn-Yun Tein}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Tests of the three-path mediated effect}, - doi = {10.1177/1094428107300344}, - number = {2}, - pages = {241--269}, - volume = {11}, - abstract = {In a three-path mediational model, two mediators intervene in a series between an independent and a dependent variable. Methods of testing for mediation in such a model are generalized from the more often used single-mediator model. Six such methods are introduced and compared in a Monte Carlo study in terms of their Type I error, power, and coverage. Based on its results, the joint significance test is preferred when only a hypothesis test is of interest. The percentile bootstrap and bias-corrected bootstrap are preferred when a confidence interval on the mediated effect is desired, with the latter having more power but also slightly inflated Type I error in some conditions.}, - publisher = {{SAGE} Publications}, - keywords = {mediation, bootstrapping}, -} - -@Article{vanBuuren-Brand-GroothuisOudshoorn-etal-2006, - author = {Stef {van Buuren} and J. P. L. Brand and C. G. M. Groothuis-Oudshoorn and Donald B. 
Rubin}, - date = {2006-12}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Fully conditional specification in multivariate imputation}, - doi = {10.1080/10629360600810434}, - number = {12}, - pages = {1049--1064}, - volume = {76}, - abstract = {The use of the Gibbs sampler with fully conditionally specified models, where the distribution of each variable given the other variables is the starting point, has become a popular method to create imputations in incomplete multivariate data. The theoretical weakness of this approach is that the specified conditional densities can be incompatible, and therefore the stationary distribution to which the Gibbs sampler attempts to converge may not exist. This study investigates practical consequences of this problem by means of simulation. Missing data are created under four different missing data mechanisms. Attention is given to the statistical behavior under compatible and incompatible models. The results indicate that multiple imputation produces essentially unbiased estimates with appropriate coverage in the simple cases investigated, even for the incompatible models. Of particular interest is that these results were produced using only five Gibbs iterations starting from a simple draw from observed marginal distributions. It thus appears that, despite the theoretical weaknesses, the actual performance of conditional model specification for multivariate imputation can be quite good, and therefore deserves further study.}, - publisher = {Informa {UK} Limited}, - keywords = {multivariate missing data, multiple imputation, distributional compatibility, Gibbs sampling, simulation, proper imputation}, -} - -@Article{Yuan-Bentler-2000, - author = {Ke-Hai Yuan and Peter M. Bentler}, - date = {2000-08}, - journaltitle = {Sociological Methodology}, - title = {Three likelihood-based methods for mean and covariance structure analysis with nonnormal missing data}, - doi = {10.1111/0081-1750.00078}, - number = {1}, - pages = {165--200}, - volume = {30}, - abstract = {Survey and longitudinal studies in the social and behavioral sciences generally contain missing data. Mean and covariance structure models play an important role in analyzing such data. Two promising methods for dealing with missing data are a direct maximum-likelihood and a two-stage approach based on the unstructured mean and covariance estimates obtained by the EM-algorithm. Typical assumptions under these two methods are ignorable nonresponse and normality of data. However, data sets in social and behavioral sciences are seldom normal, and experience with these procedures indicates that normal theory based methods for nonnormal data very often lead to incorrect model evaluations. By dropping the normal distribution assumption, we develop more accurate procedures for model inference. Based on the theory of generalized estimating equations, a way to obtain consistent standard errors of the two-stage estimates is given. The asymptotic efficiencies of different estimators are compared under various assumptions. We also propose a minimum chi-square approach and show that the estimator obtained by this approach is asymptotically at least as efficient as the two likelihood-based estimators for either normal or nonnormal data. The major contribution of this paper is that for each estimator, we give a test statistic whose asymptotic distribution is chisquare as long as the underlying sampling distribution enjoys finite fourth-order moments. 
We also give a characterization for each of the two likelihood ratio test statistics when the underlying distribution is nonnormal. Modifications to the likelihood ratio statistics are also given. Our working assumption is that the missing data mechanism is missing completely at random. Examples and Monte Carlo studies indicate that, for commonly encountered nonnormal distributions, the procedures developed in this paper are quite reliable even for samples with missing data that are missing at random.}, - publisher = {{SAGE} Publications}, -} - -@Book{MacKinnon-2008, - author = {David P. MacKinnon}, - series = {Multivariate applications}, - date = {2008}, - title = {Introduction to statistical mediation analysis}, - doi = {10.4324/9780203809556}, - isbn = {9780805864298}, - location = {Hoboken}, - pages = {488}, - library = {QA278.2 .M29 2008}, - addendum = {https://lccn.loc.gov/2007011793}, - abstract = {This volume introduces the statistical, methodological, and conceptual aspects of mediation analysis. Applications from health, social, and developmental psychology, sociology, communication, exercise science, and epidemiology are emphasized throughout. Single-mediator, multilevel, and longitudinal models are reviewed. The author's goal is to help the reader apply mediation analysis to their own data and understand its limitations. - Each chapter features an overview, numerous worked examples, a summary, and exercises (with answers to the odd numbered questions). The accompanying downloadable resources contain outputs described in the book from SAS, SPSS, LISREL, EQS, MPLUS, and CALIS, and a program to simulate the model. The notation used is consistent with existing literature on mediation in psychology. - The book opens with a review of the types of research questions the mediation model addresses. Part II describes the estimation of mediation effects including assumptions, statistical tests, and the construction of confidence limits. Advanced models including mediation in path analysis, longitudinal models, multilevel data, categorical variables, and mediation in the context of moderation are then described. The book closes with a discussion of the limits of mediation analysis, additional approaches to identifying mediating variables, and future directions. - Introduction to Statistical Mediation Analysis is intended for researchers and advanced students in health, social, clinical, and developmental psychology as well as communication, public health, nursing, epidemiology, and sociology. Some exposure to a graduate level research methods or statistics course is assumed. The overview of mediation analysis and the guidelines for conducting a mediation analysis will be appreciated by all readers.}, - publisher = {Erlbaum Psych Press}, - keywords = {Mediation (Statistics)}, -} - -@Book{Venables-Ripley-2002, - author = {W. N. Venables and B. D. Ripley}, - date = {2002}, - title = {Modern applied statistics with {S}}, - doi = {10.1007/978-0-387-21706-2}, - publisher = {Springer New York}, -} - -@Article{Biesanz-Falk-Savalei-2010, - author = {Jeremy C. Biesanz and Carl F. Falk and Victoria Savalei}, - date = {2010-08}, - journaltitle = {Multivariate Behavioral Research}, - title = {Assessing mediational models: Testing and interval estimation for indirect effects}, - doi = {10.1080/00273171.2010.498292}, - number = {4}, - pages = {661--701}, - volume = {45}, - abstract = {Theoretical models specifying indirect or mediated effects are common in the social sciences. 
An indirect effect exists when an independent variable's influence on the dependent variable is mediated through an intervening variable. Classic approaches to assessing such mediational hypotheses (Baron \& Kenny, 1986; Sobel, 1982) have in recent years been supplemented by computationally intensive methods such as bootstrapping, the distribution of the product methods, and hierarchical Bayesian Markov chain Monte Carlo (MCMC) methods. These different approaches for assessing mediation are illustrated using data from Dunn, Biesanz, Human, and Finn (2007). However, little is known about how these methods perform relative to each other, particularly in more challenging situations, such as with data that are incomplete and/or nonnormal. This article presents an extensive Monte Carlo simulation evaluating a host of approaches for assessing mediation. We examine Type I error rates, power, and coverage. We study normal and nonnormal data as well as complete and incomplete data. In addition, we adapt a method, recently proposed in statistical literature, that does not rely on confidence intervals (CIs) to test the null hypothesis of no indirect effect. The results suggest that the new inferential method--the partial posterior p value--slightly outperforms existing ones in terms of maintaining Type I error rates while maximizing power, especially with incomplete data. Among confidence interval approaches, the bias-corrected accelerated (BCa) bootstrapping approach often has inflated Type I error rates and inconsistent coverage and is not recommended. In contrast, the bootstrapped percentile confidence interval and the hierarchical Bayesian MCMC method perform best overall, maintaining Type I error rates, exhibiting reasonable power, and producing stable and accurate coverage rates.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Blanca-Arnau-LopezMontiel-etal-2013, - author = {Mar\'\ia J. Blanca and Jaume Arnau and Dolores L{\a'o}pez-Montiel and Roser Bono and Rebecca Bendayan}, - date = {2013-05}, - journaltitle = {Methodology}, - title = {Skewness and kurtosis in real data samples}, - doi = {10.1027/1614-2241/a000057}, - number = {2}, - pages = {78--84}, - volume = {9}, - abstract = {Parametric statistics are based on the assumption of normality. Recent findings suggest that Type I error and power can be adversely affected when data are non-normal. This paper aims to assess the distributional shape of real data by examining the values of the third and fourth central moments as a measurement of skewness and kurtosis in small samples. The analysis concerned 693 distributions with a sample size ranging from 10 to 30. Measures of cognitive ability and of other psychological variables were included. The results showed that skewness ranged between -2.49 and 2.33. The values of kurtosis ranged between -1.92 and 7.41. Considering skewness and kurtosis together the results indicated that only 5.5\% of distributions were close to expected values under normality. 
Although extreme contamination does not seem to be very frequent, the findings are consistent with previous research suggesting that normality is not the rule with real data.}, - publisher = {Hogrefe Publishing Group}, -} - -@Article{Boettiger-Eddelbuettel-2017, - author = {Carl Boettiger and Dirk Eddelbuettel}, - date = {2017}, - journaltitle = {The R Journal}, - title = {An introduction to {Rocker}: Docker containers for {R}}, - doi = {10.32614/rj-2017-065}, - number = {2}, - pages = {527}, - volume = {9}, - abstract = {We describe the Rocker project, which provides a widely-used suite of Docker images with customized R environments for particular tasks. We discuss how this suite is organized, and how these tools can increase portability, scaling, reproducibility, and convenience of R users and developers.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-docker-rocker}, -} - -@Article{Chow-Ho-Hamaker-etal-2010, - author = {Sy-Miin Chow and Moon-ho R. Ho and Ellen L. Hamaker and Conor V. Dolan}, - date = {2010-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Equivalence and differences between structural equation modeling and state-space modeling techniques}, - doi = {10.1080/10705511003661553}, - number = {2}, - pages = {303--332}, - volume = {17}, - abstract = {State-space modeling techniques have been compared to structural equation modeling (SEM) techniques in various contexts but their unique strengths have often been overshadowed by their similarities to SEM. In this article, we provide a comprehensive discussion of these 2 approaches' similarities and differences through analytic comparisons and numerical simulations, with a focus on their use in representing intraindividual dynamics and interindividual differences. To demonstrate the respective strengths and weaknesses of the 2 approaches in representing these 2 aspects, we simulated data under (a) a cross-sectional common factor model, (b) a latent difference score model with random effects in intercept and slope, and (c) a bivariate dynamic factor analysis model with auto- and cross-regression parameters. Possible ways in which SEM and state-space modeling can be utilized as complementary tools in representing human developmental and other related processes are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {ild, sem, ssm}, -} - -@Article{Deboeck-Preacher-2015, - author = {Pascal R. Deboeck and Kristopher J. Preacher}, - date = {2015-06}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {No Need to be Discrete: A Method for Continuous Time Mediation Analysis}, - doi = {10.1080/10705511.2014.973960}, - number = {1}, - pages = {61--75}, - volume = {23}, - abstract = {Mediation is one concept that has shaped numerous theories. The list of problems associated with mediation models, however, has been growing. Mediation models based on cross-sectional data can produce unexpected estimates, so much so that making longitudinal or causal inferences is inadvisable. Even longitudinal mediation models have faults, as parameter estimates produced by these models are specific to the lag between observations, leading to much debate over appropriate lag selection. Using continuous time models (CTMs) rather than commonly employed discrete time models, one can estimate lag-independent parameters. 
We demonstrate methodology that allows for continuous time mediation analyses, with attention to concepts such as indirect and direct effects, partial mediation, the effect of lag, and the lags at which relations become maximal. A simulation compares common longitudinal mediation methods with CTMs. Reanalysis of a published covariance matrix demonstrates that CTMs can be fit to data used in longitudinal mediation studies.}, - publisher = {Informa {UK} Limited}, - keywords = {continuous time models, cross-lagged panel model, exact discrete model, longitudinal mediation, mediation}, - annotation = {ild, ild-mediation}, -} - -@Article{Dudgeon-2017, - author = {Paul Dudgeon}, - date = {2017-03}, - journaltitle = {Psychometrika}, - title = {Some improvements in confidence intervals for standardized regression coefficients}, - doi = {10.1007/s11336-017-9563-z}, - number = {4}, - pages = {928--951}, - volume = {82}, - keywords = {standardized regression coefficients, robust confidence intervals, non-normality}, - abstract = {Yuan and Chan (Psychometrika 76:670–690, 2011. doi:10.1007/S11336-011-9224-6) derived consistent confidence intervals for standardized regression coefficients under fixed and random score assumptions. Jones and Waller (Psychometrika 80:365–378, 2015. doi:10.1007/S11336-013-9380-Y) extended these developments to circumstances where data are non-normal by examining confidence intervals based on Browne's (Br J Math Stat Psychol 37:62–83, 1984. doi:10.1111/j.2044-8317.1984.tb00789.x) asymptotic distribution-free (ADF) theory. Seven different heteroscedastic-consistent (HC) estimators were investigated in the current study as potentially better solutions for constructing confidence intervals on standardized regression coefficients under non-normality. Normal theory, ADF, and HC estimators were evaluated in a Monte Carlo simulation. Findings confirmed the superiority of the HC3 (MacKinnon and White, J Econ 35:305–325, 1985. doi:10.1016/0304-4076(85)90158-7) and HC5 (Cribari-Neto and Da Silva, Adv Stat Anal 95:129–146, 2011. doi:10.1007/s10182-010-0141-2) interval estimators over Jones and Waller's ADF estimator under all conditions investigated, as well as over the normal theory method. The HC5 estimator was more robust in a restricted set of conditions over the HC3 estimator. Some possible extensions of HC estimators to other effect size measures are considered for future developments.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Eddelbuettel-Francois-2011, - author = {Dirk Eddelbuettel and Romain Fran{\c c}ois}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{Rcpp}: Seamless {R} and {C++} integration}, - doi = {10.18637/jss.v040.i08}, - number = {8}, - volume = {40}, - abstract = {The Rcpp package simplifies integrating C++ code with R. It provides a consistent C++ class hierarchy that maps various types of R objects (vectors, matrices, functions, environments, ...) to dedicated C++ classes. Object interchange between R and C++ is managed by simple, flexible and extensible concepts which include broad support for C++ Standard Template Library idioms. C++ code can both be compiled, linked and loaded on the fly, or added via packages. Flexible error and exception code handling is provided. 
Rcpp substantially lowers the barrier for programmers wanting to combine C++ code with R.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages}, -} - -@Article{Hayes-Scharkow-2013, - author = {Andrew F. Hayes and Michael Scharkow}, - date = {2013-08}, - journaltitle = {Psychological Science}, - title = {The relative trustworthiness of inferential tests of the indirect effect in statistical mediation analysis}, - doi = {10.1177/0956797613480187}, - number = {10}, - pages = {1918--1927}, - volume = {24}, - abstract = {A content analysis of 2 years of Psychological Science articles reveals inconsistencies in how researchers make inferences about indirect effects when conducting a statistical mediation analysis. In this study, we examined the frequency with which popularly used tests disagree, whether the method an investigator uses makes a difference in the conclusion he or she will reach, and whether there is a most trustworthy test that can be recommended to balance practical and performance considerations. We found that tests agree much more frequently than they disagree, but disagreements are more common when an indirect effect exists than when it does not. We recommend the bias-corrected bootstrap confidence interval as the most trustworthy test if power is of utmost concern, although it can be slightly liberal in some circumstances. Investigators concerned about Type I errors should choose the Monte Carlo confidence interval or the distribution-of-the-product approach, which rarely disagree. The percentile bootstrap confidence interval is a good compromise test.}, - publisher = {{SAGE} Publications}, -} - -@Article{Hunter-2017, - author = {Michael D. Hunter}, - date = {2017-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {State Space Modeling in an Open Source, Modular, Structural Equation Modeling Environment}, - doi = {10.1080/10705511.2017.1369354}, - number = {2}, - pages = {307--324}, - volume = {25}, - abstract = {State space models (SSMs) are introduced in the context of structural equation modeling (SEM). In particular, the OpenMx implementation of SSMs using the Kalman filter and prediction error decomposition is discussed. In reflection of modularity, the implementation uses the same full information maximum likelihood missing data procedures for SSMs and SEMs. Similarly, generic OpenMx features such as likelihood ratio tests, profile likelihood confidence intervals, Hessian-based standard errors, definition variables, and the matrix algebra interface are all supported. Example scripts for specification of autoregressive models, multiple lag models (VAR(p)), multiple lag moving average models (VARMA(p, q)), multiple subject models, and latent growth models are provided. Additionally, latent variable calculation based on the Kalman filter and raw data generation based on a model are all included. Finally, future work for extending SSMs to allow for random effects and for presenting them in diagrams is discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {state space model, software, Kalman filter, OpenMx}, - annotation = {ild, ild-software, sem, sem-software, ssm, ssm-software}, -} - -@Article{Jones-Waller-2013a, - author = {Jeff A. Jones and Niels G. 
Waller}, - date = {2013}, - journaltitle = {Psychological Methods}, - title = {Computing confidence intervals for standardized regression coefficients.}, - doi = {10.1037/a0033269}, - number = {4}, - pages = {435--453}, - volume = {18}, - abstract = {With fixed predictors, the standard method (Cohen, Cohen, West, \& Aiken, 2003, p. 86; Harris, 2001, p. 80; Hays, 1994, p. 709) for computing confidence intervals (CIs) for standardized regression coefficients fails to account for the sampling variability of the criterion standard deviation. With random predictors, this method also fails to account for the sampling variability of the predictor standard deviations. Nevertheless, under some conditions the standard method will produce CIs with accurate coverage rates. To delineate these conditions, we used a Monte Carlo simulation to compute empirical CI coverage rates in samples drawn from 36 populations with a wide range of data characteristics. We also computed the empirical CI coverage rates for 4 alternative methods that have been discussed in the literature: noncentrality interval estimation, the delta method, the percentile bootstrap, and the bias-corrected and accelerated bootstrap. Our results showed that for many data-parameter configurations--for example, sample size, predictor correlations, coefficient of determination ($R^2$), orientation of $\beta$ with respect to the eigenvectors of the predictor correlation matrix, $R_X$--the standard method produced coverage rates that were close to their expected values. However, when population $R^2$ was large and when $\beta$ approached the last eigenvector of $R_X$, then the standard method coverage rates were frequently below the nominal rate (sometimes by a considerable amount). In these conditions, the delta method and the 2 bootstrap procedures were consistently accurate. Results using noncentrality interval estimation were inconsistent. In light of these findings, we recommend that researchers use the delta method to evaluate the sampling variability of standardized regression coefficients.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Jones-Waller-2015, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2015-06}, - journaltitle = {Psychometrika}, - title = {The Normal-Theory and Asymptotic Distribution-Free ({ADF}) Covariance Matrix of Standardized Regression Coefficients: Theoretical Extensions and Finite Sample Behavior}, - doi = {10.1007/s11336-013-9380-y}, - number = {2}, - pages = {365--378}, - volume = {80}, - abstract = {Yuan and Chan (Psychometrika, 76, 670–690, 2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a method for computing this covariance matrix from correlations. Next, we describe an asymptotic distribution-free (ADF; Browne in British Journal of Mathematical and Statistical Psychology, 37, 62–83, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with nonnormal data in moderate-to-large samples using both simulated and real-data examples. 
R code (R Development Core Team, 2012) is available from the authors or through the Psychometrika online repository for supplementary materials.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {standardized-regression, standardized-regression-hc}, -} - -@Article{Koopman-Howe-Hollenbeck-etal-2015, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck and Hock-Peng Sin}, - date = {2015}, - journaltitle = {Journal of Applied Psychology}, - title = {Small sample mediation testing: Misplaced confidence in bootstrapped confidence intervals}, - doi = {10.1037/a0036635}, - number = {1}, - pages = {194--202}, - volume = {100}, - abstract = {Bootstrapping is an analytical tool commonly used in psychology to test the statistical significance of the indirect effect in mediation models. Bootstrapping proponents have particularly advocated for its use for samples of 20-80 cases. This advocacy has been heeded, especially in the Journal of Applied Psychology, as researchers are increasingly utilizing bootstrapping to test mediation with samples in this range. We discuss reasons to be concerned with this escalation, and in a simulation study focused specifically on this range of sample sizes, we demonstrate not only that bootstrapping has insufficient statistical power to provide a rigorous hypothesis test in most conditions but also that bootstrapping has a tendency to exhibit an inflated Type I error rate. We then extend our simulations to investigate an alternative empirical resampling method as well as a Bayesian approach and demonstrate that they exhibit comparable statistical power to bootstrapping in small samples without the associated inflated Type I error. Implications for researchers testing mediation hypotheses in small samples are presented. For researchers wishing to use these methods in their own research, we have provided R syntax in the online supplemental materials.}, - publisher = {American Psychological Association ({APA})}, - keywords = {mediation, bootstrapping, permutation, Bayes}, -} - -@Article{Kurtzer-Sochat-Bauer-2017, - author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. Bauer}, - date = {2017-05}, - journaltitle = {{PLOS} {ONE}}, - title = {{Singularity}: Scientific containers for mobility of compute}, - doi = {10.1371/journal.pone.0177459}, - editor = {Attila Gursoy}, - number = {5}, - pages = {e0177459}, - volume = {12}, - publisher = {Public Library of Science ({PLoS})}, - annotation = {container, container-singularity}, -} - -@Article{Kwan-Chan-2011, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2011-04}, - journaltitle = {Behavior Research Methods}, - title = {Comparing standardized coefficients in structural equation modeling: A model reparameterization approach}, - doi = {10.3758/s13428-011-0088-6}, - number = {3}, - pages = {730--745}, - volume = {43}, - abstract = {We propose a two-stage method for comparing standardized coefficients in structural equation modeling (SEM). At stage 1, we transform the original model of interest into the standardized model by model reparameterization, so that the model parameters appearing in the standardized model are equivalent to the standardized parameters of the original model. At stage 2, we impose appropriate linear equality constraints on the standardized model and use a likelihood ratio test to make statistical inferences about the equality of standardized coefficients. 
Unlike other existing methods for comparing standardized coefficients, the proposed method does not require specific modeling features (e.g., specification of nonlinear constraints), which are available only in certain SEM software programs. Moreover, this method allows researchers to compare two or more standardized coefficients simultaneously in a standard and convenient way. Three real examples are given to illustrate the proposed method, using EQS, a popular SEM software program. Results show that the proposed method performs satisfactorily for testing the equality of standardized coefficients.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Kwan-Chan-2014, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2014-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Comparing squared multiple correlation coefficients using structural equation modeling}, - doi = {10.1080/10705511.2014.882673}, - number = {2}, - pages = {225--238}, - volume = {21}, - abstract = {In social science research, a common topic in multiple regression analysis is to compare the squared multiple correlation coefficients in different populations. Existing methods based on asymptotic theories (Olkin \& Finn, 1995) and bootstrapping (Chan, 2009) are available but these can only handle a 2-group comparison. Another method based on structural equation modeling (SEM) has been proposed recently. However, this method has three disadvantages. First, it requires the user to explicitly specify the sample R2 as a function in terms of the basic SEM model parameters, which is sometimes troublesome and error prone. Second, it requires the specification of nonlinear constraints, which is not available in some popular SEM software programs. Third, it is for a 2-group comparison primarily. In this article, a 2-stage SEM method is proposed as an alternative. Unlike all other existing methods, the proposed method is simple to use, and it does not require any specific programming features such as the specification of nonlinear constraints. More important, the method allows a simultaneous comparison of 3 or more groups. A real example is given to illustrate the proposed method using EQS, a popular SEM software program.}, - keywords = {squared multiple correlation coefficients, structural equation modeling, model reparameterization, multi-sample analysis}, - publisher = {Informa {UK} Limited}, -} - -@Article{Merkel-2014, - author = {Dirk Merkel}, - date = {2014}, - journaltitle = {Linux Journal}, - title = {{Docker}: Lightweight {Linux} containers for consistent development and deployment}, - number = {239}, - pages = {2}, - volume = {2014}, - url = {https://www.linuxjournal.com/content/docker-lightweight-linux-containers-consistent-development-and-deployment}, - annotation = {container, container-docker}, -} - -@Article{Neale-Hunter-Pritikin-etal-2015, - author = {Michael C. Neale and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Timothy R. Brick and Robert M. Kirkpatrick and Ryne Estabrook and Timothy C. Bates and Hermine H. Maes and Steven M. Boker}, - date = {2015-01}, - journaltitle = {Psychometrika}, - title = {{OpenMx} 2.0: Extended Structural Equation and Statistical Modeling}, - doi = {10.1007/s11336-014-9435-8}, - number = {2}, - pages = {535--549}, - volume = {81}, - abstract = {The new software package OpenMx 2.0 for structural equation and other statistical modeling is introduced and its features are described. 
OpenMx is evolving in a modular direction and now allows a mix-and-match computational approach that separates model expectations from fit functions and optimizers. Major backend architectural improvements include a move to swappable open-source optimizers such as the newly written CSOLNP. Entire new methodologies such as item factor analysis and state space modeling have been implemented. New model expectation functions including support for the expression of models in LISREL syntax and a simplified multigroup expectation function are available. Ease-of-use improvements include helper functions to standardize model parameters and compute their Jacobian-based standard errors, access to model components through standard R \$ mechanisms, and improved tab completion from within the R Graphical User Interface.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Ou-Hunter-Chow-2019, - author = {Lu Ou and Michael D. Hunter and Sy-Miin Chow}, - date = {2019}, - journaltitle = {The R Journal}, - title = {What's for {dynr}: A package for linear and nonlinear dynamic modeling in {R}}, - doi = {10.32614/rj-2019-012}, - number = {1}, - pages = {91}, - volume = {11}, - abstract = {Intensive longitudinal data in the behavioral sciences are often noisy, multivariate in nature, and may involve multiple units undergoing regime switches by showing discontinuities interspersed with continuous dynamics. Despite increasing interest in using linear and nonlinear differential/difference equation models with regime switches, there has been a scarcity of software packages that are fast and freely accessible. We have created an R package called dynr that can handle a broad class of linear and nonlinear discrete- and continuous-time models, with regime-switching properties and linear Gaussian measurement functions, in C, while maintaining simple and easy-to-learn model specification functions in R. We present the mathematical and computational bases used by the dynr R package, and present two illustrative examples to demonstrate the unique features of dynr.}, - publisher = {The R Foundation}, - annotation = {ild, ild-software, r, r-packages}, -} - -@Article{Preacher-Selig-2012, - author = {Kristopher J. Preacher and James P. Selig}, - date = {2012-04}, - journaltitle = {Communication Methods and Measures}, - title = {Advantages of Monte Carlo Confidence Intervals for Indirect Effects}, - doi = {10.1080/19312458.2012.679848}, - number = {2}, - pages = {77--98}, - volume = {6}, - abstract = {Monte Carlo simulation is a useful but underutilized method of constructing confidence intervals for indirect effects in mediation analysis. The Monte Carlo confidence interval method has several distinct advantages over rival methods. Its performance is comparable to other widely accepted methods of interval construction, it can be used when only summary data are available, it can be used in situations where rival methods (e.g., bootstrapping and distribution of the product methods) are difficult or impossible, and it is not as computer-intensive as some other methods. 
In this study we discuss Monte Carlo confidence intervals for indirect effects, report the results of a simulation study comparing their performance to that of competing methods, demonstrate the method in applied examples, and discuss several software options for implementation in applied settings.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Rosseel-2012, - author = {Yves Rosseel}, - date = {2012}, - journaltitle = {Journal of Statistical Software}, - title = {{lavaan}: An {R} package for structural equation modeling}, - doi = {10.18637/jss.v048.i02}, - number = {2}, - volume = {48}, - abstract = {Structural equation modeling (SEM) is a vast field and widely used by many applied researchers in the social and behavioral sciences. Over the years, many software packages for structural equation modeling have been developed, both free and commercial. However, perhaps the best state-of-the-art software packages in this field are still closed-source and/or commercial. The R package lavaan has been developed to provide applied researchers, teachers, and statisticians, a free, fully open-source, but commercial-quality package for latent variable modeling. This paper explains the aims behind the development of the package, gives an overview of its most important features, and provides some examples to illustrate how lavaan works in practice.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Schouten-Lugtig-Vink-2018, - author = {Rianne Margaretha Schouten and Peter Lugtig and Gerko Vink}, - date = {2018-07}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Generating missing values for simulation purposes: A multivariate amputation procedure}, - doi = {10.1080/00949655.2018.1491577}, - number = {15}, - pages = {2909--2930}, - volume = {88}, - abstract = {Missing data form a ubiquitous problem in scientific research, especially since most statistical analyses require complete data. To evaluate the performance of methods dealing with missing data, researchers perform simulation studies. An important aspect of these studies is the generation of missing values in a simulated, complete data set: the amputation procedure. We investigated the methodological validity and statistical nature of both the current amputation practice and a newly developed and implemented multivariate amputation procedure. We found that the current way of practice may not be appropriate for the generation of intuitive and reliable missing data problems. The multivariate amputation procedure, on the other hand, generates reliable amputations and allows for a proper regulation of missing data problems. The procedure has additional features to generate any missing data scenario precisely as intended. 
Hence, the multivariate amputation procedure is an efficient method to accurately evaluate missing data methodology.}, - publisher = {Informa {UK} Limited}, - keywords = {missing data, multiple imputation, multivariate amputation, evaluation}, -} - -@Article{Tofighi-Kelley-2019, - author = {Davood Tofighi and Ken Kelley}, - date = {2019-06}, - journaltitle = {Multivariate Behavioral Research}, - title = {Indirect effects in sequential mediation models: Evaluating methods for hypothesis testing and confidence interval formation}, - doi = {10.1080/00273171.2019.1618545}, - number = {2}, - pages = {188--210}, - volume = {55}, - abstract = {Complex mediation models, such as a two-mediator sequential model, have become more prevalent in the literature. To test an indirect effect in a two-mediator model, we conducted a large-scale Monte Carlo simulation study of the Type I error, statistical power, and confidence interval coverage rates of 10 frequentist and Bayesian confidence/credible intervals (CIs) for normally and nonnormally distributed data. The simulation included never-studied methods and conditions (e.g., Bayesian CI with flat and weakly informative prior methods, two model-based bootstrap methods, and two nonnormality conditions) as well as understudied methods (e.g., profile-likelihood, Monte Carlo with maximum likelihood standard error [MC-ML] and robust standard error [MC-Robust]). The popular BC bootstrap showed inflated Type I error rates and CI under-coverage. We recommend different methods depending on the purpose of the analysis. For testing the null hypothesis of no mediation, we recommend MC-ML, profile-likelihood, and two Bayesian methods. To report a CI, if data has a multivariate normal distribution, we recommend MC-ML, profile-likelihood, and the two Bayesian methods; otherwise, for multivariate nonnormal data we recommend the percentile bootstrap. We argue that the best method for testing hypotheses is not necessarily the best method for CI construction, which is consistent with the findings we present.}, - keywords = {indirect effect, confidence interval, sequential mediation, Bayesian credible interval}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bayesian, mediation-bootstrap, mediation-lb, mediation-montecarlo, semmcci}, -} - -@Article{Tofighi-MacKinnon-2015, - author = {Davood Tofighi and David P. MacKinnon}, - date = {2015-08}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{Monte Carlo} confidence intervals for complex functions of indirect effects}, - doi = {10.1080/10705511.2015.1057284}, - number = {2}, - pages = {194--205}, - volume = {23}, - abstract = {One challenge in mediation analysis is to generate a confidence interval (CI) with high coverage and power that maintains a nominal significance level for any well-defined function of indirect and direct effects in the general context of structural equation modeling (SEM). This study discusses a proposed Monte Carlo extension that finds the CIs for any well-defined function of the coefficients of SEM such as the product of $k$ coefficients and the ratio of the contrasts of indirect effects, using the Monte Carlo method. Finally, we conduct a small-scale simulation study to compare CIs produced by the Monte Carlo, nonparametric bootstrap, and asymptotic-delta methods. 
Based on our simulation study, we recommend researchers use the Monte Carlo method to test a complex function of indirect effects.}, - keywords = {confidence interval, mediation analysis, Monte Carlo}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-delta, mediation-montecarlo, semmcci}, -} - -@Article{vanBuuren-GroothuisOudshoorn-2011, - author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{mice}: Multivariate Imputation by Chained Equations in {R}}, - doi = {10.18637/jss.v045.i03}, - number = {3}, - volume = {45}, - abstract = {The R package mice imputes incomplete multivariate data by chained equations. The software mice 1.0 appeared in the year 2000 as an S-PLUS library, and in 2001 as an R package. mice 1.0 introduced predictor selection, passive imputation and automatic pooling. This article documents mice, which extends the functionality of mice 1.0 in several ways. In mice, the analysis of imputed data is made completely general, whereas the range of models under which pooling works is substantially extended. mice adds new functionality for imputing multilevel data, automatic predictor selection, data handling, post-processing imputed values, specialized pooling routines, model selection tools, and diagnostic graphs. Imputation of categorical data is improved in order to bypass problems caused by perfect prediction. Special attention is paid to transformations, sum scores, indices and interactions using passive imputation, and to the proper setup of the predictor matrix. mice can be downloaded from the Comprehensive R Archive Network. This article provides a hands-on, stepwise approach to solve applied incomplete data problems.}, - publisher = {Foundation for Open Access Statistic}, - keywords = {MICE, multiple imputation, chained equations, fully conditional specification, Gibbs sampler, predictor selection, passive imputation, R}, -} - -@Article{Wu-Jia-2013, - author = {Wei Wu and Fan Jia}, - date = {2013-09}, - journaltitle = {Multivariate Behavioral Research}, - title = {A new procedure to test mediation with missing data through nonparametric bootstrapping and multiple imputation}, - doi = {10.1080/00273171.2013.816235}, - number = {5}, - pages = {663--691}, - volume = {48}, - abstract = {This article proposes a new procedure to test mediation with the presence of missing data by combining nonparametric bootstrapping with multiple imputation (MI). This procedure performs MI first and then bootstrapping for each imputed data set. The proposed procedure is more computationally efficient than the procedure that performs bootstrapping first and then MI for each bootstrap sample. The validity of the procedure is evaluated using a simulation study under different sample size, missing data mechanism, missing data proportion, and shape of distribution conditions. The result suggests that the proposed procedure performs comparably to the procedure that combines bootstrapping with full information maximum likelihood under most conditions. 
However, caution needs to be taken when using this procedure to handle missing not-at-random or nonnormal data.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Yuan-Chan-2011, - author = {Ke-Hai Yuan and Wai Chan}, - date = {2011-08}, - journaltitle = {Psychometrika}, - title = {Biases and Standard Errors of Standardized Regression Coefficients}, - doi = {10.1007/s11336-011-9224-6}, - number = {4}, - pages = {670--690}, - volume = {76}, - abstract = {The paper obtains consistent standard errors (SE) and biases of order O(1/n) for the sample standardized regression coefficients with both random and given predictors. Analytical results indicate that the formulas for SEs given in popular text books are consistent only when the population value of the regression coefficient is zero. The sample standardized regression coefficients are also biased in general, although it should not be a concern in practice when the sample size is not too small. Monte Carlo results imply that, for both standardized and unstandardized sample regression coefficients, SE estimates based on asymptotics tend to under-predict the empirical ones at smaller sample sizes.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {asymptotics, bias, consistency, Monte Carlo}, - annotation = {standardized-regression, standardized-regression-delta, standardized-regression-normal, standardized-regression-adf}, -} - -@Article{Yzerbyt-Muller-Batailler-etal-2018, - author = {Vincent Yzerbyt and Dominique Muller and C{\a'e}dric Batailler and Charles M. Judd}, - date = {2018-12}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {New recommendations for testing indirect effects in mediational models: The need to report and test component paths}, - doi = {10.1037/pspa0000132}, - number = {6}, - pages = {929--943}, - volume = {115}, - abstract = {In light of current concerns with replicability and reporting false-positive effects in psychology, we examine Type I errors and power associated with 2 distinct approaches for the assessment of mediation, namely the component approach (testing individual parameter estimates in the model) and the index approach (testing a single mediational index). We conduct simulations that examine both approaches and show that the most commonly used tests under the index approach risk inflated Type I errors compared with the joint-significance test inspired by the component approach. We argue that the tendency to report only a single mediational index is worrisome for this reason and also because it is often accompanied by a failure to critically examine the individual causal paths underlying the mediational model. We recommend testing individual components of the indirect effect to argue for the presence of an indirect effect and then using other recommended procedures to calculate the size of that effect. Beyond simple mediation, we show that our conclusions also apply in cases of within-participant mediation and moderated mediation. 
We also provide a new R-package that allows for an easy implementation of our recommendations.}, - publisher = {American Psychological Association ({APA})}, - keywords = {indirect effects, mediation, joint-significance, bootstrap}, -} - -@Article{Zhang-Wang-2012, - author = {Zhiyong Zhang and Lijuan Wang}, - date = {2012-12}, - journaltitle = {Psychometrika}, - title = {Methods for mediation analysis with missing data}, - doi = {10.1007/s11336-012-9301-5}, - number = {1}, - pages = {154--184}, - volume = {78}, - abstract = {Despite wide applications of both mediation models and missing data techniques, formal discussion of mediation analysis with missing data is still rare. We introduce and compare four approaches to dealing with missing data in mediation analysis including listwise deletion, pairwise deletion, multiple imputation (MI), and a two-stage maximum likelihood (TS-ML) method. An R package bmem is developed to implement the four methods for mediation analysis with missing data in the structural equation modeling framework, and two real examples are used to illustrate the application of the four methods. The four methods are evaluated and compared under MCAR, MAR, and MNAR missing data mechanisms through simulation studies. Both MI and TS-ML perform well for MCAR and MAR data regardless of the inclusion of auxiliary variables and for AV-MNAR data with auxiliary variables. Although listwise deletion and pairwise deletion have low power and large parameter estimation bias in many studied conditions, they may provide useful information for exploring missing mechanisms.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, missing data, MI, TS-ML, bootstrap, auxiliary variables}, -} - -@Book{Eddelbuettel-2013, - author = {Dirk Eddelbuettel}, - date = {2013}, - title = {Seamless {R} and {C++} integration with {Rcpp}}, - doi = {10.1007/978-1-4614-6868-4}, - isbn = {978-1-4614-6868-4}, - publisher = {Springer New York}, - abstract = {Illustrates a range of statistical computations in R using the Rcpp package. Provides a general introduction to extending R with C++ code. Features an appendix for R users new to the C++ programming language Rcpp packages are presented in the context of useful application case studies.}, - annotation = {r, r-packages}, -} - -@Book{Enders-2010, - author = {Craig K. Enders}, - date = {2010-05-31}, - title = {Applied missing data analysis}, - isbn = {9781606236390}, - pagetotal = {377}, - library = {HA29 .E497 2010}, - addendum = {https://lccn.loc.gov/2010008465}, - abstract = {Walking readers step by step through complex concepts, this book translates missing data techniques into something that applied researchers and graduate students can understand and utilize in their own research. Enders explains the rationale and procedural details for maximum likelihood estimation, Bayesian estimation, multiple imputation, and models for handling missing not at random (MNAR) data. Easy-to-follow examples and small simulated data sets illustrate the techniques and clarify the underlying principles. The companion website (www.appliedmissingdata.com) includes data files and syntax for the examples in the book as well as up-to-date information on software. 
The book is accessible to substantive researchers while providing a level of detail that will satisfy quantitative specialists.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Missing observations (Statistics), Social sciences--Research--Methodology}, -} - -@InBook{Koopman-Howe-Hollenbeck-2014, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck}, - booktitle = {More statistical and methodological myths and urban legends: Doctrine, verity and fable in organizational and social sciences}, - date = {2014}, - title = {Pulling the {Sobel} test up by its bootstraps}, - bookauthor = {Charles E. Lance and Robert J. Vandenberg}, - isbn = {9780203775851}, - pages = {224--243}, - doi = {10.4324/9780203775851 }, - isbn = {9780203775851}, - abstract = {In the domain of building and testing theory, mediation relationships are among the most important that can be proposed. Mediation helps to explicate our theoretical models (Leavitt, Mitchell, \& Peterson, 2010) and addresses the fundamental question of why two constructs are related (Whetten, 1989). One of the better-known methods for testing mediation is commonly referred to as the ``Sobel test,'' named for the researcher who derived a standard error (Sobel, 1982) to test the significance of the indirect effect. Recently, a number of different research teams (e.g., Preacher \& Hayes, 2004; Shrout \& Bolger, 2002) have criticized the Sobel test because this standard error requires an assumption of normality for the indirect effect sampling distribution. This distribution tends to be positively skewed (i.e,. not normal), particularly in small samples, and so this assumption can be problematic (Preacher \& Hayes, 2004; Stone \& Sobel, 1990). As a result, the statistical power of the Sobel test may be lessened in these contexts (Preacher \& Hayes 2004; Shrout \& Bolger, 2002). In light of this concern, some scholars have advocated instead for the use of bootstrapping to test the significance of the indirect effect (e.g.. Shrout \& Bolger 2002). Bootstrapping requires no a priori assumption about the shape of the sampling distribution because this distribution is empirically estimated using a resampling procedure (Efron \& Tibshirani, 1993). As a result, departures from normality are less troublesome when creating a confidence interval for the indirect effect. For this reason, bootstrapping is now widely believed to be inherently superior to the Sobel test when testing the significance of the indirect effect in organizational research. Our position is that this belief constitutes an urban legend. As with all statistical urban legends, there is an underlying kernel of truth to the belief that bootstrapping is superior to the Sobel test. However, as we discuss in this chapter, there are several reasons to be concerned with a broad belief in the superiority of bootstrapping. We begin with a brief overview of mediation testing focusing on the Sobel test and bootstrapping and then explain the underlying kernel of truth that has propelled bootstrapping to the forefront of mediation testing in organizational research. Subsequently, we discuss four areas of concern that cast doubt on the belief of the inherent superiority of bootstrapping. Finally, we conclude with recommendations concerning the future of mediation testing in organizational research.}, - publisher = {Routledge/Taylor \& Francis Group}, -} - -@Book{Little-Rubin-2019, - author = {Roderick J. A. Little and Donald B. 
Rubin}, - date = {2019-04}, - title = {Statistical analysis with missing data}, - doi = {10.1002/9781119482260}, - edition = {3}, - isbn = {9781119482260}, - library = {QA276}, - addendum = {https://lccn.loc.gov/2018061330}, - abstract = {An up-to-date, comprehensive treatment of a classic text on missing data in statistics. - The topic of missing data has gained considerable attention in recent decades. This new edition by two acknowledged experts on the subject offers an up-to-date account of practical methodology for handling missing data problems. Blending theory and application, authors Roderick Little and Donald Rubin review historical approaches to the subject and describe simple methods for multivariate analysis with missing values. They then provide a coherent theory for analysis of problems based on likelihoods derived from statistical models for the data and the missing data mechanism, and then they apply the theory to a wide range of important missing data problems. - Statistical Analysis with Missing Data, Third Edition starts by introducing readers to the subject and approaches toward solving it. It looks at the patterns and mechanisms that create the missing data, as well as a taxonomy of missing data. It then goes on to examine missing data in experiments, before discussing complete-case and available-case analysis, including weighting methods. The new edition expands its coverage to include recent work on topics such as nonresponse in sample surveys, causal inference, diagnostic methods, and sensitivity analysis, among a host of other topics. - \begin{itemize} \item An updated ``classic'' written by renowned authorities on the subject \item Features over 150 exercises (including many new ones) \item Covers recent work on important methods like multiple imputation, robust alternatives to weighting, and Bayesian methods \item Revises previous topics based on past student feedback and class experience \item Contains an updated and expanded bibliography \end{itemize} - The authors were awarded The Karl Pearson Prize in 2017 by the International Statistical Institute, for a research contribution that has had profound influence on statistical theory, methodology or applications. Their work ``has been no less than defining and transforming.'' (ISI) - Statistical Analysis with Missing Data, Third Edition is an ideal textbook for upper undergraduate and/or beginning graduate level students of the subject. It is also an excellent source of information for applied statisticians and practitioners in government and industry.}, - publisher = {Wiley}, - keywords = {Mathematical statistics, Mathematical statistics--Problems, exercises, etc., Missing observations (Statistics), Missing observations (Statistics)--Problems, exercises, etc.}, -} - -@Book{Pawitan-2013, - author = {Yudi Pawitan}, - date = {2013-01-17}, - title = {In all likelihood: Statistical modelling and inference using likelihood}, - isbn = {9780199671229}, - pagetotal = {544}, - abstract = {Based on a course in the theory of statistics this text concentrates on what can be achieved using the likelihood/Fisherian method of taking account of uncertainty when studying a statistical problem. It takes the concept ot the likelihood as providing the best methods for unifying the demands of statistical modelling and the theory of inference. Every likelihood concept is illustrated by realistic examples, which are not compromised by computational problems. 
Examples range from a simple comparison of two accident rates, to complex studies that require generalised linear or semiparametric modelling. - The emphasis is that the likelihood is not simply a device to produce an estimate, but an important tool for modelling. The book generally takes an informal approach, where most important results are established using heuristic arguments and motivated with realistic examples. With the currently available computing power, examples are not contrived to allow a closed analytical solution, and the book can concentrate on the statistical aspects of the data modelling. In addition to classical likelihood theory, the book covers many modern topics such as generalized linear models and mixed models, non parametric smoothing, robustness, the EM algorithm and empirical likelihood.}, - publisher = {Oxford University Press}, -} - -@Book{vanBuuren-2018, - author = {Stef {van Buuren}}, - date = {2018-07}, - title = {Flexible imputation of missing data}, - doi = {10.1201/9780429492259}, - edition = {2}, - isbn = {9780429492259}, - publisher = {Chapman and Hall/{CRC}}, - library = {QA278}, - addendum = {https://lccn.loc.gov/2019719619}, - abstract = {Missing data pose challenges to real-life data analysis. Simple ad-hoc fixes, like deletion or mean imputation, only work under highly restrictive conditions, which are often not met in practice. Multiple imputation replaces each missing value by multiple plausible values. The variability between these replacements reflects our ignorance of the true (but missing) value. Each of the completed data sets is then analyzed by standard methods, and the results are pooled to obtain unbiased estimates with correct confidence intervals. Multiple imputation is a general approach that also inspires novel solutions to old problems by reformulating the task at hand as a missing-data problem. - This is the second edition of a popular book on multiple imputation, focused on explaining the application of methods through detailed worked examples using the MICE package as developed by the author. This new edition incorporates the recent developments in this fast-moving field. - This class-tested book avoids mathematical and technical details as much as possible: formulas are accompanied by verbal statements that explain the formula in accessible terms. The book sharpens the reader’s intuition on how to think about missing data, and provides all the tools needed to execute a well-grounded quantitative analysis in the presence of missing data.}, - keywords = {Multivariate analysis, Multiple imputation (Statistics), Missing observations (Statistics)}, -} - -@InCollection{Zhang-Wang-Tong-2015, - author = {Zhiyong Zhang and Lijuan Wang and Xin Tong}, - booktitle = {Quantitative Psychology Research}, - date = {2015}, - title = {Mediation analysis with missing data through multiple imputation and bootstrap}, - doi = {10.1007/978-3-319-19977-1_24}, - pages = {341--355}, - abstract = {A method using multiple imputation and bootstrap for dealing with missing data in mediation analysis is introduced and implemented in both SAS and R. Through simulation studies, it is shown that the method performs well for both MCAR and MAR data without and with auxiliary variables. It is also shown that the method can work for MNAR data if auxiliary variables related to missingness are included. The application of the method is demonstrated through the analysis of a subset of data from the National Longitudinal Survey of Youth. 
Mediation analysis with missing data can be conducted using the provided SAS macros and R package bmem.}, - publisher = {Springer International Publishing}, - keywords = {mediation analysis, missing data, multiple imputation, bootstrap}, -} - -@Report{Jones-Waller-2013b, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013-05-25}, - institution = {University of Minnesota-Twin Cities}, - title = {The normal-theory and asymptotic distribution-free ({ADF}) covariance matrix of standardized regression coefficients: Theoretical extensions and finite sample behavior}, - type = {techreport}, - url = {http://users.cla.umn.edu/~nwaller/downloads/techreports/TR052913.pdf}, - urldate = {2022-07-22}, - abstract = {Yuan and Chan (2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a new method for computing this covariance matrix from correlations. We then show that Yuan and Chan's original equations can also be used when only correlational data are available. Next, we describe an asymptotic distribution-free (ADF; Browne, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that theADF method works well with non-normal data in moderate-to-large samples using both simulated and real-data examples. Finally, we provide R code (R Development Core Team, 2012) in an Appendix to make these methods accessible to applied researchers.}, -} - -@Manual{Muthen-Muthen-2017, - author = {Linda K. Muth{\a'e}n and Bengt O. Muth{\a'e}n}, - date = {2017}, - title = {{Mplus} user’s guide. {Eighth} edition}, - location = {Los Angeles, CA}, - publisher = {{Muth\'en} \& {Muth\'en}}, - annotation = {sem, sem-software}, -} - -@Article{Cheung-2021, - author = {Mike W.-L. Cheung}, - date = {2021-06}, - journaltitle = {Alcohol and Alcoholism}, - title = {Synthesizing indirect effects in mediation models with meta-analytic methods}, - doi = {10.1093/alcalc/agab044}, - number = {1}, - pages = {5--15}, - volume = {57}, - abstract = {Aims - A mediator is a variable that explains the underlying mechanism between an independent variable and a dependent variable. The indirect effect indicates the effect from the predictor to the outcome variable via the mediator. In contrast, the direct effect represents the predictor's effort on the outcome variable after controlling for the mediator. - Methods - A single study rarely provides enough evidence to answer research questions in a particular domain. Replications are generally recommended as the gold standard to conduct scientific research. When a sufficient number of studies have been conducted addressing similar research questions, a meta-analysis can be used to synthesize those studies' findings. - Results - The main objective of this paper is to introduce two frameworks to integrating studies using mediation analysis. The first framework involves calculating standardized indirect effects and direct effects and conducting a multivariate meta-analysis on those effect sizes. The second one uses meta-analytic structural equation modeling to synthesize correlation matrices and fit mediation models on the average correlation matrix. We illustrate these procedures on a real dataset using the R statistical platform. 
- Conclusion - This paper closes with some further directions for future studies.}, - publisher = {Oxford University Press ({OUP})}, - keywords = {heterogeneity, gold standard, outcome variable, datasets, mediation analysis}, -} - -@Article{Cheung-Pesigan-2023a, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {{FINDOUT}: Using either {SPSS} commands or graphical user interface to identify influential cases in structural equation modeling in {AMOS}}, - doi = {10.1080/00273171.2022.2148089}, - pages = {1--5}, - abstract = {The results in a structural equation modeling (SEM) analysis can be influenced by just a few observations, called influential cases. Tools have been developed for users of R to identify them. However, similar tools are not available for AMOS, which is also a popular SEM software package. We introduce the FINDOUT toolset, a group of SPSS extension commands, and an AMOS plugin, to identify influential cases and examine how these cases influence the results. The SPSS commands can be used either as syntax commands or as custom dialogs from pull-down menus, and the AMOS plugin can be run from AMOS pull-down menu. We believe these tools can help researchers to examine the robustness of their findings to influential cases.}, - publisher = {Informa {UK} Limited}, - keywords = {influential cases, outliers, structural equation modeling, AMOS, sensitivity analysis, SPSS}, -} - -@Article{Cheung-Pesigan-2023b, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-05}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{semlbci}: An {R} package for forming likelihood-based confidence intervals for parameter estimates, correlations, indirect effects, and other derived parameters}, - doi = {10.1080/10705511.2023.2183860}, - pages = {1--15}, - abstract = {There are three common types of confidence interval (CI) in structural equation modeling (SEM): Wald-type CI, bootstrapping CI, and likelihood-based CI (LBCI). LBCI has the following advantages: (1) it has better coverage probabilities and Type I error rate compared to Wald-type CI when the sample size is finite; (2) it correctly tests the null hypothesis of a parameter based on likelihood ratio chi-square difference test; (3) it is less computationally intensive than bootstrapping CI; and (4) it is invariant to transformations. However, LBCI is not available in many popular SEM software packages. We developed an R package, semlbci, for forming LBCI for parameters in models fitted by lavaan, a popular open-source SEM package, such that researchers have more options in forming CIs for parameters in SEM. 
The package supports both unstandardized and standardized estimates, derived parameters such as indirect effect, multisample models, and the robust LBCI proposed by Falk.}, - publisher = {Informa {UK} Limited}, - keywords = {confidence interval, likelihood-based confidence interval, robust method, structural equation modeling}, - annotation = {r, r-packages, sem, sem-software, lb}, -} - -@Article{Cheung-Pesigan-Vong-2022, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan and Weng Ngai Vong}, - date = {2022-03}, - journaltitle = {Behavior Research Methods}, - title = {{DIY} bootstrapping: Getting the nonparametric bootstrap confidence interval in {SPSS} for any statistics or function of statistics (when this bootstrapping is appropriate)}, - doi = {10.3758/s13428-022-01808-5}, - number = {2}, - pages = {474--490}, - volume = {55}, - abstract = {Researchers can generate bootstrap confidence intervals for some statistics in SPSS using the BOOTSTRAP command. However, this command can only be applied to selected procedures, and only to selected statistics in these procedures. We developed an extension command and prepared some sample syntax files based on existing approaches from the Internet to illustrate how researchers can (a) generate a large number of nonparametric bootstrap samples, (b) do desired analysis on all these samples, and (c) form the bootstrap confidence intervals for selected statistics using the OMS commands. We developed these tools to help researchers apply nonparametric bootstrapping to any statistics for which this method is appropriate, including statistics derived from other statistics, such as standardized effect size measures computed from the t test results. We also discussed how researchers can extend the tools for other statistics and scenarios they encounter.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {bootstrapping, effect sizes, confidence intervals}, -} - -@Article{Li-Oravecz-Zhou-etal-2022, - author = {Yanling Li and Zita Oravecz and Shuai Zhou and Yosef Bodovski and Ian J. Barnett and Guangqing Chi and Yuan Zhou and Naomi P. Friedman and Scott I. Vrieze and Sy-Miin Chow}, - date = {2022-01}, - journaltitle = {Psychometrika}, - title = {{Bayesian} forecasting with a regime-switching zero-inflated multilevel poisson regression model: An application to adolescent alcohol use with spatial covariates}, - doi = {10.1007/s11336-021-09831-9}, - number = {2}, - pages = {376--402}, - volume = {87}, - abstract = {In this paper, we present and evaluate a novel Bayesian regime-switching zero-inflated multilevel Poisson (RS-ZIMLP) regression model for forecasting alcohol use dynamics. The model partitions individuals’ data into two phases, known as regimes, with: (1) a zero-inflation regime that is used to accommodate high instances of zeros (non-drinking) and (2) a multilevel Poisson regression regime in which variations in individuals’ log-transformed average rates of alcohol use are captured by means of an autoregressive process with exogenous predictors and a person-specific intercept. The times at which individuals are in each regime are unknown, but may be estimated from the data. We assume that the regime indicator follows a first-order Markov process as related to exogenous predictors of interest. The forecast performance of the proposed model was evaluated using a Monte Carlo simulation study and further demonstrated using substance use and spatial covariate data from the Colorado Online Twin Study (CoTwins). 
Results showed that the proposed model yielded better forecast performance compared to a baseline model which predicted all cases as non-drinking and a reduced ZIMLP model without the RS structure, as indicated by higher AUC (the area under the receiver operating characteristic (ROC) curve) scores, and lower mean absolute errors (MAEs) and root-mean-square errors (RMSEs). The improvements in forecast performance were even more pronounced when we limited the comparisons to participants who showed at least one instance of transition to drinking. }, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Bayesian zero-inflated Poisson model, forecast, intensive longitudinal data, regime-switching, spatial data, substance use}, - annotation = {bayesian, ild}, -} - -@Article{McNeish-MacKinnon-2022, - author = {Daniel McNeish and David P. MacKinnon}, - date = {2022-12}, - journaltitle = {Psychological Methods}, - title = {Intensive longitudinal mediation in {Mplus}}, - doi = {10.1037/met0000536}, - abstract = {Much of the existing longitudinal mediation literature focuses on panel data where relatively few repeated measures are collected over a relatively broad timespan. However, technological advances in data collection (e.g., smartphones, wearables) have led to a proliferation of short duration, densely collected longitudinal data in behavioral research. These intensive longitudinal data differ in structure and focus relative to traditionally collected panel data. As a result, existing methodological resources do not necessarily extend to nuances present in the recent influx of intensive longitudinal data and designs. In this tutorial, we first cover potential limitations of traditional longitudinal mediation models to accommodate unique characteristics of intensive longitudinal data. Then, we discuss how recently developed dynamic structural equation models (DSEMs) may be well-suited for mediation modeling with intensive longitudinal data and can overcome some of the limitations associated with traditional approaches. We describe four increasingly complex intensive longitudinal mediation models: (a) stationary models where the indirect effect is constant over time and people, (b) person-specific models where the indirect effect varies across people, (c) dynamic models where the indirect effect varies across time, and (d) cross-classified models where the indirect effect varies across both time and people. We apply each model to a running example featuring a mobile health intervention designed to improve health behavior of individuals with binge eating disorder. 
In each example, we provide annotated Mplus code and interpretation of the output to guide empirical researchers through mediation modeling with this increasingly popular type of longitudinal data.}, - publisher = {American Psychological Association ({APA})}, - keywords = {intensive longitudinal data, time-series, mediation, EMA, daily diary}, - annotation = {ild, ild-mediation, ild-software}, -} - -@Article{Nust-Eddelbuettel-Bennett-etal-2020, - author = {Daniel N{\"u}st and Dirk Eddelbuettel and Dom Bennett and Robrecht Cannoodt and Dav Clark and Gergely Dar{\a'o}czi and Mark Edmondson and Colin Fay and Ellis Hughes and Lars Kjeldgaard and Sean Lopp and Ben Marwick and Heather Nolis and Jacqueline Nolis and Hong Ooi and Karthik Ram and Noam Ross and Lori Shepherd and P{\a'e}ter S{\a'o}lymos and Tyson Lee Swetnam and Nitesh Turaga and Charlotte {Van Petegem} and Jason Williams and Craig Willis and Nan Xiao}, - date = {2020}, - journaltitle = {The R Journal}, - title = {The {Rockerverse}: Packages and applications for containerisation with {R}}, - doi = {10.32614/rj-2020-007}, - number = {1}, - pages = {437}, - volume = {12}, - abstract = {The Rocker Project provides widely used Docker images for R across different application scenarios. This article surveys downstream projects that build upon the Rocker Project images and presents the current state of R packages for managing Docker images and controlling containers. These use cases cover diverse topics such as package development, reproducible research, collaborative work, cloud-based data processing, and production deployment of services. The variety of applications demonstrates the power of the Rocker Project specifically and containerisation in general. Across the diverse ways to use containers, we identified common themes: reproducible environments, scalability and efficiency, and portability across clouds. We conclude that the current growth and diversification of use cases is likely to continue its positive impact, but see the need for consolidating the Rockerverse ecosystem of packages, developing common practices for applications, and exploring alternative containerisation software.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-rocker}, -} - -@Article{Pesigan-Cheung-2020, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2020-12}, - journaltitle = {Frontiers in Psychology}, - title = {{SEM}-based methods to form confidence intervals for indirect effect: Still applicable given nonnormality, under certain conditions}, - doi = {10.3389/fpsyg.2020.571928}, - volume = {11}, - abstract = {A SEM-based approach using likelihood-based confidence interval (LBCI) has been proposed to form confidence intervals for unstandardized and standardized indirect effect in mediation models. However, when used with the maximum likelihood estimation, this approach requires that the variables are multivariate normally distributed. This can affect the LBCIs of unstandardized and standardized effect differently. In the present study, the robustness of this approach when the predictor is not normally distributed but the error terms are conditionally normal, which does not violate the distributional assumption of ordinary least squares (OLS) estimation, is compared to four other approaches: nonparametric bootstrapping, two variants of LBCI, LBCI assuming the predictor is fixed (LBCI-Fixed-X) and LBCI based on ADF estimation (LBCI-ADF), and Monte Carlo. 
A simulation study was conducted using a simple mediation model and a serial mediation model, manipulating the distribution of the predictor. The Monte Carlo method performed worst among the methods. LBCI and LBCI-Fixed-X had suboptimal performance when the distributions had high kurtosis and the population indirect effects were medium to large. In some conditions, the problem was severe even when the sample size was large. LBCI-ADF and nonparametric bootstrapping had coverage probabilities close to the nominal value in nearly all conditions, although the coverage probabilities were still suboptimal for the serial mediation model when the sample size was small with respect to the model. Implications of these findings in the context of this special case of nonnormal data were discussed.}, - publisher = {Frontiers Media {SA}}, - keywords = {mediation, nonnormal, confidence interval, structural equation modeling, bootstrapping}, -} - -@Article{Pesigan-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2023-08}, - journaltitle = {Behavior Research Methods}, - title = {{Monte Carlo} confidence intervals for the indirect effect with missing data}, - doi = {10.3758/s13428-023-02114-4}, - abstract = {Missing data is a common occurrence in mediation analysis. As a result, the methods used to construct confidence intervals around the indirect effect should consider missing data. Previous research has demonstrated that, for the indirect effect in data with complete cases, the Monte Carlo method performs as well as nonparametric bootstrap confidence intervals (see MacKinnon et al., Multivariate Behavioral Research, 39(1), 99–128, 2004; Preacher \& Selig, Communication Methods and Measures, 6(2), 77–98, 2012; Tofighi \& MacKinnon, Structural Equation Modeling: A Multidisciplinary Journal, 23(2), 194–205, 2015). In this manuscript, we propose a simple, fast, and accurate two-step approach for generating confidence intervals for the indirect effect, in the presence of missing data, based on the Monte Carlo method. In the first step, an appropriate method, for example, full-information maximum likelihood or multiple imputation, is used to estimate the parameters and their corresponding sampling variance-covariance matrix in a mediation model. In the second step, the sampling distribution of the indirect effect is simulated using estimates from the first step. A confidence interval is constructed from the resulting sampling distribution. A simulation study with various conditions is presented. Implications of the results for applied research are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Monte Carlo method, nonparametric bootstrap, indirect effect, mediation, missing completely at random, missing at random, full-information maximum likelihood, multiple imputation}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Pesigan-Sun-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Rong Wei Sun and Shu Fai Cheung}, - date = {2023-04}, - journaltitle = {Multivariate Behavioral Research}, - title = {{betaDelta} and {betaSandwich}: Confidence intervals for standardized regression coefficients in {R}}, - doi = {10.1080/00273171.2023.2201277}, - pages = {1--4}, - abstract = {The multivariate delta method was used by Yuan and Chan to estimate standard errors and confidence intervals for standardized regression coefficients. 
Jones and Waller extended the earlier work to situations where data are nonnormal by utilizing Browne’s asymptotic distribution-free (ADF) theory. Furthermore, Dudgeon developed standard errors and confidence intervals, employing heteroskedasticity-consistent (HC) estimators, that are robust to nonnormality with better performance in smaller sample sizes compared to Jones and Waller’s ADF technique. Despite these advancements, empirical research has been slow to adopt these methodologies. This can be a result of the dearth of user-friendly software programs to put these techniques to use. We present the betaDelta and the betaSandwich packages in the R statistical software environment in this manuscript. Both the normal-theory approach and the ADF approach put forth by Yuan and Chan and Jones and Waller are implemented by the betaDelta package. The HC approach proposed by Dudgeon is implemented by the betaSandwich package. The use of the packages is demonstrated with an empirical example. We think the packages will enable applied researchers to accurately assess the sampling variability of standardized regression coefficients.}, - publisher = {Informa {UK} Limited}, - keywords = {standardized regression coefficients, confidence intervals, delta method standard errors, heteroskedasticity-consistent standard errors, R package}, - annotation = {r, r-packages}, -} - -@Article{Savalei-Rosseel-2021, - author = {Victoria Savalei and Yves Rosseel}, - date = {2021-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Computational options for standard errors and test statistics with incomplete normal and nonnormal data in {SEM}}, - doi = {10.1080/10705511.2021.1877548}, - number = {2}, - pages = {163--181}, - volume = {29}, - abstract = {This article provides an overview of different computational options for inference following normal theory maximum likelihood (ML) estimation in structural equation modeling (SEM) with incomplete normal and nonnormal data. Complete data are covered as a special case. These computational options include whether the information matrix is observed or expected, whether the observed information matrix is estimated numerically or using an analytic asymptotic approximation, and whether the information matrix and the outer product matrix of the score vector are evaluated at the saturated or at the structured estimates. A variety of different standard errors and robust test statistics become possible by varying these options. We review the asymptotic properties of these computational variations, and we show how to obtain them using lavaan in R. We hope that this article will encourage methodologists to study the impact of the available computational options on the performance of standard errors and test statistics in SEM.}, - publisher = {Informa {UK} Limited}, - keywords = {incomplete data, nonnormal data, robust corrections, software implementation}, -} - -@Article{Tofighi-Kelley-2020, - author = {Davood Tofighi and Ken Kelley}, - date = {2020}, - journaltitle = {Psychological Methods}, - title = {Improved inference in mediation analysis: Introducing the model-based constrained optimization procedure}, - doi = {10.1037/met0000259}, - pages = {496--515}, - volume = {25}, - abstract = {Mediation analysis is an important approach for investigating causal pathways. 
One approach used in mediation analysis is the test of an indirect effect, which seeks to measure how the effect of an independent variable impacts an outcome variable through one or more mediators. However, in many situations the proposed tests of indirect effects, including popular confidence interval-based methods, tend to produce poor Type I error rates when mediation does not occur and, more generally, only allow dichotomous decisions of ``not significant'' or ``significant'' with regards to the statistical conclusion. To remedy these issues, we propose a new method, a likelihood ratio test (LRT), that uses non-linear constraints in what we term the model-based constrained optimization (MBCO) procedure. The MBCO procedure (a) offers a more robust Type I error rate than existing methods; (b) provides a p-value, which serves as a continuous measure of compatibility of data with the hypothesized null model (not just a dichotomous reject or fail-to-reject decision rule); (c) allows simple and complex hypotheses about mediation (i.e., one or more mediators; different mediational pathways), and (d) allows the mediation model to use observed or latent variables. The MBCO procedure is based on a structural equation modeling framework (even if latent variables are not specified) with specialized fitting routines, namely with the use of non-linear constraints. We advocate using the MBCO procedure to test hypotheses about an indirect effect in addition to reporting a confidence interval to capture uncertainty about the indirect effect because this combination transcends existing methods.}, - publisher = {{American Psychological Association ({APA})}}, -} - -@Article{Wang-Zhang-2020, - author = {Lijuan Wang and Qian Zhang}, - date = {2020-06}, - journaltitle = {Psychological Methods}, - title = {Investigating the impact of the time interval selection on autoregressive mediation modeling: Result interpretations, effect reporting, and temporal designs}, - doi = {10.1037/met0000235}, - number = {3}, - pages = {271--291}, - volume = {25}, - abstract = {This study investigates the impact of the time interval (the time passed between 2 consecutive measurements) selection on autoregressive mediation modeling (AMM). For a widely used autoregressive mediation model, via analytical derivations, we explained why and how the conventionally reported time-specific coefficient estimates (e.g., $\hat{a} \hat{b}$ and $\hat{c}^{\prime}$ ) and inference results in AMM provide limited information and can arrive in even misleading conclusions about direct and indirect effects over time. Furthermore, under the stationarity assumption, we proposed an approach to calculate the overall direct and indirect effect estimates over time and the time lag lengths at which they reach maxima, using AMM results. The derivation results revealed that the overall direct and indirect effect curves are asymptotically invariant to the time interval selection, under stationarity. With finite samples and thus sampling errors and potential computing problems, however, our simulation results revealed that the overall indirect effect curves were better recovered when the time interval is selected to be closer to half of the time lag length at which the overall indirect effect reaches its maximum. An R function and an R Shiny app were developed to obtain the overall direct and indirect effect curves over time and facilitate the time interval selection using AMM results. 
Our findings provide another look at the connections between AMM and continuous time mediation modeling and the connections are discussed.}, - publisher = {American Psychological Association ({APA})}, - keywords = {longitudinal mediation, autoregressive mediation modeling, time interval selection, time-specific indirect effect, overall indirect effect}, - annotation = {ild, ild-mediation}, -} - -@Book{Hayes-2022, - author = {Andrew F. Hayes}, - date = {2022}, - title = {Introduction to mediation, moderation, and conditional process analysis: A regression-based approach}, - series = {Methodology in the social sciences}, - edition = {3}, - isbn = {9781462549030}, - pages = {732}, - library = {HA31.3 .H39 2022}, - addendum = {https://lccn.loc.gov/2021031108}, - abstract = {Lauded for its easy-to-understand, conversational discussion of the fundamentals of mediation, moderation, and conditional process analysis, this book has been fully revised with 50\% new content, including sections on working with multicategorical antecedent variables, the use of PROCESS version 3 for SPSS and SAS for model estimation, and annotated PROCESS v3 outputs. Using the principles of ordinary least squares regression, Andrew F. Hayes carefully explains procedures for testing hypotheses about the conditions under and the mechanisms by which causal effects operate, as well as the moderation of such mechanisms. Hayes shows how to estimate and interpret direct, indirect, and conditional effects; probe and visualize interactions; test questions about moderated mediation; and report different types of analyses. Data for all the examples are available on the companion website (www.afhayes.com) along with links to download PROCESS.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Mediation (Statistics), Regression analysis}, -} - -@Manual{Arbuckle-2020, - author = {James L. Arbuckle}, - date = {2020}, - title = {Amos 27.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Manual{Arbuckle-2021, - author = {James L. Arbuckle}, - date = {2021}, - title = {Amos 28.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Report{Asparouhov-Muthen-2022, - author = {Tihomir Asparouhov and Bengt O. Muth{\a'e}n}, - date = {2022}, - title = {Multiple imputation with {Mplus}}, - type = {techreport}, - url = {http://www.statmodel.com/download/Imputations7.pdf}, - institution = {http://www.statmodel.com}, -} - -@Manual{Eddelbuettel-Francois-Allaire-etal-2023, - title = {{Rcpp}: Seamless {R} and {C++} Integration}, - author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Inaki Ucar and Douglas Bates and John Chambers}, - year = {2023}, - note = {R package version 1.0.11}, - url = {https://CRAN.R-project.org/package=Rcpp}, - annotation = {r, r-package}, -} - -@Manual{Jorgensen-Pornprasertmanit-Schoemann-etal-2022, - title = {{semTools}: Useful tools for structural equation modeling}, - author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel}, - year = {2022}, - note = {R package version 0.5-6}, - url = {https://CRAN.R-project.org/package=semTools}, -} - -@Misc{Kurtzer-cclerget-Bauer-etal-2021, - author = {Gregory M. 
Kurtzer and {cclerget} and Michael Bauer and Ian Kaneshiro and David Trudgian and David Godlove}, - date = {2021}, - title = {{hpcng/singularity: Singularity 3.7.3}}, - doi = {10.5281/ZENODO.1310023}, - copyright = {Open Access}, - publisher = {Zenodo}, - annotation = {container, container-singularity}, -} - -@Manual{RCoreTeam-2021, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2021}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2022, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2022}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2023, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2023}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{Waller-2022, - author = {Niels G. Waller}, - title = {{fungible}: Psychometric functions from the {Waller Lab}}, - year = {2022}, - note = {R package version 2.2.1}, - url = {https://CRAN.R-project.org/package=fungible}, - publisher = {The R Foundation}, - annotation = {r, r-package}, -} - -@PhdThesis{Pesigan-2022, - author = {Ivan Jacob Agaloos Pesigan}, - year = {2022}, - school = {University of Macau}, - title = {Confidence intervals for standardized coefficients: Applied to regression coefficients in primary studies and indirect effects in meta-analytic structural equation modeling}, - type = {phdthesis}, -} diff --git a/.setup/latex/pdf/.gitignore b/.setup/latex/pdf/.gitignore deleted file mode 100644 index acd02c2..0000000 --- a/.setup/latex/pdf/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -* -*/ -!*.pdf -!.gitignore -!bib.bib diff --git a/.setup/latex/pdf/betaSandwich-001-description.pdf b/.setup/latex/pdf/betaSandwich-001-description.pdf deleted file mode 100644 index 03f47e9..0000000 Binary files a/.setup/latex/pdf/betaSandwich-001-description.pdf and /dev/null differ diff --git a/.setup/latex/pdf/betaSandwich-999-session.pdf b/.setup/latex/pdf/betaSandwich-999-session.pdf deleted file mode 100644 index d08ae06..0000000 Binary files a/.setup/latex/pdf/betaSandwich-999-session.pdf and /dev/null differ diff --git a/.setup/latex/pdf/betaSandwich-zzz-references.pdf b/.setup/latex/pdf/betaSandwich-zzz-references.pdf deleted file mode 100644 index e84cb5e..0000000 Binary files a/.setup/latex/pdf/betaSandwich-zzz-references.pdf and /dev/null differ diff --git a/.setup/latex/pdf/betaSandwich-zzz-tests-benchmark.pdf b/.setup/latex/pdf/betaSandwich-zzz-tests-benchmark.pdf deleted file mode 100644 index 73e4b4e..0000000 Binary files a/.setup/latex/pdf/betaSandwich-zzz-tests-benchmark.pdf and /dev/null differ diff --git a/.setup/latex/pdf/betaSandwich-zzz-tests-external.pdf b/.setup/latex/pdf/betaSandwich-zzz-tests-external.pdf deleted file mode 100644 index fdd2fb1..0000000 Binary files a/.setup/latex/pdf/betaSandwich-zzz-tests-external.pdf and /dev/null differ diff --git a/.setup/latex/pdf/betaSandwich-zzz-tests-internal.pdf b/.setup/latex/pdf/betaSandwich-zzz-tests-internal.pdf deleted file mode 100644 index 6bc7e43..0000000 Binary files 
a/.setup/latex/pdf/betaSandwich-zzz-tests-internal.pdf and /dev/null differ diff --git a/.setup/latex/pdf/betaSandwich-zzz-tests-staging.pdf b/.setup/latex/pdf/betaSandwich-zzz-tests-staging.pdf deleted file mode 100644 index f16ea72..0000000 Binary files a/.setup/latex/pdf/betaSandwich-zzz-tests-staging.pdf and /dev/null differ diff --git a/.setup/latex/pdf/bib.bib b/.setup/latex/pdf/bib.bib deleted file mode 100644 index cd21073..0000000 --- a/.setup/latex/pdf/bib.bib +++ /dev/null @@ -1,1450 +0,0 @@ -@Article{Craig-1936, - author = {Cecil C. Craig}, - date = {1936-03}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {On the frequency function of $xy$}, - doi = {10.1214/aoms/1177732541}, - number = {1}, - pages = {1--15}, - volume = {7}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Aroian-1947, - author = {Leo A. Aroian}, - date = {1947-06}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The probability function of the product of two normally distributed variables}, - doi = {10.1214/aoms/1177730442}, - number = {2}, - pages = {265--271}, - volume = {18}, - abstract = {Let $x$ and $y$ follow a normal bivariate probability function with means $\bar X, \bar Y$, standard deviations $\sigma_1, \sigma_2$, respectively, $r$ the coefficient of correlation, and $\rho_1 = \bar X/\sigma_1, \rho_2 = \bar Y/\sigma_2$. Professor C. C. Craig [1] has found the probability function of $z = xy/\sigma_1\sigma_2$ in closed form as the difference of two integrals. For purposes of numerical computation he has expanded this result in an infinite series involving powers of $z, \rho_1, \rho_2$, and Bessel functions of a certain type; in addition, he has determined the moments, semin-variants, and the moment generating function of $z$. However, for $\rho_1$ and $\rho_2$ large, as Craig points out, the series expansion converges very slowly. Even for $\rho_1$ and $\rho_2$ as small as 2, the expansion is unwieldy. We shall show that as $\rho_1$ and $\rho_2 \rightarrow \infty$, the probability function of $z$ approaches a normal curve and in case $r = 0$ the Type III function and the Gram-Charlier Type A series are excellent approximations to the $z$ distribution in the proper region. Numerical integration provides a substitute for the infinite series wherever the exact values of the probability function of $z$ are needed. Some extensions of the main theorem are given in section 5 and a practical problem involving the probability function of $z$ is solved.}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Cochran-1952, - author = {William G. Cochran}, - date = {1952-09}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The $\chi^{2}$ test of goodness of fit}, - doi = {10.1214/aoms/1177729380}, - number = {3}, - pages = {315--345}, - volume = {23}, - publisher = {Institute of Mathematical Statistics}, - abstract = {This paper contains an expository discussion of the chi square test of goodness of fit, intended for the student and user of statistical theory rather than for the expert. Part I describes the historical development of the distribution theory on which the test rests. Research bearing on the practical application of the test--in particular on the minimum expected number per class and the construction of classes--is discussed in Part II. Some varied opinions about the extent to which the test actually is useful to the scientist are presented in Part III. 
Part IV outlines a number of tests that have been proposed as substitutes for the chi square test (the $\omega^2$ test, the smooth test, the likelihood ratio test) and Part V a number of supplementary tests (the run test, tests based on low moments, subdivision of chi square into components).}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Goodman-1960, - author = {Leo A. Goodman}, - date = {1960-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {On the exact variance of products}, - doi = {10.1080/01621459.1960.10483369}, - number = {292}, - pages = {708--713}, - volume = {55}, - abstract = {A simple exact formula for the variance of the product of two random variables, say, x and y, is given as a function of the means and central product-moments of x and y. The usual approximate variance formula for xy is compared with this exact formula; e.g., we note, in the special case where x and y are independent, that the ``variance'' computed by the approximate formula is less than the exact variance, and that the accuracy of the approximation depends on the sum of the reciprocals of the squared coefficients of variation of x and y. The case where x and y need not be independent is also studied, and exact variance formulas are presented for several different ``product estimates.'' (The usefulness of exact formulas becomes apparent when the variances of these estimates are compared.) When x and y are independent, simple unbiased estimates of these exact variances are suggested; in the more general case, consistent estimates are presented.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Bradley-1978, - author = {James V. Bradley}, - date = {1978-11}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Robustness?}, - doi = {10.1111/j.2044-8317.1978.tb00581.x}, - number = {2}, - pages = {144--152}, - volume = {31}, - publisher = {Wiley}, - annotation = {robustness}, - abstract = {The actual behaviour of the probability of a Type I error under assumption violation is quite complex, depending upon a wide variety of interacting factors. Yet allegations of robustness tend to ignore its highly particularistic nature and neglect to mention important qualifying conditions. The result is often a vast overgeneralization which nevertheless is difficult to refute since a standard quantitative definition of what constitutes robustness does not exist. Yet under any halfway reasonable quantitative definition, many of the most prevalent claims of robustness would be demonstrably false. Therefore robustness is a highly questionable concept.}, -} - -@Article{Rubin-1976, - author = {Donald B. Rubin}, - date = {1976}, - journaltitle = {Biometrika}, - title = {Inference and missing data}, - doi = {10.1093/biomet/63.3.581}, - number = {3}, - pages = {581--592}, - volume = {63}, - publisher = {Oxford University Press ({OUP})}, - abstract = {When making sampling distribution inferences about the parameter of the data, $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are `missing at random' and the observed data are `observed at random', but these inferences are generally conditional on the observed pattern of missing data. When making direct-likelihood or Bayesian inferences about $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are missing at random and the parameter of the missing data process is `distinct' from $\theta$. 
These conditions are the weakest general conditions under which ignoring the process that causes missing data always leads to correct inferences.}, - publisher = {Oxford University Press ({OUP})}, -} - -@Article{Baron-Kenny-1986, - author = {Reuben M. Baron and David A. Kenny}, - date = {1986}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {The moderator-mediator variable distinction in social psychological research: Conceptual, strategic, and statistical considerations}, - doi = {10.1037/0022-3514.51.6.1173}, - number = {6}, - pages = {1173--1182}, - volume = {51}, - abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Browne-1984, - author = {Michael W. Browne}, - date = {1984-05}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Asymptotically distribution-free methods for the analysis of covariance structures}, - doi = {10.1111/j.2044-8317.1984.tb00789.x}, - number = {1}, - pages = {62--83}, - volume = {37}, - abstract = {Methods for obtaining tests of fit of structural models for covariance matrices and estimator standard error which are asymptotically distribution free are derived. Modifications to standard normal theory tests and standard errors which make them applicable to the wider class of elliptical distributions are provided. A random sampling experiment to investigate some of the proposed methods is described.}, - publisher = {Wiley}, -} - -@Article{Efron-1987, - author = {Bradley Efron}, - date = {1987-03}, - journaltitle = {Journal of the American Statistical Association}, - title = {Better bootstrap confidence intervals}, - doi = {10.1080/01621459.1987.10478410}, - number = {397}, - pages = {171--185}, - volume = {82}, - abstract = {We consider the problem of setting approximate confidence intervals for a single parameter $\theta$ in a multiparameter family. The standard approximate intervals based on maximum likelihood theory, $\hat{\theta} \pm \hat{\sigma} z^{\left( \alpha \right)}$, can be quite misleading. In practice, tricks based on transformations, bias corrections, and so forth, are often used to improve their accuracy. The bootstrap confidence intervals discussed in this article automatically incorporate such tricks without requiring the statistician to think them through for each new application, at the price of a considerable increase in computational effort. The new intervals incorporate an improvement over previously suggested methods, which results in second-order correctness in a wide variety of problems. 
In addition to parametric families, bootstrap intervals are also developed for nonparametric situations.}, - publisher = {Informa {UK} Limited}, - keywords = {resampling methods, approximate confidence intervals, transformations, nonparametric intervals, second-order theory, skewness corrections}, -} - -@Article{Efron-1988, - author = {Bradley Efron}, - date = {1988}, - journaltitle = {Psychological Bulletin}, - title = {Bootstrap confidence intervals: Good or bad?}, - doi = {10.1037/0033-2909.104.2.293}, - number = {2}, - pages = {293--296}, - volume = {104}, - abstract = {The bootstrap is a nonparametric technique for estimating standard errors and approximate confidence intervals. Rasmussen has used a simulation experiment to suggest that bootstrap confidence intervals perform very poorly in the estimation of a correlation coefficient. Part of Rasmussen's simulation is repeated. A careful look at the results shows the bootstrap intervals performing quite well. Some remarks are made concerning the virtues and defects of bootstrap intervals in general.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{James-Brett-1984, - author = {Lawrence R. James and Jeanne M. Brett}, - date = {1984}, - journaltitle = {Journal of Applied Psychology}, - title = {Mediators, moderators, and tests for mediation}, - doi = {10.1037/0021-9010.69.2.307}, - number = {2}, - pages = {307--321}, - volume = {69}, - abstract = {Discusses mediation relations in causal terms. Influences of an antecedent are transmitted to a consequence through an intervening mediator. Mediation relations may assume a number of functional forms, including nonadditive, nonlinear, and nonrecursive forms. Although mediation and moderation are distinguishable processes, with nonadditive forms (moderated mediation) a particular variable may be both a mediator and a moderator within a single set of functional relations. Current models for testing mediation relations in industrial and organizational psychology often involve an interplay between exploratory (correlational) statistical tests and causal inference. It is suggested that no middle ground exists between exploratory and confirmatory (causal) analysis and that attempts to explain how mediation processes occur require specified causal models.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Judd-Kenny-1981, - author = {Charles M. Judd and David A. Kenny}, - date = {1981-10}, - journaltitle = {Evaluation Review}, - title = {Process analysis}, - doi = {10.1177/0193841x8100500502}, - number = {5}, - pages = {602--619}, - volume = {5}, - abstract = {This article presents the rationale and procedures for conducting a process analysis in evaluation research. Such an analysis attempts to identify the process that mediates the effects of some treatment, by estimating the parameters of a causal chain between the treatment and some outcome variable. Two different procedures for estimating mediation are discussed. In addition we present procedures for examining whether a treatment exerts its effects, in part, by altering the mediating process that produces the outcome. 
Finally, the benefits of process analysis in evaluation research are underlined.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Micceri-1989, - author = {Theodore Micceri}, - date = {1989}, - journaltitle = {Psychological Bulletin}, - title = {The unicorn, the normal curve, and other improbable creatures}, - doi = {10.1037/0033-2909.105.1.156}, - number = {1}, - pages = {156--166}, - volume = {105}, - abstract = {An investigation of the distributional characteristics of 440 large-sample achievement and psychometric measures found all to be significantly nonnormal at the alpha .01 significance level. Several classes of contamination were found, including tail weights from the uniform to the double exponential, exponential-level asymmetry, severe digit preferences, multimodalities, and modes external to the mean/median interval. Thus, the underlying tenets of normality-assuming statistics appear fallacious for these commonly used types of data. However, findings here also fail to support the types of distributions used in most prior robustness research suggesting the failure of such statistics under nonnormal conditions. A reevaluation of the statistical robustness literature appears appropriate in light of these findings.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Sobel-1982, - author = {Michael E. Sobel}, - date = {1982}, - journaltitle = {Sociological Methodology}, - title = {Asymptotic confidence intervals for indirect effects in structural equation models}, - doi = {10.2307/270723}, - pages = {290}, - volume = {13}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1986, - author = {Michael E. Sobel}, - date = {1986}, - journaltitle = {Sociological Methodology}, - title = {Some new results on indirect effects and their standard errors in covariance structure models}, - doi = {10.2307/270922}, - pages = {159}, - volume = {16}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1987, - author = {Michael E. Sobel}, - date = {1987-08}, - journaltitle = {Sociological Methods {\&} Research}, - title = {Direct and indirect effects in linear structural equation models}, - doi = {10.1177/0049124187016001006}, - number = {1}, - pages = {155--176}, - volume = {16}, - abstract = {This article discusses total indirect effects in linear structural equation models. First, I define these effects. Second, I show how the delta method may be used to obtain the standard errors of the sample estimates of these effects and test hypotheses about the magnitudes of the indirect effects. To keep matters simple, I focus throughout on a particularly simple linear structural equation system; for a treatment of the general case, see Sobel (1986). To illustrate the ideas and results, a detailed example is presented.}, - publisher = {{SAGE} Publications}, -} - -@Article{Venzon-Moolgavkar-1988, - author = {D. J. Venzon and S. H. Moolgavkar}, - date = {1988}, - journaltitle = {Applied Statistics}, - title = {A method for computing profile-likelihood-based confidence intervals}, - doi = {10.2307/2347496}, - number = {1}, - pages = {87}, - volume = {37}, - abstract = {The method of constructing confidence regions based on the generalised likelihood ratio statistic is well known for parameter vectors. A similar construction of a confidence interval for a single entry of a vector can be implemented by repeatedly maximising over the other parameters.
We present an algorithm for finding these confidence interval endpoints that requires less computation. It employs a modified Newton-Raphson iteration to solve a system of equations that defines the endpoints.}, - publisher = {{JSTOR}}, - keywords = {confidence intervals, profile likelihood}, -} - -@Article{White-1980, - author = {Halbert White}, - date = {1980-05}, - journaltitle = {Econometrica}, - title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity}, - doi = {10.2307/1912934}, - number = {4}, - pages = {817--838}, - volume = {48}, - abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. The test has an appealing least squares interpretation.}, - publisher = {{JSTOR}}, -} - -@Book{Cohen-1988, - author = {Jacob Cohen}, - date = {1988}, - title = {Statistical power analysis for the behavioral sciences}, - doi = {10.4324/9780203771587}, - edition = {2}, - isbn = {9780203771587}, - publisher = {Routledge}, - library = {HA29 .C66 1988}, - keywords = {Social sciences--Statistical methods, Probabilities, Statistical power analysis}, - addendum = {https://lccn.loc.gov/88012110}, - abstract = {Statistical Power Analysis is a nontechnical guide to power analysis in research planning that provides users of applied statistics with the tools they need for more effective analysis. The Second Edition includes: \begin{itemize} \item a chapter covering power analysis in set correlation and multivariate methods; \item a chapter considering effect size, psychometric reliability, and the efficacy of ``qualifying'' dependent variables; and \item expanded power and sample size tables for multiple regression/correlation. \end{itemize}}, -} - -@Book{NationalResearchCouncil-1982, - author = {{National Research Council}}, - date = {1982-01}, - title = {An assessment of research-doctorate programs in the {United States}: Social and behavioral sciences}, - doi = {10.17226/9781}, - location = {Washington, D.C.}, - publisher = {National Academies Press}, - annotation = {data}, -} - -@Book{Rubin-1987, - author = {Donald B. Rubin}, - date = {1987-06}, - title = {Multiple imputation for nonresponse in surveys}, - doi = {10.1002/9780470316696}, - isbn = {9780470316696}, - location = {New York}, - publisher = {John Wiley {\&} Sons, Inc.}, - library = {HA31.2 .R83 1987}, - keywords = {Multiple imputation (Statistics), Nonresponse (Statistics), Social surveys--Response rate}, - addendum = {https://lccn.loc.gov/86028935}, - annotation = {Lib-Missing-Data-Books}, - abstract = {Demonstrates how nonresponse in sample surveys and censuses can be handled by replacing each missing value with two or more multiple imputations. Clearly illustrates the advantages of modern computing to handle such surveys, and demonstrates the benefit of this statistical technique for researchers who must analyze them. Also presents the background for Bayesian and frequentist theory.
After establishing that only standard complete-data methods are needed to analyze a multiply-imputed data set, the text evaluates procedures in general circumstances, outlining specific procedures for creating imputations in both the ignorable and nonignorable cases. Examples and exercises reinforce ideas, and the interplay of Bayesian and frequentist ideas presents a unified picture of modern statistics.}, -} - -@Article{Bollen-Stine-1990, - author = {Kenneth A. Bollen and Robert Stine}, - date = {1990}, - journaltitle = {Sociological Methodology}, - title = {Direct and indirect effects: Classical and bootstrap estimates of variability}, - doi = {10.2307/271084}, - pages = {115}, - volume = {20}, - abstract = {The decomposition of effects in structural equation models has been of considerable interest to social scientists. Finite-sample or asymptotic results for the sampling distribution of estimators of direct effects are widely available. Statistical inferences about indirect effects have relied exclusively on asymptotic methods which assume that the limiting distribution of the estimator is normal, with a standard error derived from the delta method. We examine bootstrap procedures as another way to generate standard errors and confidence intervals and to estimate the sampling distributions of estimators of direct and indirect effects. We illustrate the classical and the bootstrap methods with three empirical examples. We find that in a moderately large sample, the bootstrap distribution of an estimator is close to that assumed with the classical and delta methods but that in small samples, there are some differences. Bootstrap methods provide a check on the classical and delta methods when the latter are applied under less than ideal conditions.}, - publisher = {{JSTOR}}, -} - -@Article{Li-Raghunathan-Rubin-1991, - author = {K. H. Li and Trivellore Eachambadi Raghunathan and Donald B. Rubin}, - date = {1991-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {Large-sample significance levels from multiply imputed data using moment-based statistics and an {$F$} reference distribution}, - doi = {10.1080/01621459.1991.10475152}, - number = {416}, - pages = {1065--1073}, - volume = {86}, - abstract = {We present a procedure for computing significance levels from data sets whose missing values have been multiply imputed. This procedure uses moment-based statistics, $m \leq 3$ repeated imputations, and an F reference distribution. When $m = \infty$, we show first that our procedure is essentially the same as the ideal procedure in cases of practical importance and, second, that its deviations from the ideal are basically a function of the coefficient of variation of the canonical ratios of complete to observed information. For small $m$ our procedure's performance is largely governed by this coefficient of variation and the mean of these ratios. Using simulation techniques with small $m$, we compare our procedure's actual and nominal large-sample significance levels and conclude that it is essentially calibrated and thus represents a definite improvement over previously available procedures.
Furthermore, we compare the large-sample power of the procedure as a function of $m$ and other factors, such as the dimensionality of the estimand and fraction of missing information, to provide guidance on the choice of the number of imputations; generally, we find the loss of power due to small $m$ to be quite modest in cases likely to occur in practice.}, - publisher = {Informa {UK} Limited}, - keywords = {imputation, missing data, nonresponse, tests of significance}, - annotation = {missing, missing-mi}, -} - -@InBook{Arbuckle-1996, - author = {James L. Arbuckle}, - booktitle = {Advanced structural equation modeling}, - date = {1996}, - title = {Full information estimation in the presence of incomplete data}, - doi = {10.4324/9781315827414}, - editor = {George A. Marcoulides and Randall E. Schumacker}, -} - -@Book{Davison-Hinkley-1997, - author = {Anthony Christopher Davison and David Victor Hinkley}, - publisher = {Cambridge University Press}, - title = {Bootstrap methods and their application}, - series = {Cambridge Series in Statistical and Probabilistic Mathematics}, - date = {1997}, - location = {Cambridge and New York, NY, USA }, - doi = {10.1017/CBO9780511802843}, - isbn = {9780521573917}, - library = {QA276.8 .D38 1997}, - keywords = {Bootstrap (Statistics)}, - addendum = {https://lccn.loc.gov/96030064}, - abstract = {Bootstrap methods are computer-intensive methods of statistical analysis, which use simulation to calculate standard errors, confidence intervals, and significance tests. The methods apply for any level of modelling, and so can be used for fully parametric, semiparametric, and completely nonparametric analysis. This 1997 book gives a broad and up-to-date coverage of bootstrap methods, with numerous applied examples, developed in a coherent way with the necessary theoretical basis. Applications include stratified data; finite populations; censored and missing data; linear, nonlinear, and smooth regression models; classification; time series and spatial problems. Special features of the book include: extensive discussion of significance tests and confidence intervals; material on various diagnostic methods; and methods for efficient computation, including improved Monte Carlo simulation. Each chapter includes both practical and theoretical exercises. S-Plus programs for implementing the methods described in the text are available from the supporting website.}, - annotation = {bootstrap}, -} - -@Book{Efron-Tibshirani-1993, - author = {Bradley Efron and Robert J. Tibshirani}, - publisher = {Chapman \& Hall}, - title = {An introduction to the bootstrap}, - series = {Monographs on statistics and applied probability ; 57}, - date = {1993}, - location = {New York}, - doi = {10.1201/9780429246593}, - isbn = {9780412042317}, - library = {QA276.8 .E3745 1993}, - addendum = {https://lccn.loc.gov/93004489}, - abstract = {Statistics is a subject of many uses and surprisingly few effective practitioners. The traditional road to statistical knowledge is blocked, for most, by a formidable wall of mathematics. The approach in An Introduction to the Bootstrap avoids that wall. It arms scientists and engineers, as well as statisticians, with the computational techniques they need to analyze and understand complicated data sets.}, - keywords = {Bootstrap (Statistics)}, -} - -@Book{Schafer-1997, - author = {Joseph L. 
Schafer}, - date = {1997-08}, - title = {Analysis of incomplete multivariate data}, - doi = {10.1201/9780367803025}, - isbn = {9780367803025}, - abstract = {The last two decades have seen enormous developments in statistical methods for incomplete data. The EM algorithm and its extensions, multiple imputation, and Markov Chain Monte Carlo provide a set of flexible and reliable tools for inference in large classes of missing-data problems. Yet, in practical terms, those developments have had surprisingly little impact on the way most data analysts handle missing values on a routine basis. - Analysis of Incomplete Multivariate Data helps bridge the gap between theory and practice, making these missing-data tools accessible to a broad audience. It presents a unified, Bayesian approach to the analysis of incomplete multivariate data, covering datasets in which the variables are continuous, categorical, or both. The focus is applied, where necessary, to help readers thoroughly understand the statistical properties of those methods, and the behavior of the accompanying algorithms. - All techniques are illustrated with real data examples, with extended discussion and practical advice. All of the algorithms described in this book have been implemented by the author for general use in the statistical languages S and S Plus. The software is available free of charge on the Internet.}, - publisher = {Chapman and Hall/CRC}, -} - -@Article{Bauer-Preacher-Gil-2006, - author = {Daniel J. Bauer and Kristopher J. Preacher and Karen M. Gil}, - date = {2006}, - journaltitle = {Psychological Methods}, - title = {Conceptualizing and testing random indirect effects and moderated mediation in multilevel models: New procedures and recommendations}, - doi = {10.1037/1082-989x.11.2.142}, - number = {2}, - pages = {142--163}, - volume = {11}, - abstract = {The authors propose new procedures for evaluating direct, indirect, and total effects in multilevel models when all relevant variables are measured at Level 1 and all effects are random. Formulas are provided for the mean and variance of the indirect and total effects and for the sampling variances of the average indirect and total effects. Simulations show that the estimates are unbiased under most conditions. Confidence intervals based on a normal approximation or a simulated sampling distribution perform well when the random effects are normally distributed but less so when they are nonnormally distributed. These methods are further developed to address hypotheses of moderated mediation in the multilevel context. An example demonstrates the feasibility and usefulness of the proposed methods.}, - publisher = {American Psychological Association ({APA})}, - keywords = {multilevel model, hierarchical linear model, indirect effect, mediation, moderated mediation}, -} - -@Article{Cheung-2009a, - author = {Mike W.-L. Cheung}, - date = {2009-05}, - journaltitle = {Behavior Research Methods}, - title = {Comparison of methods for constructing confidence intervals of standardized indirect effects}, - doi = {10.3758/brm.41.2.425}, - number = {2}, - pages = {425--438}, - volume = {41}, - abstract = {Mediation models are often used as a means to explain the psychological mechanisms between an independent and a dependent variable in the behavioral and social sciences. A major limitation of the unstandardized indirect effect calculated from raw scores is that it cannot be interpreted as an effect-size measure.
In contrast, the standardized indirect effect calculated from standardized scores can be a good candidate as a measure of effect size because it is scale invariant. In the present article, 11 methods for constructing the confidence intervals (CIs) of the standardized indirect effects were evaluated via a computer simulation. These included six Wald CIs, three bootstrap CIs, one likelihood-based CI, and the PRODCLIN CI. The results consistently showed that the percentile bootstrap, the bias-corrected bootstrap, and the likelihood-based approaches had the best coverage probability. Mplus, LISREL, and Mx syntax were included to facilitate the use of these preferred methods in applied settings. Future issues on the use of the standardized indirect effects are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, coverage probability, structural equation modeling approach}, -} - -@Article{Cheung-2009b, - author = {Mike W.-L. Cheung}, - date = {2009-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Constructing approximate confidence intervals for parameters with structural equation models}, - doi = {10.1080/10705510902751291}, - number = {2}, - pages = {267--294}, - volume = {16}, - abstract = {Confidence intervals (CIs) for parameters are usually constructed based on the estimated standard errors. These are known as Wald CIs. This article argues that likelihood-based CIs (CIs based on likelihood ratio statistics) are often preferred to Wald CIs. It shows how the likelihood-based CIs and the Wald CIs for many statistics and psychometric indexes can be constructed with the use of phantom variables (Rindskopf, 1984) in some of the current structural equation modeling (SEM) packages. The procedures to form CIs for the differences in correlation coefficients, squared multiple correlations, indirect effects, coefficient alphas, and reliability estimates are illustrated. A simulation study on the Pearson correlation is used to demonstrate the advantages of the likelihood-based CI over the Wald CI. Issues arising from this SEM approach and extensions of this approach are discussed.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Cheung-Lau-2007, - author = {Gordon W. Cheung and Rebecca S. Lau}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Testing mediation and suppression effects of latent variables}, - doi = {10.1177/1094428107300343}, - number = {2}, - pages = {296--325}, - volume = {11}, - abstract = {Because of the importance of mediation studies, researchers have been continuously searching for the best statistical test for mediation effect. The approaches that have been most commonly employed include those that use zero-order and partial correlation, hierarchical regression models, and structural equation modeling (SEM). This study extends MacKinnon and colleagues (MacKinnon, Lockwood, Hoffmann, West, \& Sheets, 2002; MacKinnon, Lockwood, \& Williams, 2004, MacKinnon, Warsi, \& Dwyer, 1995) works by conducting a simulation that examines the distribution of mediation and suppression effects of latent variables with SEM, and the properties of confidence intervals developed from eight different methods. Results show that SEM provides unbiased estimates of mediation and suppression effects, and that the bias-corrected bootstrap confidence intervals perform best in testing for mediation and suppression effects. 
Steps to implement the recommended procedures with Amos are presented.}, - publisher = {{SAGE} Publications}, - keywords = {mediating effects, suppression effects, structural equation modeling}, -} - -@Article{CribariNeto-Souza-Vasconcellos-2007, - author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, - date = {2007-08}, - journaltitle = {Communications in Statistics - Theory and Methods}, - title = {Inference under heteroskedasticity and leveraged data}, - doi = {10.1080/03610920601126589}, - number = {10}, - pages = {1877--1888}, - volume = {36}, - abstract = {We evaluate the finite-sample behavior of different heteroskedasticity-consistent covariance matrix estimators, under both constant and unequal error variances. We consider the estimator proposed by Halbert White (HC0), and also its variants known as HC2, HC3, and HC4; the latter was recently proposed by Cribari-Neto (2004). We propose a new covariance matrix estimator: HC5. It is the first consistent estimator to explicitly take into account the effect that the maximal leverage has on the associated inference. Our numerical results show that quasi-$t$ inference based on HC5 is typically more reliable than inference based on other covariance matrix estimators.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Fritz-MacKinnon-2007, - author = {Matthew S. Fritz and David P. MacKinnon}, - date = {2007-03}, - journaltitle = {Psychological Science}, - title = {Required sample size to detect the mediated effect}, - doi = {10.1111/j.1467-9280.2007.01882.x}, - number = {3}, - pages = {233--239}, - volume = {18}, - abstract = {Mediation models are widely used, and there are many tests of the mediated effect. One of the most common questions that researchers have when planning mediation studies is, ``How many subjects do I need to achieve adequate power when testing for mediation?'' This article presents the necessary sample sizes for six of the most common and the most recommended tests of mediation for various combinations of parameters, to provide a guide for researchers when designing studies or applying for grants.}, - publisher = {{SAGE} Publications}, - keywords = {bootstrap, collinearity, mediation analysis, power, tolerance}, -} - -@Article{Graham-Olchowski-Gilreath-2007, - author = {John W. Graham and Allison E. Olchowski and Tamika D. Gilreath}, - date = {2007-06}, - journaltitle = {Prevention Science}, - title = {How many imputations are really needed? Some practical clarifications of multiple imputation theory}, - doi = {10.1007/s11121-007-0070-9}, - number = {3}, - pages = {206--213}, - volume = {8}, - abstract = {Multiple imputation (MI) and full information maximum likelihood (FIML) are the two most common approaches to missing data analysis. In theory, MI and FIML are equivalent when identical models are tested using the same variables, and when m, the number of imputations performed with MI, approaches infinity. However, it is important to know how many imputations are necessary before MI and FIML are sufficiently equivalent in ways that are important to prevention scientists. MI theory suggests that small values of m, even on the order of three to five imputations, yield excellent results. Previous guidelines for sufficient m are based on relative efficiency, which involves the fraction of missing information ($\gamma$) for the parameter being estimated, and m.
In the present study, we used a Monte Carlo simulation to test MI models across several scenarios in which $\gamma$ and m were varied. Standard errors and p-values for the regression coefficient of interest varied as a function of m, but not at the same rate as relative efficiency. Most importantly, statistical power for small effect sizes diminished as m became smaller, and the rate of this power falloff was much greater than predicted by changes in relative efficiency. Based on our findings, we recommend that researchers using MI should perform many more imputations than previously considered sufficient. These recommendations are based on $\gamma$, and take into consideration one's tolerance for a preventable power falloff (compared to FIML) due to using too few imputations.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {multiple imputation, number of imputations, full information maximum likelihood, missing data, statistical power}, -} - -@Article{MacKinnon-Fritz-Williams-etal-2007, - author = {David P. MacKinnon and Matthew S. Fritz and Jason Williams and Chondra M. Lockwood}, - date = {2007-08}, - journaltitle = {Behavior Research Methods}, - title = {Distribution of the product confidence limits for the indirect effect: Program {PRODCLIN}}, - doi = {10.3758/bf03193007}, - number = {3}, - pages = {384--389}, - volume = {39}, - abstract = {This article describes a program, PRODCLIN (distribution of the PRODuct Confidence Limits for INdirect effects), written for SAS, SPSS, and R, that computes confidence limits for the product of two normal random variables. The program is important because it can be used to obtain more accurate confidence limits for the indirect effect, as demonstrated in several recent articles (MacKinnon, Lockwood, \& Williams, 2004; Pituch, Whittaker, \& Stapleton, 2005). Tests of the significance of and confidence limits for indirect effects based on the distribution of the product method have more accurate Type I error rates and more power than other, more commonly used tests. Values for the two paths involved in the indirect effect and their standard errors are entered in the PRODCLIN program, and distribution of the product confidence limits are computed. Several examples are used to illustrate the PRODCLIN program. The PRODCLIN programs in rich text format may be downloaded from www.psychonomic.org/archive.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{MacKinnon-Lockwood-Hoffman-etal-2002, - author = {David P. MacKinnon and Chondra M. Lockwood and Jeanne M. Hoffman and Stephen G. West and Virgil Sheets}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {A comparison of methods to test mediation and other intervening variable effects}, - doi = {10.1037/1082-989x.7.1.83}, - number = {1}, - pages = {83--104}, - volume = {7}, - abstract = {A Monte Carlo study compared 14 methods to test the statistical significance of the intervening variable effect. An intervening variable (mediator) transmits the effect of an independent variable to a dependent variable. The commonly used R. M. Baron and D. A. Kenny (1986) approach has low statistical power. Two methods based on the distribution of the product and 2 difference-in-coefficients methods have the most accurate Type I error rates and greatest statistical power except in 1 important case in which Type I error rates are too high.
The best balance of Type I error and statistical power across all cases is the test of the joint significance of the two effects comprising the intervening variable effect.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{MacKinnon-Lockwood-Williams-2004, - author = {David P. MacKinnon and Chondra M. Lockwood and Jason Williams}, - date = {2004-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {Confidence limits for the indirect effect: Distribution of the product and resampling methods}, - doi = {10.1207/s15327906mbr3901_4}, - number = {1}, - pages = {99--128}, - volume = {39}, - abstract = {The most commonly used method to test an indirect effect is to divide the estimate of the indirect effect by its standard error and compare the resulting z statistic with a critical value from the standard normal distribution. Confidence limits for the indirect effect are also typically based on critical values from the standard normal distribution. This article uses a simulation study to demonstrate that confidence limits are imbalanced because the distribution of the indirect effect is normal only in special cases. Two alternatives for improving the performance of confidence limits for the indirect effect are evaluated: (a) a method based on the distribution of the product of two normal random variables, and (b) resampling methods. In Study 1, confidence limits based on the distribution of the product are more accurate than methods based on an assumed normal distribution but confidence limits are still imbalanced. Study 2 demonstrates that more accurate confidence limits are obtained using resampling methods, with the bias-corrected bootstrap the best method overall.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, -} - -@Article{Peugh-Enders-2004, - author = {James L. Peugh and Craig K. Enders}, - date = {2004-12}, - journaltitle = {Review of Educational Research}, - title = {Missing data in educational research: A review of reporting practices and suggestions for improvement}, - doi = {10.3102/00346543074004525}, - number = {4}, - pages = {525--556}, - volume = {74}, - publisher = {American Educational Research Association ({AERA})}, - abstract = {Missing data analyses have received considerable recent attention in the methodological literature, and two ``modern'' methods, multiple imputation and maximum likelihood estimation, are recommended. The goals of this article are to (a) provide an overview of missing-data theory, maximum likelihood estimation, and multiple imputation; (b) conduct a methodological review of missing-data reporting practices in 23 applied research journals; and (c) provide a demonstration of multiple imputation and maximum likelihood estimation using the Longitudinal Study of American Youth data. The results indicated that explicit discussions of missing data increased substantially between 1999 and 2003, but the use of maximum likelihood estimation or multiple imputation was rare; the studies relied almost exclusively on listwise and pairwise deletion.}, - keywords = {EM algorithm, maximum likelihood estimation, missing data, multiple imputation, NORM}, -} - -@Article{Preacher-Hayes-2004, - author = {Kristopher J. Preacher and Andrew F. 
Hayes}, - date = {2004-11}, - journaltitle = {Behavior Research Methods, Instruments, \& Computers}, - title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, - doi = {10.3758/bf03206553}, - number = {4}, - pages = {717--731}, - volume = {36}, - abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. Electronic copies of these macros may be downloaded from the Psychonomic Society’s Web archive at www.psychonomic.org/archive/.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {life satisfaction, indirect effect, mediation analysis, cognitive therapy, Sobel test}, -} - -@Article{Preacher-Hayes-2008, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2008-08}, - journaltitle = {Behavior Research Methods}, - title = {Asymptotic and resampling strategies for assessing and comparing indirect effects in multiple mediator models}, - doi = {10.3758/brm.40.3.879}, - number = {3}, - pages = {879--891}, - volume = {40}, - abstract = {Hypotheses involving mediation are common in the behavioral sciences. Mediation exists when a predictor affects a dependent variable indirectly through at least one intervening variable, or mediator. Methods to assess mediation involving multiple simultaneous mediators have received little attention in the methodological literature despite a clear need. We provide an overview of simple and multiple mediation and explore three approaches that can be used to investigate indirect processes, as well as methods for contrasting two or more mediators within a single model. We present an illustrative example, assessing and contrasting potential mediators of the relationship between the helpfulness of socialization agents and job satisfaction. We also provide SAS and SPSS macros, as well as Mplus and LISREL syntax, to facilitate the use of these methods in applications.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {indirect effect, structural equation modeling, residual covariance, total indirect effect, multiple mediator model}, -} - -@Article{Raghunathan-Lepkowski-Hoewyk-etal-2001, - author = {Trivellore E. Raghunathan and James M. 
Lepkowski and John Van Hoewyk and Peter Solenberger}, - date = {2001}, - journaltitle = {Survey Methodology}, - title = {A multivariate technique for multiply imputing missing values using a sequence of regression models}, - number = {1}, - pages = {85--95}, - volume = {27}, - abstract = {This article describes and evaluates a procedure for imputing missing values for a relatively complex data structure when the data are missing at random. The imputations are obtained by fitting a sequence of regression models and drawing values from the corresponding predictive distributions. The types of regression models used are linear, logistic, Poisson, generalized logit or a mixture of these depending on the type of variable being imputed. Two additional common features in the imputation process are incorporated: restriction to a relevant subpopulation for some variables and logical bounds or constraints for the imputed values. The restrictions involve subsetting the sample individuals that satisfy certain criteria while fitting the regression models. The bounds involve drawing values from a truncated predictive distribution. The development of this method was partly motivated by the analysis of two data sets which are used as illustrations. The sequential regression procedure is applied to perform multiple imputation analysis for the two applied problems. The sampling properties of inferences from multiply imputed data sets created using the sequential regression method are evaluated through simulated data sets.}, - keywords = {item nonresponse, missing at random, multiple imputation, nonignorable missing mechanism, regression, sampling properties and simulations}, -} - -@Article{Schafer-Graham-2002, - author = {Joseph L. Schafer and John W. Graham}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Missing data: Our view of the state of the art}, - doi = {10.1037/1082-989x.7.2.147}, - number = {2}, - pages = {147--177}, - volume = {7}, - abstract = {Statistical procedures for missing data have vastly improved, yet misconception and unsound practice still abound. The authors frame the missing-data problem, review methods, offer advice, and raise issues that remain unresolved. They clear up common misunderstandings regarding the missing at random (MAR) concept. They summarize the evidence against older procedures and, with few exceptions, discourage their use. They present, in both technical and practical language, 2 general approaches that come highly recommended: maximum likelihood (ML) and Bayesian multiple imputation (MI). Newer developments are discussed, including some for dealing with missing data that are not MAR. Although not yet in the mainstream, these procedures may eventually extend the ML and MI methods that currently represent the state of the art.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Serlin-2000, - author = {Ronald C. Serlin}, - date = {2000}, - journaltitle = {Psychological Methods}, - title = {Testing for robustness in {Monte Carlo} studies}, - doi = {10.1037/1082-989x.5.2.230}, - number = {2}, - pages = {230--240}, - volume = {5}, - abstract = {Monte Carlo studies provide the information needed to help researchers select appropriate analytical procedures under design conditions in which the underlying assumptions of the procedures are not met. 
In Monte Carlo studies, the 2 errors that one could commit involve (a) concluding that a statistical procedure is robust when it is not or (b) concluding that it is not robust when it is. In previous attempts to apply standard statistical design principles to Monte Carlo studies, the less severe of these errors has been wrongly designated the Type I error. In this article, a method is presented for controlling the appropriate Type I error rate; the determination of the number of iterations required in a Monte Carlo study to achieve desired power is described; and a confidence interval for a test's true Type I error rate is derived. A robustness criterion is also proposed that is a compromise between W. G. Cochran's (1952) and J. V. Bradley's (1978) criteria.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Shrout-Bolger-2002, - author = {Patrick E. Shrout and Niall Bolger}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Mediation in experimental and nonexperimental studies: New procedures and recommendations}, - doi = {10.1037/1082-989x.7.4.422}, - number = {4}, - pages = {422--445}, - volume = {7}, - abstract = {Mediation is said to occur when a causal effect of some variable $X$ on an outcome $Y$ is explained by some intervening variable $M$. The authors recommend that with small to moderate samples, bootstrap methods (B. Efron \& R. Tibshirani, 1993) be used to assess mediation. Bootstrap tests are powerful because they detect that the sampling distribution of the mediated effect is skewed away from 0. They argue that R. M. Baron and D. A. Kenny's (1986) recommendation of first testing the $X \to Y$ association for statistical significance should not be a requirement when there is a priori belief that the effect size is small or suppression is a possibility. Empirical examples and computer setups for bootstrap analyses are provided.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Taylor-MacKinnon-Tein-2007, - author = {Aaron B. Taylor and David P. MacKinnon and Jenn-Yun Tein}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Tests of the three-path mediated effect}, - doi = {10.1177/1094428107300344}, - number = {2}, - pages = {241--269}, - volume = {11}, - abstract = {In a three-path mediational model, two mediators intervene in a series between an independent and a dependent variable. Methods of testing for mediation in such a model are generalized from the more often used single-mediator model. Six such methods are introduced and compared in a Monte Carlo study in terms of their Type I error, power, and coverage. Based on its results, the joint significance test is preferred when only a hypothesis test is of interest. The percentile bootstrap and bias-corrected bootstrap are preferred when a confidence interval on the mediated effect is desired, with the latter having more power but also slightly inflated Type I error in some conditions.}, - publisher = {{SAGE} Publications}, - keywords = {mediation, bootstrapping}, -} - -@Article{vanBuuren-Brand-GroothuisOudshoorn-etal-2006, - author = {Stef {van Buuren} and J. P. L. Brand and C. G. M. Groothuis-Oudshoorn and Donald B.
Rubin}, - date = {2006-12}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Fully conditional specification in multivariate imputation}, - doi = {10.1080/10629360600810434}, - number = {12}, - pages = {1049--1064}, - volume = {76}, - abstract = {The use of the Gibbs sampler with fully conditionally specified models, where the distribution of each variable given the other variables is the starting point, has become a popular method to create imputations in incomplete multivariate data. The theoretical weakness of this approach is that the specified conditional densities can be incompatible, and therefore the stationary distribution to which the Gibbs sampler attempts to converge may not exist. This study investigates practical consequences of this problem by means of simulation. Missing data are created under four different missing data mechanisms. Attention is given to the statistical behavior under compatible and incompatible models. The results indicate that multiple imputation produces essentially unbiased estimates with appropriate coverage in the simple cases investigated, even for the incompatible models. Of particular interest is that these results were produced using only five Gibbs iterations starting from a simple draw from observed marginal distributions. It thus appears that, despite the theoretical weaknesses, the actual performance of conditional model specification for multivariate imputation can be quite good, and therefore deserves further study.}, - publisher = {Informa {UK} Limited}, - keywords = {multivariate missing data, multiple imputation, distributional compatibility, Gibbs sampling, simulation, proper imputation}, -} - -@Article{Yuan-Bentler-2000, - author = {Ke-Hai Yuan and Peter M. Bentler}, - date = {2000-08}, - journaltitle = {Sociological Methodology}, - title = {Three likelihood-based methods for mean and covariance structure analysis with nonnormal missing data}, - doi = {10.1111/0081-1750.00078}, - number = {1}, - pages = {165--200}, - volume = {30}, - abstract = {Survey and longitudinal studies in the social and behavioral sciences generally contain missing data. Mean and covariance structure models play an important role in analyzing such data. Two promising methods for dealing with missing data are a direct maximum-likelihood and a two-stage approach based on the unstructured mean and covariance estimates obtained by the EM-algorithm. Typical assumptions under these two methods are ignorable nonresponse and normality of data. However, data sets in social and behavioral sciences are seldom normal, and experience with these procedures indicates that normal theory based methods for nonnormal data very often lead to incorrect model evaluations. By dropping the normal distribution assumption, we develop more accurate procedures for model inference. Based on the theory of generalized estimating equations, a way to obtain consistent standard errors of the two-stage estimates is given. The asymptotic efficiencies of different estimators are compared under various assumptions. We also propose a minimum chi-square approach and show that the estimator obtained by this approach is asymptotically at least as efficient as the two likelihood-based estimators for either normal or nonnormal data. The major contribution of this paper is that for each estimator, we give a test statistic whose asymptotic distribution is chisquare as long as the underlying sampling distribution enjoys finite fourth-order moments. 
We also give a characterization for each of the two likelihood ratio test statistics when the underlying distribution is nonnormal. Modifications to the likelihood ratio statistics are also given. Our working assumption is that the missing data mechanism is missing completely at random. Examples and Monte Carlo studies indicate that, for commonly encountered nonnormal distributions, the procedures developed in this paper are quite reliable even for samples with missing data that are missing at random.}, - publisher = {{SAGE} Publications}, -} - -@Book{MacKinnon-2008, - author = {David P. MacKinnon}, - series = {Multivariate applications}, - date = {2008}, - title = {Introduction to statistical mediation analysis}, - doi = {10.4324/9780203809556}, - isbn = {9780805864298}, - location = {Hoboken}, - pages = {488}, - library = {QA278.2 .M29 2008}, - addendum = {https://lccn.loc.gov/2007011793}, - abstract = {This volume introduces the statistical, methodological, and conceptual aspects of mediation analysis. Applications from health, social, and developmental psychology, sociology, communication, exercise science, and epidemiology are emphasized throughout. Single-mediator, multilevel, and longitudinal models are reviewed. The author's goal is to help the reader apply mediation analysis to their own data and understand its limitations. - Each chapter features an overview, numerous worked examples, a summary, and exercises (with answers to the odd numbered questions). The accompanying downloadable resources contain outputs described in the book from SAS, SPSS, LISREL, EQS, MPLUS, and CALIS, and a program to simulate the model. The notation used is consistent with existing literature on mediation in psychology. - The book opens with a review of the types of research questions the mediation model addresses. Part II describes the estimation of mediation effects including assumptions, statistical tests, and the construction of confidence limits. Advanced models including mediation in path analysis, longitudinal models, multilevel data, categorical variables, and mediation in the context of moderation are then described. The book closes with a discussion of the limits of mediation analysis, additional approaches to identifying mediating variables, and future directions. - Introduction to Statistical Mediation Analysis is intended for researchers and advanced students in health, social, clinical, and developmental psychology as well as communication, public health, nursing, epidemiology, and sociology. Some exposure to a graduate level research methods or statistics course is assumed. The overview of mediation analysis and the guidelines for conducting a mediation analysis will be appreciated by all readers.}, - publisher = {Erlbaum Psych Press}, - keywords = {Mediation (Statistics)}, -} - -@Book{Venables-Ripley-2002, - author = {W. N. Venables and B. D. Ripley}, - date = {2002}, - title = {Modern applied statistics with {S}}, - doi = {10.1007/978-0-387-21706-2}, - publisher = {Springer New York}, -} - -@Article{Biesanz-Falk-Savalei-2010, - author = {Jeremy C. Biesanz and Carl F. Falk and Victoria Savalei}, - date = {2010-08}, - journaltitle = {Multivariate Behavioral Research}, - title = {Assessing mediational models: Testing and interval estimation for indirect effects}, - doi = {10.1080/00273171.2010.498292}, - number = {4}, - pages = {661--701}, - volume = {45}, - abstract = {Theoretical models specifying indirect or mediated effects are common in the social sciences. 
An indirect effect exists when an independent variable's influence on the dependent variable is mediated through an intervening variable. Classic approaches to assessing such mediational hypotheses (Baron \& Kenny, 1986; Sobel, 1982) have in recent years been supplemented by computationally intensive methods such as bootstrapping, the distribution of the product methods, and hierarchical Bayesian Markov chain Monte Carlo (MCMC) methods. These different approaches for assessing mediation are illustrated using data from Dunn, Biesanz, Human, and Finn (2007). However, little is known about how these methods perform relative to each other, particularly in more challenging situations, such as with data that are incomplete and/or nonnormal. This article presents an extensive Monte Carlo simulation evaluating a host of approaches for assessing mediation. We examine Type I error rates, power, and coverage. We study normal and nonnormal data as well as complete and incomplete data. In addition, we adapt a method, recently proposed in statistical literature, that does not rely on confidence intervals (CIs) to test the null hypothesis of no indirect effect. The results suggest that the new inferential method--the partial posterior p value--slightly outperforms existing ones in terms of maintaining Type I error rates while maximizing power, especially with incomplete data. Among confidence interval approaches, the bias-corrected accelerated (BCa) bootstrapping approach often has inflated Type I error rates and inconsistent coverage and is not recommended. In contrast, the bootstrapped percentile confidence interval and the hierarchical Bayesian MCMC method perform best overall, maintaining Type I error rates, exhibiting reasonable power, and producing stable and accurate coverage rates.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Blanca-Arnau-LopezMontiel-etal-2013, - author = {Mar{\'\i}a J. Blanca and Jaume Arnau and Dolores L{\'o}pez-Montiel and Roser Bono and Rebecca Bendayan}, - date = {2013-05}, - journaltitle = {Methodology}, - title = {Skewness and kurtosis in real data samples}, - doi = {10.1027/1614-2241/a000057}, - number = {2}, - pages = {78--84}, - volume = {9}, - abstract = {Parametric statistics are based on the assumption of normality. Recent findings suggest that Type I error and power can be adversely affected when data are non-normal. This paper aims to assess the distributional shape of real data by examining the values of the third and fourth central moments as a measurement of skewness and kurtosis in small samples. The analysis concerned 693 distributions with a sample size ranging from 10 to 30. Measures of cognitive ability and of other psychological variables were included. The results showed that skewness ranged between -2.49 and 2.33. The values of kurtosis ranged between -1.92 and 7.41. Considering skewness and kurtosis together the results indicated that only 5.5\% of distributions were close to expected values under normality.
Although extreme contamination does not seem to be very frequent, the findings are consistent with previous research suggesting that normality is not the rule with real data.}, - publisher = {Hogrefe Publishing Group}, -} - -@Article{Boettiger-Eddelbuettel-2017, - author = {Carl Boettiger and Dirk Eddelbuettel}, - date = {2017}, - journaltitle = {The R Journal}, - title = {An introduction to {Rocker}: Docker containers for {R}}, - doi = {10.32614/rj-2017-065}, - number = {2}, - pages = {527}, - volume = {9}, - abstract = {We describe the Rocker project, which provides a widely-used suite of Docker images with customized R environments for particular tasks. We discuss how this suite is organized, and how these tools can increase portability, scaling, reproducibility, and convenience of R users and developers.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-docker-rocker}, -} - -@Article{Chow-Ho-Hamaker-etal-2010, - author = {Sy-Miin Chow and Moon-ho R. Ho and Ellen L. Hamaker and Conor V. Dolan}, - date = {2010-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Equivalence and differences between structural equation modeling and state-space modeling techniques}, - doi = {10.1080/10705511003661553}, - number = {2}, - pages = {303--332}, - volume = {17}, - abstract = {State-space modeling techniques have been compared to structural equation modeling (SEM) techniques in various contexts but their unique strengths have often been overshadowed by their similarities to SEM. In this article, we provide a comprehensive discussion of these 2 approaches' similarities and differences through analytic comparisons and numerical simulations, with a focus on their use in representing intraindividual dynamics and interindividual differences. To demonstrate the respective strengths and weaknesses of the 2 approaches in representing these 2 aspects, we simulated data under (a) a cross-sectional common factor model, (b) a latent difference score model with random effects in intercept and slope, and (c) a bivariate dynamic factor analysis model with auto- and cross-regression parameters. Possible ways in which SEM and state-space modeling can be utilized as complementary tools in representing human developmental and other related processes are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {ild, sem, ssm}, -} - -@Article{Deboeck-Preacher-2015, - author = {Pascal R. Deboeck and Kristopher J. Preacher}, - date = {2015-06}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {No Need to be Discrete: A Method for Continuous Time Mediation Analysis}, - doi = {10.1080/10705511.2014.973960}, - number = {1}, - pages = {61--75}, - volume = {23}, - abstract = {Mediation is one concept that has shaped numerous theories. The list of problems associated with mediation models, however, has been growing. Mediation models based on cross-sectional data can produce unexpected estimates, so much so that making longitudinal or causal inferences is inadvisable. Even longitudinal mediation models have faults, as parameter estimates produced by these models are specific to the lag between observations, leading to much debate over appropriate lag selection. Using continuous time models (CTMs) rather than commonly employed discrete time models, one can estimate lag-independent parameters. 
We demonstrate methodology that allows for continuous time mediation analyses, with attention to concepts such as indirect and direct effects, partial mediation, the effect of lag, and the lags at which relations become maximal. A simulation compares common longitudinal mediation methods with CTMs. Reanalysis of a published covariance matrix demonstrates that CTMs can be fit to data used in longitudinal mediation studies.}, - publisher = {Informa {UK} Limited}, - keywords = {continuous time models, cross-lagged panel model, exact discrete model, longitudinal mediation, mediation}, - annotation = {ild, ild-mediation}, -} - -@Article{Dudgeon-2017, - author = {Paul Dudgeon}, - date = {2017-03}, - journaltitle = {Psychometrika}, - title = {Some improvements in confidence intervals for standardized regression coefficients}, - doi = {10.1007/s11336-017-9563-z}, - number = {4}, - pages = {928--951}, - volume = {82}, - keywords = {standardized regression coefficients, robust confidence intervals, non-normality}, - abstract = {Yuan and Chan (Psychometrika 76:670–690, 2011. doi:10.1007/S11336-011-9224-6) derived consistent confidence intervals for standardized regression coefficients under fixed and random score assumptions. Jones and Waller (Psychometrika 80:365–378, 2015. doi:10.1007/S11336-013-9380-Y) extended these developments to circumstances where data are non-normal by examining confidence intervals based on Browne's (Br J Math Stat Psychol 37:62–83, 1984. doi:10.1111/j.2044-8317.1984.tb00789.x) asymptotic distribution-free (ADF) theory. Seven different heteroscedastic-consistent (HC) estimators were investigated in the current study as potentially better solutions for constructing confidence intervals on standardized regression coefficients under non-normality. Normal theory, ADF, and HC estimators were evaluated in a Monte Carlo simulation. Findings confirmed the superiority of the HC3 (MacKinnon and White, J Econ 35:305–325, 1985. doi:10.1016/0304-4076(85)90158-7) and HC5 (Cribari-Neto and Da Silva, Adv Stat Anal 95:129–146, 2011. doi:10.1007/s10182-010-0141-2) interval estimators over Jones and Waller's ADF estimator under all conditions investigated, as well as over the normal theory method. The HC5 estimator was more robust in a restricted set of conditions over the HC3 estimator. Some possible extensions of HC estimators to other effect size measures are considered for future developments.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Eddelbuettel-Francois-2011, - author = {Dirk Eddelbuettel and Romain Fran{\c c}ois}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{Rcpp}: Seamless {R} and {C++} integration}, - doi = {10.18637/jss.v040.i08}, - number = {8}, - volume = {40}, - abstract = {The Rcpp package simplifies integrating C++ code with R. It provides a consistent C++ class hierarchy that maps various types of R objects (vectors, matrices, functions, environments, ...) to dedicated C++ classes. Object interchange between R and C++ is managed by simple, flexible and extensible concepts which include broad support for C++ Standard Template Library idioms. C++ code can both be compiled, linked and loaded on the fly, or added via packages. Flexible error and exception code handling is provided. 
Rcpp substantially lowers the barrier for programmers wanting to combine C++ code with R.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages}, -} - -@Article{Hayes-Scharkow-2013, - author = {Andrew F. Hayes and Michael Scharkow}, - date = {2013-08}, - journaltitle = {Psychological Science}, - title = {The relative trustworthiness of inferential tests of the indirect effect in statistical mediation analysis}, - doi = {10.1177/0956797613480187}, - number = {10}, - pages = {1918--1927}, - volume = {24}, - abstract = {A content analysis of 2 years of Psychological Science articles reveals inconsistencies in how researchers make inferences about indirect effects when conducting a statistical mediation analysis. In this study, we examined the frequency with which popularly used tests disagree, whether the method an investigator uses makes a difference in the conclusion he or she will reach, and whether there is a most trustworthy test that can be recommended to balance practical and performance considerations. We found that tests agree much more frequently than they disagree, but disagreements are more common when an indirect effect exists than when it does not. We recommend the bias-corrected bootstrap confidence interval as the most trustworthy test if power is of utmost concern, although it can be slightly liberal in some circumstances. Investigators concerned about Type I errors should choose the Monte Carlo confidence interval or the distribution-of-the-product approach, which rarely disagree. The percentile bootstrap confidence interval is a good compromise test.}, - publisher = {{SAGE} Publications}, -} - -@Article{Hunter-2017, - author = {Michael D. Hunter}, - date = {2017-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {State Space Modeling in an Open Source, Modular, Structural Equation Modeling Environment}, - doi = {10.1080/10705511.2017.1369354}, - number = {2}, - pages = {307--324}, - volume = {25}, - abstract = {State space models (SSMs) are introduced in the context of structural equation modeling (SEM). In particular, the OpenMx implementation of SSMs using the Kalman filter and prediction error decomposition is discussed. In reflection of modularity, the implementation uses the same full information maximum likelihood missing data procedures for SSMs and SEMs. Similarly, generic OpenMx features such as likelihood ratio tests, profile likelihood confidence intervals, Hessian-based standard errors, definition variables, and the matrix algebra interface are all supported. Example scripts for specification of autoregressive models, multiple lag models (VAR(p)), multiple lag moving average models (VARMA(p, q)), multiple subject models, and latent growth models are provided. Additionally, latent variable calculation based on the Kalman filter and raw data generation based on a model are all included. Finally, future work for extending SSMs to allow for random effects and for presenting them in diagrams is discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {state space model, software, Kalman filter, OpenMx}, - annotation = {ild, ild-software, sem, sem-software, ssm, ssm-software}, -} - -@Article{Jones-Waller-2013a, - author = {Jeff A. Jones and Niels G. 
Waller}, - date = {2013}, - journaltitle = {Psychological Methods}, - title = {Computing confidence intervals for standardized regression coefficients.}, - doi = {10.1037/a0033269}, - number = {4}, - pages = {435--453}, - volume = {18}, - abstract = {With fixed predictors, the standard method (Cohen, Cohen, West, \& Aiken, 2003, p. 86; Harris, 2001, p. 80; Hays, 1994, p. 709) for computing confidence intervals (CIs) for standardized regression coefficients fails to account for the sampling variability of the criterion standard deviation. With random predictors, this method also fails to account for the sampling variability of the predictor standard deviations. Nevertheless, under some conditions the standard method will produce CIs with accurate coverage rates. To delineate these conditions, we used a Monte Carlo simulation to compute empirical CI coverage rates in samples drawn from 36 populations with a wide range of data characteristics. We also computed the empirical CI coverage rates for 4 alternative methods that have been discussed in the literature: noncentrality interval estimation, the delta method, the percentile bootstrap, and the bias-corrected and accelerated bootstrap. Our results showed that for many data-parameter configurations--for example, sample size, predictor correlations, coefficient of determination ($R^2$), orientation of $\beta$ with respect to the eigenvectors of the predictor correlation matrix, $R_X$--the standard method produced coverage rates that were close to their expected values. However, when population $R^2$ was large and when $\beta$ approached the last eigenvector of $R_X$, then the standard method coverage rates were frequently below the nominal rate (sometimes by a considerable amount). In these conditions, the delta method and the 2 bootstrap procedures were consistently accurate. Results using noncentrality interval estimation were inconsistent. In light of these findings, we recommend that researchers use the delta method to evaluate the sampling variability of standardized regression coefficients.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Jones-Waller-2015, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2015-06}, - journaltitle = {Psychometrika}, - title = {The Normal-Theory and Asymptotic Distribution-Free ({ADF}) Covariance Matrix of Standardized Regression Coefficients: Theoretical Extensions and Finite Sample Behavior}, - doi = {10.1007/s11336-013-9380-y}, - number = {2}, - pages = {365--378}, - volume = {80}, - abstract = {Yuan and Chan (Psychometrika, 76, 670–690, 2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a method for computing this covariance matrix from correlations. Next, we describe an asymptotic distribution-free (ADF; Browne in British Journal of Mathematical and Statistical Psychology, 37, 62–83, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with nonnormal data in moderate-to-large samples using both simulated and real-data examples. 
R code (R Development Core Team, 2012) is available from the authors or through the Psychometrika online repository for supplementary materials.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {standardized-regression, standardized-regression-hc}, -} - -@Article{Koopman-Howe-Hollenbeck-etal-2015, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck and Hock-Peng Sin}, - date = {2015}, - journaltitle = {Journal of Applied Psychology}, - title = {Small sample mediation testing: Misplaced confidence in bootstrapped confidence intervals}, - doi = {10.1037/a0036635}, - number = {1}, - pages = {194--202}, - volume = {100}, - abstract = {Bootstrapping is an analytical tool commonly used in psychology to test the statistical significance of the indirect effect in mediation models. Bootstrapping proponents have particularly advocated for its use for samples of 20-80 cases. This advocacy has been heeded, especially in the Journal of Applied Psychology, as researchers are increasingly utilizing bootstrapping to test mediation with samples in this range. We discuss reasons to be concerned with this escalation, and in a simulation study focused specifically on this range of sample sizes, we demonstrate not only that bootstrapping has insufficient statistical power to provide a rigorous hypothesis test in most conditions but also that bootstrapping has a tendency to exhibit an inflated Type I error rate. We then extend our simulations to investigate an alternative empirical resampling method as well as a Bayesian approach and demonstrate that they exhibit comparable statistical power to bootstrapping in small samples without the associated inflated Type I error. Implications for researchers testing mediation hypotheses in small samples are presented. For researchers wishing to use these methods in their own research, we have provided R syntax in the online supplemental materials.}, - publisher = {American Psychological Association ({APA})}, - keywords = {mediation, bootstrapping, permutation, Bayes}, -} - -@Article{Kurtzer-Sochat-Bauer-2017, - author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. Bauer}, - date = {2017-05}, - journaltitle = {{PLOS} {ONE}}, - title = {{Singularity}: Scientific containers for mobility of compute}, - doi = {10.1371/journal.pone.0177459}, - editor = {Attila Gursoy}, - number = {5}, - pages = {e0177459}, - volume = {12}, - publisher = {Public Library of Science ({PLoS})}, - annotation = {container, container-singularity}, -} - -@Article{Kwan-Chan-2011, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2011-04}, - journaltitle = {Behavior Research Methods}, - title = {Comparing standardized coefficients in structural equation modeling: A model reparameterization approach}, - doi = {10.3758/s13428-011-0088-6}, - number = {3}, - pages = {730--745}, - volume = {43}, - abstract = {We propose a two-stage method for comparing standardized coefficients in structural equation modeling (SEM). At stage 1, we transform the original model of interest into the standardized model by model reparameterization, so that the model parameters appearing in the standardized model are equivalent to the standardized parameters of the original model. At stage 2, we impose appropriate linear equality constraints on the standardized model and use a likelihood ratio test to make statistical inferences about the equality of standardized coefficients. 
Unlike other existing methods for comparing standardized coefficients, the proposed method does not require specific modeling features (e.g., specification of nonlinear constraints), which are available only in certain SEM software programs. Moreover, this method allows researchers to compare two or more standardized coefficients simultaneously in a standard and convenient way. Three real examples are given to illustrate the proposed method, using EQS, a popular SEM software program. Results show that the proposed method performs satisfactorily for testing the equality of standardized coefficients.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Kwan-Chan-2014, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2014-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Comparing squared multiple correlation coefficients using structural equation modeling}, - doi = {10.1080/10705511.2014.882673}, - number = {2}, - pages = {225--238}, - volume = {21}, - abstract = {In social science research, a common topic in multiple regression analysis is to compare the squared multiple correlation coefficients in different populations. Existing methods based on asymptotic theories (Olkin \& Finn, 1995) and bootstrapping (Chan, 2009) are available but these can only handle a 2-group comparison. Another method based on structural equation modeling (SEM) has been proposed recently. However, this method has three disadvantages. First, it requires the user to explicitly specify the sample R2 as a function in terms of the basic SEM model parameters, which is sometimes troublesome and error prone. Second, it requires the specification of nonlinear constraints, which is not available in some popular SEM software programs. Third, it is for a 2-group comparison primarily. In this article, a 2-stage SEM method is proposed as an alternative. Unlike all other existing methods, the proposed method is simple to use, and it does not require any specific programming features such as the specification of nonlinear constraints. More important, the method allows a simultaneous comparison of 3 or more groups. A real example is given to illustrate the proposed method using EQS, a popular SEM software program.}, - keywords = {squared multiple correlation coefficients, structural equation modeling, model reparameterization, multi-sample analysis}, - publisher = {Informa {UK} Limited}, -} - -@Article{Merkel-2014, - author = {Dirk Merkel}, - date = {2014}, - journaltitle = {Linux Journal}, - title = {{Docker}: Lightweight {Linux} containers for consistent development and deployment}, - number = {239}, - pages = {2}, - volume = {2014}, - url = {https://www.linuxjournal.com/content/docker-lightweight-linux-containers-consistent-development-and-deployment}, - annotation = {container, container-docker}, -} - -@Article{Neale-Hunter-Pritikin-etal-2015, - author = {Michael C. Neale and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Timothy R. Brick and Robert M. Kirkpatrick and Ryne Estabrook and Timothy C. Bates and Hermine H. Maes and Steven M. Boker}, - date = {2015-01}, - journaltitle = {Psychometrika}, - title = {{OpenMx} 2.0: Extended Structural Equation and Statistical Modeling}, - doi = {10.1007/s11336-014-9435-8}, - number = {2}, - pages = {535--549}, - volume = {81}, - abstract = {The new software package OpenMx 2.0 for structural equation and other statistical modeling is introduced and its features are described. 
OpenMx is evolving in a modular direction and now allows a mix-and-match computational approach that separates model expectations from fit functions and optimizers. Major backend architectural improvements include a move to swappable open-source optimizers such as the newly written CSOLNP. Entire new methodologies such as item factor analysis and state space modeling have been implemented. New model expectation functions including support for the expression of models in LISREL syntax and a simplified multigroup expectation function are available. Ease-of-use improvements include helper functions to standardize model parameters and compute their Jacobian-based standard errors, access to model components through standard R \$ mechanisms, and improved tab completion from within the R Graphical User Interface.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Ou-Hunter-Chow-2019, - author = {Lu Ou and Michael D. Hunter and Sy-Miin Chow}, - date = {2019}, - journaltitle = {The R Journal}, - title = {What's for {dynr}: A package for linear and nonlinear dynamic modeling in {R}}, - doi = {10.32614/rj-2019-012}, - number = {1}, - pages = {91}, - volume = {11}, - abstract = {Intensive longitudinal data in the behavioral sciences are often noisy, multivariate in nature, and may involve multiple units undergoing regime switches by showing discontinuities interspersed with continuous dynamics. Despite increasing interest in using linear and nonlinear differential/difference equation models with regime switches, there has been a scarcity of software packages that are fast and freely accessible. We have created an R package called dynr that can handle a broad class of linear and nonlinear discrete- and continuous-time models, with regime-switching properties and linear Gaussian measurement functions, in C, while maintaining simple and easy-to-learn model specification functions in R. We present the mathematical and computational bases used by the dynr R package, and present two illustrative examples to demonstrate the unique features of dynr.}, - publisher = {The R Foundation}, - annotation = {ild, ild-software, r, r-packages}, -} - -@Article{Preacher-Selig-2012, - author = {Kristopher J. Preacher and James P. Selig}, - date = {2012-04}, - journaltitle = {Communication Methods and Measures}, - title = {Advantages of Monte Carlo Confidence Intervals for Indirect Effects}, - doi = {10.1080/19312458.2012.679848}, - number = {2}, - pages = {77--98}, - volume = {6}, - abstract = {Monte Carlo simulation is a useful but underutilized method of constructing confidence intervals for indirect effects in mediation analysis. The Monte Carlo confidence interval method has several distinct advantages over rival methods. Its performance is comparable to other widely accepted methods of interval construction, it can be used when only summary data are available, it can be used in situations where rival methods (e.g., bootstrapping and distribution of the product methods) are difficult or impossible, and it is not as computer-intensive as some other methods.
In this study we discuss Monte Carlo confidence intervals for indirect effects, report the results of a simulation study comparing their performance to that of competing methods, demonstrate the method in applied examples, and discuss several software options for implementation in applied settings.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Rosseel-2012, - author = {Yves Rosseel}, - date = {2012}, - journaltitle = {Journal of Statistical Software}, - title = {{lavaan}: An {R} package for structural equation modeling}, - doi = {10.18637/jss.v048.i02}, - number = {2}, - volume = {48}, - abstract = {Structural equation modeling (SEM) is a vast field and widely used by many applied researchers in the social and behavioral sciences. Over the years, many software packages for structural equation modeling have been developed, both free and commercial. However, perhaps the best state-of-the-art software packages in this field are still closed-source and/or commercial. The R package lavaan has been developed to provide applied researchers, teachers, and statisticians, a free, fully open-source, but commercial-quality package for latent variable modeling. This paper explains the aims behind the development of the package, gives an overview of its most important features, and provides some examples to illustrate how lavaan works in practice.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Schouten-Lugtig-Vink-2018, - author = {Rianne Margaretha Schouten and Peter Lugtig and Gerko Vink}, - date = {2018-07}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Generating missing values for simulation purposes: A multivariate amputation procedure}, - doi = {10.1080/00949655.2018.1491577}, - number = {15}, - pages = {2909--2930}, - volume = {88}, - abstract = {Missing data form a ubiquitous problem in scientific research, especially since most statistical analyses require complete data. To evaluate the performance of methods dealing with missing data, researchers perform simulation studies. An important aspect of these studies is the generation of missing values in a simulated, complete data set: the amputation procedure. We investigated the methodological validity and statistical nature of both the current amputation practice and a newly developed and implemented multivariate amputation procedure. We found that the current way of practice may not be appropriate for the generation of intuitive and reliable missing data problems. The multivariate amputation procedure, on the other hand, generates reliable amputations and allows for a proper regulation of missing data problems. The procedure has additional features to generate any missing data scenario precisely as intended. 
Hence, the multivariate amputation procedure is an efficient method to accurately evaluate missing data methodology.}, - publisher = {Informa {UK} Limited}, - keywords = {missing data, multiple imputation, multivariate amputation, evaluation}, -} - -@Article{Tofighi-Kelley-2019, - author = {Davood Tofighi and Ken Kelley}, - date = {2019-06}, - journaltitle = {Multivariate Behavioral Research}, - title = {Indirect effects in sequential mediation models: Evaluating methods for hypothesis testing and confidence interval formation}, - doi = {10.1080/00273171.2019.1618545}, - number = {2}, - pages = {188--210}, - volume = {55}, - abstract = {Complex mediation models, such as a two-mediator sequential model, have become more prevalent in the literature. To test an indirect effect in a two-mediator model, we conducted a large-scale Monte Carlo simulation study of the Type I error, statistical power, and confidence interval coverage rates of 10 frequentist and Bayesian confidence/credible intervals (CIs) for normally and nonnormally distributed data. The simulation included never-studied methods and conditions (e.g., Bayesian CI with flat and weakly informative prior methods, two model-based bootstrap methods, and two nonnormality conditions) as well as understudied methods (e.g., profile-likelihood, Monte Carlo with maximum likelihood standard error [MC-ML] and robust standard error [MC-Robust]). The popular BC bootstrap showed inflated Type I error rates and CI under-coverage. We recommend different methods depending on the purpose of the analysis. For testing the null hypothesis of no mediation, we recommend MC-ML, profile-likelihood, and two Bayesian methods. To report a CI, if data has a multivariate normal distribution, we recommend MC-ML, profile-likelihood, and the two Bayesian methods; otherwise, for multivariate nonnormal data we recommend the percentile bootstrap. We argue that the best method for testing hypotheses is not necessarily the best method for CI construction, which is consistent with the findings we present.}, - keywords = {indirect effect, confidence interval, sequential mediation, Bayesian credible interval}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bayesian, mediation-bootstrap, mediation-lb, mediation-montecarlo, semmcci}, -} - -@Article{Tofighi-MacKinnon-2015, - author = {Davood Tofighi and David P. MacKinnon}, - date = {2015-08}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{Monte Carlo} confidence intervals for complex functions of indirect effects}, - doi = {10.1080/10705511.2015.1057284}, - number = {2}, - pages = {194--205}, - volume = {23}, - abstract = {One challenge in mediation analysis is to generate a confidence interval (CI) with high coverage and power that maintains a nominal significance level for any well-defined function of indirect and direct effects in the general context of structural equation modeling (SEM). This study discusses a proposed Monte Carlo extension that finds the CIs for any well-defined function of the coefficients of SEM such as the product of $k$ coefficients and the ratio of the contrasts of indirect effects, using the Monte Carlo method. Finally, we conduct a small-scale simulation study to compare CIs produced by the Monte Carlo, nonparametric bootstrap, and asymptotic-delta methods. 
Based on our simulation study, we recommend researchers use the Monte Carlo method to test a complex function of indirect effects.}, - keywords = {confidence interval, mediation analysis, Monte Carlo}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-delta, mediation-montecarlo, semmcci}, -} - -@Article{vanBuuren-GroothuisOudshoorn-2011, - author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{mice}: Multivariate Imputation by Chained Equations in {R}}, - doi = {10.18637/jss.v045.i03}, - number = {3}, - volume = {45}, - abstract = {The R package mice imputes incomplete multivariate data by chained equations. The software mice 1.0 appeared in the year 2000 as an S-PLUS library, and in 2001 as an R package. mice 1.0 introduced predictor selection, passive imputation and automatic pooling. This article documents mice, which extends the functionality of mice 1.0 in several ways. In mice, the analysis of imputed data is made completely general, whereas the range of models under which pooling works is substantially extended. mice adds new functionality for imputing multilevel data, automatic predictor selection, data handling, post-processing imputed values, specialized pooling routines, model selection tools, and diagnostic graphs. Imputation of categorical data is improved in order to bypass problems caused by perfect prediction. Special attention is paid to transformations, sum scores, indices and interactions using passive imputation, and to the proper setup of the predictor matrix. mice can be downloaded from the Comprehensive R Archive Network. This article provides a hands-on, stepwise approach to solve applied incomplete data problems.}, - publisher = {Foundation for Open Access Statistic}, - keywords = {MICE, multiple imputation, chained equations, fully conditional specification, Gibbs sampler, predictor selection, passive imputation, R}, -} - -@Article{Wu-Jia-2013, - author = {Wei Wu and Fan Jia}, - date = {2013-09}, - journaltitle = {Multivariate Behavioral Research}, - title = {A new procedure to test mediation with missing data through nonparametric bootstrapping and multiple imputation}, - doi = {10.1080/00273171.2013.816235}, - number = {5}, - pages = {663--691}, - volume = {48}, - abstract = {This article proposes a new procedure to test mediation with the presence of missing data by combining nonparametric bootstrapping with multiple imputation (MI). This procedure performs MI first and then bootstrapping for each imputed data set. The proposed procedure is more computationally efficient than the procedure that performs bootstrapping first and then MI for each bootstrap sample. The validity of the procedure is evaluated using a simulation study under different sample size, missing data mechanism, missing data proportion, and shape of distribution conditions. The result suggests that the proposed procedure performs comparably to the procedure that combines bootstrapping with full information maximum likelihood under most conditions. 
However, caution needs to be taken when using this procedure to handle missing not-at-random or nonnormal data.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Yuan-Chan-2011, - author = {Ke-Hai Yuan and Wai Chan}, - date = {2011-08}, - journaltitle = {Psychometrika}, - title = {Biases and Standard Errors of Standardized Regression Coefficients}, - doi = {10.1007/s11336-011-9224-6}, - number = {4}, - pages = {670--690}, - volume = {76}, - abstract = {The paper obtains consistent standard errors (SE) and biases of order O(1/n) for the sample standardized regression coefficients with both random and given predictors. Analytical results indicate that the formulas for SEs given in popular text books are consistent only when the population value of the regression coefficient is zero. The sample standardized regression coefficients are also biased in general, although it should not be a concern in practice when the sample size is not too small. Monte Carlo results imply that, for both standardized and unstandardized sample regression coefficients, SE estimates based on asymptotics tend to under-predict the empirical ones at smaller sample sizes.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {asymptotics, bias, consistency, Monte Carlo}, - annotation = {standardized-regression, standardized-regression-delta, standardized-regression-normal, standardized-regression-adf}, -} - -@Article{Yzerbyt-Muller-Batailler-etal-2018, - author = {Vincent Yzerbyt and Dominique Muller and C{\a'e}dric Batailler and Charles M. Judd}, - date = {2018-12}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {New recommendations for testing indirect effects in mediational models: The need to report and test component paths}, - doi = {10.1037/pspa0000132}, - number = {6}, - pages = {929--943}, - volume = {115}, - abstract = {In light of current concerns with replicability and reporting false-positive effects in psychology, we examine Type I errors and power associated with 2 distinct approaches for the assessment of mediation, namely the component approach (testing individual parameter estimates in the model) and the index approach (testing a single mediational index). We conduct simulations that examine both approaches and show that the most commonly used tests under the index approach risk inflated Type I errors compared with the joint-significance test inspired by the component approach. We argue that the tendency to report only a single mediational index is worrisome for this reason and also because it is often accompanied by a failure to critically examine the individual causal paths underlying the mediational model. We recommend testing individual components of the indirect effect to argue for the presence of an indirect effect and then using other recommended procedures to calculate the size of that effect. Beyond simple mediation, we show that our conclusions also apply in cases of within-participant mediation and moderated mediation. 
We also provide a new R-package that allows for an easy implementation of our recommendations.}, - publisher = {American Psychological Association ({APA})}, - keywords = {indirect effects, mediation, joint-significance, bootstrap}, -} - -@Article{Zhang-Wang-2012, - author = {Zhiyong Zhang and Lijuan Wang}, - date = {2012-12}, - journaltitle = {Psychometrika}, - title = {Methods for mediation analysis with missing data}, - doi = {10.1007/s11336-012-9301-5}, - number = {1}, - pages = {154--184}, - volume = {78}, - abstract = {Despite wide applications of both mediation models and missing data techniques, formal discussion of mediation analysis with missing data is still rare. We introduce and compare four approaches to dealing with missing data in mediation analysis including listwise deletion, pairwise deletion, multiple imputation (MI), and a two-stage maximum likelihood (TS-ML) method. An R package bmem is developed to implement the four methods for mediation analysis with missing data in the structural equation modeling framework, and two real examples are used to illustrate the application of the four methods. The four methods are evaluated and compared under MCAR, MAR, and MNAR missing data mechanisms through simulation studies. Both MI and TS-ML perform well for MCAR and MAR data regardless of the inclusion of auxiliary variables and for AV-MNAR data with auxiliary variables. Although listwise deletion and pairwise deletion have low power and large parameter estimation bias in many studied conditions, they may provide useful information for exploring missing mechanisms.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, missing data, MI, TS-ML, bootstrap, auxiliary variables}, -} - -@Book{Eddelbuettel-2013, - author = {Dirk Eddelbuettel}, - date = {2013}, - title = {Seamless {R} and {C++} integration with {Rcpp}}, - doi = {10.1007/978-1-4614-6868-4}, - isbn = {978-1-4614-6868-4}, - publisher = {Springer New York}, - abstract = {Illustrates a range of statistical computations in R using the Rcpp package. Provides a general introduction to extending R with C++ code. Features an appendix for R users new to the C++ programming language. Rcpp packages are presented in the context of useful application case studies.}, - annotation = {r, r-packages}, -} - -@Book{Enders-2010, - author = {Craig K. Enders}, - date = {2010-05-31}, - title = {Applied missing data analysis}, - isbn = {9781606236390}, - pagetotal = {377}, - library = {HA29 .E497 2010}, - addendum = {https://lccn.loc.gov/2010008465}, - abstract = {Walking readers step by step through complex concepts, this book translates missing data techniques into something that applied researchers and graduate students can understand and utilize in their own research. Enders explains the rationale and procedural details for maximum likelihood estimation, Bayesian estimation, multiple imputation, and models for handling missing not at random (MNAR) data. Easy-to-follow examples and small simulated data sets illustrate the techniques and clarify the underlying principles. The companion website (www.appliedmissingdata.com) includes data files and syntax for the examples in the book as well as up-to-date information on software.
The book is accessible to substantive researchers while providing a level of detail that will satisfy quantitative specialists.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Missing observations (Statistics), Social sciences--Research--Methodology}, -} - -@InBook{Koopman-Howe-Hollenbeck-2014, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck}, - booktitle = {More statistical and methodological myths and urban legends: Doctrine, verity and fable in organizational and social sciences}, - date = {2014}, - title = {Pulling the {Sobel} test up by its bootstraps}, - bookauthor = {Charles E. Lance and Robert J. Vandenberg}, - isbn = {9780203775851}, - pages = {224--243}, - doi = {10.4324/9780203775851}, - abstract = {In the domain of building and testing theory, mediation relationships are among the most important that can be proposed. Mediation helps to explicate our theoretical models (Leavitt, Mitchell, \& Peterson, 2010) and addresses the fundamental question of why two constructs are related (Whetten, 1989). One of the better-known methods for testing mediation is commonly referred to as the ``Sobel test,'' named for the researcher who derived a standard error (Sobel, 1982) to test the significance of the indirect effect. Recently, a number of different research teams (e.g., Preacher \& Hayes, 2004; Shrout \& Bolger, 2002) have criticized the Sobel test because this standard error requires an assumption of normality for the indirect effect sampling distribution. This distribution tends to be positively skewed (i.e., not normal), particularly in small samples, and so this assumption can be problematic (Preacher \& Hayes, 2004; Stone \& Sobel, 1990). As a result, the statistical power of the Sobel test may be lessened in these contexts (Preacher \& Hayes, 2004; Shrout \& Bolger, 2002). In light of this concern, some scholars have advocated instead for the use of bootstrapping to test the significance of the indirect effect (e.g., Shrout \& Bolger, 2002). Bootstrapping requires no a priori assumption about the shape of the sampling distribution because this distribution is empirically estimated using a resampling procedure (Efron \& Tibshirani, 1993). As a result, departures from normality are less troublesome when creating a confidence interval for the indirect effect. For this reason, bootstrapping is now widely believed to be inherently superior to the Sobel test when testing the significance of the indirect effect in organizational research. Our position is that this belief constitutes an urban legend. As with all statistical urban legends, there is an underlying kernel of truth to the belief that bootstrapping is superior to the Sobel test. However, as we discuss in this chapter, there are several reasons to be concerned with a broad belief in the superiority of bootstrapping. We begin with a brief overview of mediation testing focusing on the Sobel test and bootstrapping and then explain the underlying kernel of truth that has propelled bootstrapping to the forefront of mediation testing in organizational research. Subsequently, we discuss four areas of concern that cast doubt on the belief of the inherent superiority of bootstrapping. Finally, we conclude with recommendations concerning the future of mediation testing in organizational research.}, - publisher = {Routledge/Taylor \& Francis Group}, -} - -@Book{Little-Rubin-2019, - author = {Roderick J. A. Little and Donald B.
Rubin}, - date = {2019-04}, - title = {Statistical analysis with missing data}, - doi = {10.1002/9781119482260}, - edition = {3}, - isbn = {9781119482260}, - library = {QA276}, - addendum = {https://lccn.loc.gov/2018061330}, - abstract = {An up-to-date, comprehensive treatment of a classic text on missing data in statistics. - The topic of missing data has gained considerable attention in recent decades. This new edition by two acknowledged experts on the subject offers an up-to-date account of practical methodology for handling missing data problems. Blending theory and application, authors Roderick Little and Donald Rubin review historical approaches to the subject and describe simple methods for multivariate analysis with missing values. They then provide a coherent theory for analysis of problems based on likelihoods derived from statistical models for the data and the missing data mechanism, and then they apply the theory to a wide range of important missing data problems. - Statistical Analysis with Missing Data, Third Edition starts by introducing readers to the subject and approaches toward solving it. It looks at the patterns and mechanisms that create the missing data, as well as a taxonomy of missing data. It then goes on to examine missing data in experiments, before discussing complete-case and available-case analysis, including weighting methods. The new edition expands its coverage to include recent work on topics such as nonresponse in sample surveys, causal inference, diagnostic methods, and sensitivity analysis, among a host of other topics. - \begin{itemize} \item An updated ``classic'' written by renowned authorities on the subject \item Features over 150 exercises (including many new ones) \item Covers recent work on important methods like multiple imputation, robust alternatives to weighting, and Bayesian methods \item Revises previous topics based on past student feedback and class experience \item Contains an updated and expanded bibliography \end{itemize} - The authors were awarded The Karl Pearson Prize in 2017 by the International Statistical Institute, for a research contribution that has had profound influence on statistical theory, methodology or applications. Their work ``has been no less than defining and transforming.'' (ISI) - Statistical Analysis with Missing Data, Third Edition is an ideal textbook for upper undergraduate and/or beginning graduate level students of the subject. It is also an excellent source of information for applied statisticians and practitioners in government and industry.}, - publisher = {Wiley}, - keywords = {Mathematical statistics, Mathematical statistics--Problems, exercises, etc., Missing observations (Statistics), Missing observations (Statistics)--Problems, exercises, etc.}, -} - -@Book{Pawitan-2013, - author = {Yudi Pawitan}, - date = {2013-01-17}, - title = {In all likelihood: Statistical modelling and inference using likelihood}, - isbn = {9780199671229}, - pagetotal = {544}, - abstract = {Based on a course in the theory of statistics this text concentrates on what can be achieved using the likelihood/Fisherian method of taking account of uncertainty when studying a statistical problem. It takes the concept of the likelihood as providing the best methods for unifying the demands of statistical modelling and the theory of inference. Every likelihood concept is illustrated by realistic examples, which are not compromised by computational problems.
Examples range from a simple comparison of two accident rates, to complex studies that require generalised linear or semiparametric modelling. - The emphasis is that the likelihood is not simply a device to produce an estimate, but an important tool for modelling. The book generally takes an informal approach, where most important results are established using heuristic arguments and motivated with realistic examples. With the currently available computing power, examples are not contrived to allow a closed analytical solution, and the book can concentrate on the statistical aspects of the data modelling. In addition to classical likelihood theory, the book covers many modern topics such as generalized linear models and mixed models, non parametric smoothing, robustness, the EM algorithm and empirical likelihood.}, - publisher = {Oxford University Press}, -} - -@Book{vanBuuren-2018, - author = {Stef {van Buuren}}, - date = {2018-07}, - title = {Flexible imputation of missing data}, - doi = {10.1201/9780429492259}, - edition = {2}, - isbn = {9780429492259}, - publisher = {Chapman and Hall/{CRC}}, - library = {QA278}, - addendum = {https://lccn.loc.gov/2019719619}, - abstract = {Missing data pose challenges to real-life data analysis. Simple ad-hoc fixes, like deletion or mean imputation, only work under highly restrictive conditions, which are often not met in practice. Multiple imputation replaces each missing value by multiple plausible values. The variability between these replacements reflects our ignorance of the true (but missing) value. Each of the completed data sets is then analyzed by standard methods, and the results are pooled to obtain unbiased estimates with correct confidence intervals. Multiple imputation is a general approach that also inspires novel solutions to old problems by reformulating the task at hand as a missing-data problem. - This is the second edition of a popular book on multiple imputation, focused on explaining the application of methods through detailed worked examples using the MICE package as developed by the author. This new edition incorporates the recent developments in this fast-moving field. - This class-tested book avoids mathematical and technical details as much as possible: formulas are accompanied by verbal statements that explain the formula in accessible terms. The book sharpens the reader’s intuition on how to think about missing data, and provides all the tools needed to execute a well-grounded quantitative analysis in the presence of missing data.}, - keywords = {Multivariate analysis, Multiple imputation (Statistics), Missing observations (Statistics)}, -} - -@InCollection{Zhang-Wang-Tong-2015, - author = {Zhiyong Zhang and Lijuan Wang and Xin Tong}, - booktitle = {Quantitative Psychology Research}, - date = {2015}, - title = {Mediation analysis with missing data through multiple imputation and bootstrap}, - doi = {10.1007/978-3-319-19977-1_24}, - pages = {341--355}, - abstract = {A method using multiple imputation and bootstrap for dealing with missing data in mediation analysis is introduced and implemented in both SAS and R. Through simulation studies, it is shown that the method performs well for both MCAR and MAR data without and with auxiliary variables. It is also shown that the method can work for MNAR data if auxiliary variables related to missingness are included. The application of the method is demonstrated through the analysis of a subset of data from the National Longitudinal Survey of Youth.
Mediation analysis with missing data can be conducted using the provided SAS macros and R package bmem.}, - publisher = {Springer International Publishing}, - keywords = {mediation analysis, missing data, multiple imputation, bootstrap}, -} - -@Report{Jones-Waller-2013b, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013-05-25}, - institution = {University of Minnesota-Twin Cities}, - title = {The normal-theory and asymptotic distribution-free ({ADF}) covariance matrix of standardized regression coefficients: Theoretical extensions and finite sample behavior}, - type = {techreport}, - url = {http://users.cla.umn.edu/~nwaller/downloads/techreports/TR052913.pdf}, - urldate = {2022-07-22}, - abstract = {Yuan and Chan (2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a new method for computing this covariance matrix from correlations. We then show that Yuan and Chan's original equations can also be used when only correlational data are available. Next, we describe an asymptotic distribution-free (ADF; Browne, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with non-normal data in moderate-to-large samples using both simulated and real-data examples. Finally, we provide R code (R Development Core Team, 2012) in an Appendix to make these methods accessible to applied researchers.}, -} - -@Manual{Muthen-Muthen-2017, - author = {Linda K. Muth{\a'e}n and Bengt O. Muth{\a'e}n}, - date = {2017}, - title = {{Mplus} user’s guide. {Eighth} edition}, - location = {Los Angeles, CA}, - publisher = {{Muth\'en} \& {Muth\'en}}, - annotation = {sem, sem-software}, -} - -@Article{Cheung-2021, - author = {Mike W.-L. Cheung}, - date = {2021-06}, - journaltitle = {Alcohol and Alcoholism}, - title = {Synthesizing indirect effects in mediation models with meta-analytic methods}, - doi = {10.1093/alcalc/agab044}, - number = {1}, - pages = {5--15}, - volume = {57}, - abstract = {Aims - A mediator is a variable that explains the underlying mechanism between an independent variable and a dependent variable. The indirect effect indicates the effect from the predictor to the outcome variable via the mediator. In contrast, the direct effect represents the predictor's effect on the outcome variable after controlling for the mediator. - Methods - A single study rarely provides enough evidence to answer research questions in a particular domain. Replications are generally recommended as the gold standard to conduct scientific research. When a sufficient number of studies have been conducted addressing similar research questions, a meta-analysis can be used to synthesize those studies' findings. - Results - The main objective of this paper is to introduce two frameworks for integrating studies using mediation analysis. The first framework involves calculating standardized indirect effects and direct effects and conducting a multivariate meta-analysis on those effect sizes. The second one uses meta-analytic structural equation modeling to synthesize correlation matrices and fit mediation models on the average correlation matrix. We illustrate these procedures on a real dataset using the R statistical platform.
- Conclusion - This paper closes with some further directions for future studies.}, - publisher = {Oxford University Press ({OUP})}, - keywords = {heterogeneity, gold standard, outcome variable, datasets, mediation analysis}, -} - -@Article{Cheung-Pesigan-2023a, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {{FINDOUT}: Using either {SPSS} commands or graphical user interface to identify influential cases in structural equation modeling in {AMOS}}, - doi = {10.1080/00273171.2022.2148089}, - pages = {1--5}, - abstract = {The results in a structural equation modeling (SEM) analysis can be influenced by just a few observations, called influential cases. Tools have been developed for users of R to identify them. However, similar tools are not available for AMOS, which is also a popular SEM software package. We introduce the FINDOUT toolset, a group of SPSS extension commands, and an AMOS plugin, to identify influential cases and examine how these cases influence the results. The SPSS commands can be used either as syntax commands or as custom dialogs from pull-down menus, and the AMOS plugin can be run from AMOS pull-down menu. We believe these tools can help researchers to examine the robustness of their findings to influential cases.}, - publisher = {Informa {UK} Limited}, - keywords = {influential cases, outliers, structural equation modeling, AMOS, sensitivity analysis, SPSS}, -} - -@Article{Cheung-Pesigan-2023b, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-05}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{semlbci}: An {R} package for forming likelihood-based confidence intervals for parameter estimates, correlations, indirect effects, and other derived parameters}, - doi = {10.1080/10705511.2023.2183860}, - pages = {1--15}, - abstract = {There are three common types of confidence interval (CI) in structural equation modeling (SEM): Wald-type CI, bootstrapping CI, and likelihood-based CI (LBCI). LBCI has the following advantages: (1) it has better coverage probabilities and Type I error rate compared to Wald-type CI when the sample size is finite; (2) it correctly tests the null hypothesis of a parameter based on likelihood ratio chi-square difference test; (3) it is less computationally intensive than bootstrapping CI; and (4) it is invariant to transformations. However, LBCI is not available in many popular SEM software packages. We developed an R package, semlbci, for forming LBCI for parameters in models fitted by lavaan, a popular open-source SEM package, such that researchers have more options in forming CIs for parameters in SEM. 
The package supports both unstandardized and standardized estimates, derived parameters such as indirect effect, multisample models, and the robust LBCI proposed by Falk.}, - publisher = {Informa {UK} Limited}, - keywords = {confidence interval, likelihood-based confidence interval, robust method, structural equation modeling}, - annotation = {r, r-packages, sem, sem-software, lb}, -} - -@Article{Cheung-Pesigan-Vong-2022, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan and Weng Ngai Vong}, - date = {2022-03}, - journaltitle = {Behavior Research Methods}, - title = {{DIY} bootstrapping: Getting the nonparametric bootstrap confidence interval in {SPSS} for any statistics or function of statistics (when this bootstrapping is appropriate)}, - doi = {10.3758/s13428-022-01808-5}, - number = {2}, - pages = {474--490}, - volume = {55}, - abstract = {Researchers can generate bootstrap confidence intervals for some statistics in SPSS using the BOOTSTRAP command. However, this command can only be applied to selected procedures, and only to selected statistics in these procedures. We developed an extension command and prepared some sample syntax files based on existing approaches from the Internet to illustrate how researchers can (a) generate a large number of nonparametric bootstrap samples, (b) do desired analysis on all these samples, and (c) form the bootstrap confidence intervals for selected statistics using the OMS commands. We developed these tools to help researchers apply nonparametric bootstrapping to any statistics for which this method is appropriate, including statistics derived from other statistics, such as standardized effect size measures computed from the t test results. We also discussed how researchers can extend the tools for other statistics and scenarios they encounter.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {bootstrapping, effect sizes, confidence intervals}, -} - -@Article{Li-Oravecz-Zhou-etal-2022, - author = {Yanling Li and Zita Oravecz and Shuai Zhou and Yosef Bodovski and Ian J. Barnett and Guangqing Chi and Yuan Zhou and Naomi P. Friedman and Scott I. Vrieze and Sy-Miin Chow}, - date = {2022-01}, - journaltitle = {Psychometrika}, - title = {{Bayesian} forecasting with a regime-switching zero-inflated multilevel poisson regression model: An application to adolescent alcohol use with spatial covariates}, - doi = {10.1007/s11336-021-09831-9}, - number = {2}, - pages = {376--402}, - volume = {87}, - abstract = {In this paper, we present and evaluate a novel Bayesian regime-switching zero-inflated multilevel Poisson (RS-ZIMLP) regression model for forecasting alcohol use dynamics. The model partitions individuals’ data into two phases, known as regimes, with: (1) a zero-inflation regime that is used to accommodate high instances of zeros (non-drinking) and (2) a multilevel Poisson regression regime in which variations in individuals’ log-transformed average rates of alcohol use are captured by means of an autoregressive process with exogenous predictors and a person-specific intercept. The times at which individuals are in each regime are unknown, but may be estimated from the data. We assume that the regime indicator follows a first-order Markov process as related to exogenous predictors of interest. The forecast performance of the proposed model was evaluated using a Monte Carlo simulation study and further demonstrated using substance use and spatial covariate data from the Colorado Online Twin Study (CoTwins). 
Results showed that the proposed model yielded better forecast performance compared to a baseline model which predicted all cases as non-drinking and a reduced ZIMLP model without the RS structure, as indicated by higher AUC (the area under the receiver operating characteristic (ROC) curve) scores, and lower mean absolute errors (MAEs) and root-mean-square errors (RMSEs). The improvements in forecast performance were even more pronounced when we limited the comparisons to participants who showed at least one instance of transition to drinking. }, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Bayesian zero-inflated Poisson model, forecast, intensive longitudinal data, regime-switching, spatial data, substance use}, - annotation = {bayesian, ild}, -} - -@Article{McNeish-MacKinnon-2022, - author = {Daniel McNeish and David P. MacKinnon}, - date = {2022-12}, - journaltitle = {Psychological Methods}, - title = {Intensive longitudinal mediation in {Mplus}}, - doi = {10.1037/met0000536}, - abstract = {Much of the existing longitudinal mediation literature focuses on panel data where relatively few repeated measures are collected over a relatively broad timespan. However, technological advances in data collection (e.g., smartphones, wearables) have led to a proliferation of short duration, densely collected longitudinal data in behavioral research. These intensive longitudinal data differ in structure and focus relative to traditionally collected panel data. As a result, existing methodological resources do not necessarily extend to nuances present in the recent influx of intensive longitudinal data and designs. In this tutorial, we first cover potential limitations of traditional longitudinal mediation models to accommodate unique characteristics of intensive longitudinal data. Then, we discuss how recently developed dynamic structural equation models (DSEMs) may be well-suited for mediation modeling with intensive longitudinal data and can overcome some of the limitations associated with traditional approaches. We describe four increasingly complex intensive longitudinal mediation models: (a) stationary models where the indirect effect is constant over time and people, (b) person-specific models where the indirect effect varies across people, (c) dynamic models where the indirect effect varies across time, and (d) cross-classified models where the indirect effect varies across both time and people. We apply each model to a running example featuring a mobile health intervention designed to improve health behavior of individuals with binge eating disorder. 
In each example, we provide annotated Mplus code and interpretation of the output to guide empirical researchers through mediation modeling with this increasingly popular type of longitudinal data.}, - publisher = {American Psychological Association ({APA})}, - keywords = {intensive longitudinal data, time-series, mediation, EMA, daily diary}, - annotation = {ild, ild-mediation, ild-software}, -} - -@Article{Nust-Eddelbuettel-Bennett-etal-2020, - author = {Daniel N{\"u}st and Dirk Eddelbuettel and Dom Bennett and Robrecht Cannoodt and Dav Clark and Gergely Dar{\a'o}czi and Mark Edmondson and Colin Fay and Ellis Hughes and Lars Kjeldgaard and Sean Lopp and Ben Marwick and Heather Nolis and Jacqueline Nolis and Hong Ooi and Karthik Ram and Noam Ross and Lori Shepherd and P{\a'e}ter S{\a'o}lymos and Tyson Lee Swetnam and Nitesh Turaga and Charlotte {Van Petegem} and Jason Williams and Craig Willis and Nan Xiao}, - date = {2020}, - journaltitle = {The R Journal}, - title = {The {Rockerverse}: Packages and applications for containerisation with {R}}, - doi = {10.32614/rj-2020-007}, - number = {1}, - pages = {437}, - volume = {12}, - abstract = {The Rocker Project provides widely used Docker images for R across different application scenarios. This article surveys downstream projects that build upon the Rocker Project images and presents the current state of R packages for managing Docker images and controlling containers. These use cases cover diverse topics such as package development, reproducible research, collaborative work, cloud-based data processing, and production deployment of services. The variety of applications demonstrates the power of the Rocker Project specifically and containerisation in general. Across the diverse ways to use containers, we identified common themes: reproducible environments, scalability and efficiency, and portability across clouds. We conclude that the current growth and diversification of use cases is likely to continue its positive impact, but see the need for consolidating the Rockerverse ecosystem of packages, developing common practices for applications, and exploring alternative containerisation software.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-rocker}, -} - -@Article{Pesigan-Cheung-2020, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2020-12}, - journaltitle = {Frontiers in Psychology}, - title = {{SEM}-based methods to form confidence intervals for indirect effect: Still applicable given nonnormality, under certain conditions}, - doi = {10.3389/fpsyg.2020.571928}, - volume = {11}, - abstract = {A SEM-based approach using likelihood-based confidence interval (LBCI) has been proposed to form confidence intervals for unstandardized and standardized indirect effect in mediation models. However, when used with the maximum likelihood estimation, this approach requires that the variables are multivariate normally distributed. This can affect the LBCIs of unstandardized and standardized effect differently. In the present study, the robustness of this approach when the predictor is not normally distributed but the error terms are conditionally normal, which does not violate the distributional assumption of ordinary least squares (OLS) estimation, is compared to four other approaches: nonparametric bootstrapping, two variants of LBCI, LBCI assuming the predictor is fixed (LBCI-Fixed-X) and LBCI based on ADF estimation (LBCI-ADF), and Monte Carlo. 
A simulation study was conducted using a simple mediation model and a serial mediation model, manipulating the distribution of the predictor. The Monte Carlo method performed worst among the methods. LBCI and LBCI-Fixed-X had suboptimal performance when the distributions had high kurtosis and the population indirect effects were medium to large. In some conditions, the problem was severe even when the sample size was large. LBCI-ADF and nonparametric bootstrapping had coverage probabilities close to the nominal value in nearly all conditions, although the coverage probabilities were still suboptimal for the serial mediation model when the sample size was small with respect to the model. Implications of these findings in the context of this special case of nonnormal data were discussed.}, - publisher = {Frontiers Media {SA}}, - keywords = {mediation, nonnormal, confidence interval, structural equation modeling, bootstrapping}, -} - -@Article{Pesigan-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2023-08}, - journaltitle = {Behavior Research Methods}, - title = {{Monte Carlo} confidence intervals for the indirect effect with missing data}, - doi = {10.3758/s13428-023-02114-4}, - abstract = {Missing data is a common occurrence in mediation analysis. As a result, the methods used to construct confidence intervals around the indirect effect should consider missing data. Previous research has demonstrated that, for the indirect effect in data with complete cases, the Monte Carlo method performs as well as nonparametric bootstrap confidence intervals (see MacKinnon et al., Multivariate Behavioral Research, 39(1), 99–128, 2004; Preacher \& Selig, Communication Methods and Measures, 6(2), 77–98, 2012; Tofighi \& MacKinnon, Structural Equation Modeling: A Multidisciplinary Journal, 23(2), 194–205, 2015). In this manuscript, we propose a simple, fast, and accurate two-step approach for generating confidence intervals for the indirect effect, in the presence of missing data, based on the Monte Carlo method. In the first step, an appropriate method, for example, full-information maximum likelihood or multiple imputation, is used to estimate the parameters and their corresponding sampling variance-covariance matrix in a mediation model. In the second step, the sampling distribution of the indirect effect is simulated using estimates from the first step. A confidence interval is constructed from the resulting sampling distribution. A simulation study with various conditions is presented. Implications of the results for applied research are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Monte Carlo method, nonparametric bootstrap, indirect effect, mediation, missing completely at random, missing at random, full-information maximum likelihood, multiple imputation}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Pesigan-Sun-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Rong Wei Sun and Shu Fai Cheung}, - date = {2023-04}, - journaltitle = {Multivariate Behavioral Research}, - title = {{betaDelta} and {betaSandwich}: Confidence intervals for standardized regression coefficients in {R}}, - doi = {10.1080/00273171.2023.2201277}, - pages = {1--4}, - abstract = {The multivariate delta method was used by Yuan and Chan to estimate standard errors and confidence intervals for standardized regression coefficients. 
Jones and Waller extended the earlier work to situations where data are nonnormal by utilizing Browne’s asymptotic distribution-free (ADF) theory. Furthermore, Dudgeon developed standard errors and confidence intervals, employing heteroskedasticity-consistent (HC) estimators, that are robust to nonnormality with better performance in smaller sample sizes compared to Jones and Waller’s ADF technique. Despite these advancements, empirical research has been slow to adopt these methodologies. This can be a result of the dearth of user-friendly software programs to put these techniques to use. We present the betaDelta and the betaSandwich packages in the R statistical software environment in this manuscript. Both the normal-theory approach and the ADF approach put forth by Yuan and Chan and Jones and Waller are implemented by the betaDelta package. The HC approach proposed by Dudgeon is implemented by the betaSandwich package. The use of the packages is demonstrated with an empirical example. We think the packages will enable applied researchers to accurately assess the sampling variability of standardized regression coefficients.}, - publisher = {Informa {UK} Limited}, - keywords = {standardized regression coefficients, confidence intervals, delta method standard errors, heteroskedasticity-consistent standard errors, R package}, - annotation = {r, r-packages}, -} - -@Article{Savalei-Rosseel-2021, - author = {Victoria Savalei and Yves Rosseel}, - date = {2021-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Computational options for standard errors and test statistics with incomplete normal and nonnormal data in {SEM}}, - doi = {10.1080/10705511.2021.1877548}, - number = {2}, - pages = {163--181}, - volume = {29}, - abstract = {This article provides an overview of different computational options for inference following normal theory maximum likelihood (ML) estimation in structural equation modeling (SEM) with incomplete normal and nonnormal data. Complete data are covered as a special case. These computational options include whether the information matrix is observed or expected, whether the observed information matrix is estimated numerically or using an analytic asymptotic approximation, and whether the information matrix and the outer product matrix of the score vector are evaluated at the saturated or at the structured estimates. A variety of different standard errors and robust test statistics become possible by varying these options. We review the asymptotic properties of these computational variations, and we show how to obtain them using lavaan in R. We hope that this article will encourage methodologists to study the impact of the available computational options on the performance of standard errors and test statistics in SEM.}, - publisher = {Informa {UK} Limited}, - keywords = {incomplete data, nonnormal data, robust corrections, software implementation}, -} - -@Article{Tofighi-Kelley-2020, - author = {Davood Tofighi and Ken Kelley}, - date = {2020}, - journaltitle = {Psychological Methods}, - title = {Improved inference in mediation analysis: Introducing the model-based constrained optimization procedure}, - doi = {10.1037/met0000259}, - pages = {496--515}, - volume = {25}, - abstract = {Mediation analysis is an important approach for investigating causal pathways. 
One approach used in mediation analysis is the test of an indirect effect, which seeks to measure how the effect of an independent variable impacts an outcome variable through one or more mediators. However, in many situations the proposed tests of indirect effects, including popular confidence interval-based methods, tend to produce poor Type I error rates when mediation does not occur and, more generally, only allow dichotomous decisions of ``not significant'' or ``significant'' with regards to the statistical conclusion. To remedy these issues, we propose a new method, a likelihood ratio test (LRT), that uses non-linear constraints in what we term the model-based constrained optimization (MBCO) procedure. The MBCO procedure (a) offers a more robust Type I error rate than existing methods; (b) provides a p-value, which serves as a continuous measure of compatibility of data with the hypothesized null model (not just a dichotomous reject or fail-to-reject decision rule); (c) allows simple and complex hypotheses about mediation (i.e., one or more mediators; different mediational pathways), and (d) allows the mediation model to use observed or latent variables. The MBCO procedure is based on a structural equation modeling framework (even if latent variables are not specified) with specialized fitting routines, namely with the use of non-linear constraints. We advocate using the MBCO procedure to test hypotheses about an indirect effect in addition to reporting a confidence interval to capture uncertainty about the indirect effect because this combination transcends existing methods.}, - publisher = {{American Psychological Association ({APA})}}, -} - -@Article{Wang-Zhang-2020, - author = {Lijuan Wang and Qian Zhang}, - date = {2020-06}, - journaltitle = {Psychological Methods}, - title = {Investigating the impact of the time interval selection on autoregressive mediation modeling: Result interpretations, effect reporting, and temporal designs}, - doi = {10.1037/met0000235}, - number = {3}, - pages = {271--291}, - volume = {25}, - abstract = {This study investigates the impact of the time interval (the time passed between 2 consecutive measurements) selection on autoregressive mediation modeling (AMM). For a widely used autoregressive mediation model, via analytical derivations, we explained why and how the conventionally reported time-specific coefficient estimates (e.g., $\hat{a} \hat{b}$ and $\hat{c}^{\prime}$ ) and inference results in AMM provide limited information and can arrive in even misleading conclusions about direct and indirect effects over time. Furthermore, under the stationarity assumption, we proposed an approach to calculate the overall direct and indirect effect estimates over time and the time lag lengths at which they reach maxima, using AMM results. The derivation results revealed that the overall direct and indirect effect curves are asymptotically invariant to the time interval selection, under stationarity. With finite samples and thus sampling errors and potential computing problems, however, our simulation results revealed that the overall indirect effect curves were better recovered when the time interval is selected to be closer to half of the time lag length at which the overall indirect effect reaches its maximum. An R function and an R Shiny app were developed to obtain the overall direct and indirect effect curves over time and facilitate the time interval selection using AMM results. 
Our findings provide another look at the connections between AMM and continuous time mediation modeling and the connections are discussed.}, - publisher = {American Psychological Association ({APA})}, - keywords = {longitudinal mediation, autoregressive mediation modeling, time interval selection, time-specific indirect effect, overall indirect effect}, - annotation = {ild, ild-mediation}, -} - -@Book{Hayes-2022, - author = {Andrew F. Hayes}, - date = {2022}, - title = {Introduction to mediation, moderation, and conditional process analysis: A regression-based approach}, - series = {Methodology in the social sciences}, - edition = {3}, - isbn = {9781462549030}, - pages = {732}, - library = {HA31.3 .H39 2022}, - addendum = {https://lccn.loc.gov/2021031108}, - abstract = {Lauded for its easy-to-understand, conversational discussion of the fundamentals of mediation, moderation, and conditional process analysis, this book has been fully revised with 50\% new content, including sections on working with multicategorical antecedent variables, the use of PROCESS version 3 for SPSS and SAS for model estimation, and annotated PROCESS v3 outputs. Using the principles of ordinary least squares regression, Andrew F. Hayes carefully explains procedures for testing hypotheses about the conditions under and the mechanisms by which causal effects operate, as well as the moderation of such mechanisms. Hayes shows how to estimate and interpret direct, indirect, and conditional effects; probe and visualize interactions; test questions about moderated mediation; and report different types of analyses. Data for all the examples are available on the companion website (www.afhayes.com) along with links to download PROCESS.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Mediation (Statistics), Regression analysis}, -} - -@Manual{Arbuckle-2020, - author = {James L. Arbuckle}, - date = {2020}, - title = {Amos 27.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Manual{Arbuckle-2021, - author = {James L. Arbuckle}, - date = {2021}, - title = {Amos 28.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Report{Asparouhov-Muthen-2022, - author = {Tihomir Asparouhov and Bengt O. Muth{\a'e}n}, - date = {2022}, - title = {Multiple imputation with {Mplus}}, - type = {techreport}, - url = {http://www.statmodel.com/download/Imputations7.pdf}, - institution = {http://www.statmodel.com}, -} - -@Manual{Eddelbuettel-Francois-Allaire-etal-2023, - title = {{Rcpp}: Seamless {R} and {C++} Integration}, - author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Inaki Ucar and Douglas Bates and John Chambers}, - year = {2023}, - note = {R package version 1.0.11}, - url = {https://CRAN.R-project.org/package=Rcpp}, - annotation = {r, r-package}, -} - -@Manual{Jorgensen-Pornprasertmanit-Schoemann-etal-2022, - title = {{semTools}: Useful tools for structural equation modeling}, - author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel}, - year = {2022}, - note = {R package version 0.5-6}, - url = {https://CRAN.R-project.org/package=semTools}, -} - -@Misc{Kurtzer-cclerget-Bauer-etal-2021, - author = {Gregory M. 
Kurtzer and {cclerget} and Michael Bauer and Ian Kaneshiro and David Trudgian and David Godlove}, - date = {2021}, - title = {{hpcng/singularity: Singularity 3.7.3}}, - doi = {10.5281/ZENODO.1310023}, - copyright = {Open Access}, - publisher = {Zenodo}, - annotation = {container, container-singularity}, -} - -@Manual{RCoreTeam-2021, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2021}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2022, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2022}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2023, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2023}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{Waller-2022, - author = {Niels G. Waller}, - title = {{fungible}: Psychometric functions from the {Waller Lab}}, - year = {2022}, - note = {R package version 2.2.1}, - url = {https://CRAN.R-project.org/package=fungible}, - publisher = {The R Foundation}, - annotation = {r, r-package}, -} - -@PhdThesis{Pesigan-2022, - author = {Ivan Jacob Agaloos Pesigan}, - year = {2022}, - school = {University of Macau}, - title = {Confidence intervals for standardized coefficients: Applied to regression coefficients in primary studies and indirect effects in meta-analytic structural equation modeling}, - type = {phdthesis}, -} diff --git a/.setup/pkgdown/vignettes.bib b/.setup/pkgdown/vignettes.bib deleted file mode 100644 index cd21073..0000000 --- a/.setup/pkgdown/vignettes.bib +++ /dev/null @@ -1,1450 +0,0 @@ -@Article{Craig-1936, - author = {Cecil C. Craig}, - date = {1936-03}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {On the frequency function of $xy$}, - doi = {10.1214/aoms/1177732541}, - number = {1}, - pages = {1--15}, - volume = {7}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Aroian-1947, - author = {Leo A. Aroian}, - date = {1947-06}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The probability function of the product of two normally distributed variables}, - doi = {10.1214/aoms/1177730442}, - number = {2}, - pages = {265--271}, - volume = {18}, - abstract = {Let $x$ and $y$ follow a normal bivariate probability function with means $\bar X, \bar Y$, standard deviations $\sigma_1, \sigma_2$, respectively, $r$ the coefficient of correlation, and $\rho_1 = \bar X/\sigma_1, \rho_2 = \bar Y/\sigma_2$. Professor C. C. Craig [1] has found the probability function of $z = xy/\sigma_1\sigma_2$ in closed form as the difference of two integrals. For purposes of numerical computation he has expanded this result in an infinite series involving powers of $z, \rho_1, \rho_2$, and Bessel functions of a certain type; in addition, he has determined the moments, semin-variants, and the moment generating function of $z$. However, for $\rho_1$ and $\rho_2$ large, as Craig points out, the series expansion converges very slowly. Even for $\rho_1$ and $\rho_2$ as small as 2, the expansion is unwieldy. 
We shall show that as $\rho_1$ and $\rho_2 \rightarrow \infty$, the probability function of $z$ approaches a normal curve and in case $r = 0$ the Type III function and the Gram-Charlier Type A series are excellent approximations to the $z$ distribution in the proper region. Numerical integration provides a substitute for the infinite series wherever the exact values of the probability function of $z$ are needed. Some extensions of the main theorem are given in section 5 and a practical problem involving the probability function of $z$ is solved.}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Cochran-1952, - author = {William G. Cochran}, - date = {1952-09}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The $\chi^{2}$ test of goodness of fit}, - doi = {10.1214/aoms/1177729380}, - number = {3}, - pages = {315--345}, - volume = {23}, - publisher = {Institute of Mathematical Statistics}, - abstract = {This paper contains an expository discussion of the chi square test of goodness of fit, intended for the student and user of statistical theory rather than for the expert. Part I describes the historical development of the distribution theory on which the test rests. Research bearing on the practical application of the test--in particular on the minimum expected number per class and the construction of classes--is discussed in Part II. Some varied opinions about the extent to which the test actually is useful to the scientist are presented in Part III. Part IV outlines a number of tests that have been proposed as substitutes for the chi square test (the $\omega^2$ test, the smooth test, the likelihood ratio test) and Part V a number of supplementary tests (the run test, tests based on low moments, subdivision of chi square into components).}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Goodman-1960, - author = {Leo A. Goodman}, - date = {1960-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {On the exact variance of products}, - doi = {10.1080/01621459.1960.10483369}, - number = {292}, - pages = {708--713}, - volume = {55}, - abstract = {A simple exact formula for the variance of the product of two random variables, say, x and y, is given as a function of the means and central product-moments of x and y. The usual approximate variance formula for xy is compared with this exact formula; e.g., we note, in the special case where x and y are independent, that the ``variance'' computed by the approximate formula is less than the exact variance, and that the accuracy of the approximation depends on the sum of the reciprocals of the squared coefficients of variation of x and y. The case where x and y need not be independent is also studied, and exact variance formulas are presented for several different ``product estimates.'' (The usefulness of exact formulas becomes apparent when the variances of these estimates are compared.) When x and y are independent, simple unbiased estimates of these exact variances are suggested; in the more general case, consistent estimates are presented.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Bradley-1978, - author = {James V. 
Bradley}, - date = {1978-11}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Robustness?}, - doi = {10.1111/j.2044-8317.1978.tb00581.x}, - number = {2}, - pages = {144--152}, - volume = {31}, - publisher = {Wiley}, - annotation = {robustness}, - abstract = {The actual behaviour of the probability of a Type I error under assumption violation is quite complex, depending upon a wide variety of interacting factors. Yet allegations of robustness tend to ignore its highly particularistic nature and neglect to mention important qualifying conditions. The result is often a vast overgeneralization which nevertheless is difficult to refute since a standard quantitative definition of what constitutes robustness does not exist. Yet under any halfway reasonable quantitative definition, many of the most prevalent claims of robustness would be demonstrably false. Therefore robustness is a highly questionable concept.}, -} - -@Article{Rubin-1976, - author = {Donald B. Rubin}, - date = {1976}, - journaltitle = {Biometrika}, - title = {Inference and missing data}, - doi = {10.1093/biomet/63.3.581}, - number = {3}, - pages = {581--592}, - volume = {63}, - publisher = {Oxford University Press ({OUP})}, - abstract = {When making sampling distribution inferences about the parameter of the data, $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are `missing at random' and the observed data are `observed at random', but these inferences are generally conditional on the observed pattern of missing data. When making direct-likelihood or Bayesian inferences about $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are missing at random and the parameter of the missing data process is `distinct' from $\theta$. These conditions are the weakest general conditions under which ignoring the process that causes missing data always leads to correct inferences.}, - publisher = {Oxford University Press ({OUP})}, -} - -@Article{Baron-Kenny-1986, - author = {Reuben M. Baron and David A. Kenny}, - date = {1986}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {The moderator-mediator variable distinction in social psychological research: Conceptual, strategic, and statistical considerations}, - doi = {10.1037/0022-3514.51.6.1173}, - number = {6}, - pages = {1173--1182}, - volume = {51}, - abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Browne-1984, - author = {Michael W. 
Browne}, - date = {1984-05}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Asymptotically distribution-free methods for the analysis of covariance structures}, - doi = {10.1111/j.2044-8317.1984.tb00789.x}, - number = {1}, - pages = {62--83}, - volume = {37}, - abstract = {Methods for obtaining tests of fit of structural models for covariance matrices and estimator standard errors which are asymptotically distribution free are derived. Modifications to standard normal theory tests and standard errors which make them applicable to the wider class of elliptical distributions are provided. A random sampling experiment to investigate some of the proposed methods is described.}, - publisher = {Wiley}, -} - -@Article{Efron-1987, - author = {Bradley Efron}, - date = {1987-03}, - journaltitle = {Journal of the American Statistical Association}, - title = {Better bootstrap confidence intervals}, - doi = {10.1080/01621459.1987.10478410}, - number = {397}, - pages = {171--185}, - volume = {82}, - abstract = {We consider the problem of setting approximate confidence intervals for a single parameter $\theta$ in a multiparameter family. The standard approximate intervals based on maximum likelihood theory, $\hat{\theta} \pm \hat{\sigma} z^{\left( \alpha \right)}$, can be quite misleading. In practice, tricks based on transformations, bias corrections, and so forth, are often used to improve their accuracy. The bootstrap confidence intervals discussed in this article automatically incorporate such tricks without requiring the statistician to think them through for each new application, at the price of a considerable increase in computational effort. The new intervals incorporate an improvement over previously suggested methods, which results in second-order correctness in a wide variety of problems. In addition to parametric families, bootstrap intervals are also developed for nonparametric situations.}, - publisher = {Informa {UK} Limited}, - keywords = {resampling methods, approximate confidence intervals, transformations, nonparametric intervals, second-order theory, skewness corrections}, -} - -@Article{Efron-1988, - author = {Bradley Efron}, - date = {1988}, - journaltitle = {Psychological Bulletin}, - title = {Bootstrap confidence intervals: Good or bad?}, - doi = {10.1037/0033-2909.104.2.293}, - number = {2}, - pages = {293--296}, - volume = {104}, - abstract = {The bootstrap is a nonparametric technique for estimating standard errors and approximate confidence intervals. Rasmussen has used a simulation experiment to suggest that bootstrap confidence intervals perform very poorly in the estimation of a correlation coefficient. Part of Rasmussen's simulation is repeated. A careful look at the results shows the bootstrap intervals performing quite well. Some remarks are made concerning the virtues and defects of bootstrap intervals in general.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{James-Brett-1984, - author = {Lawrence R. James and Jeanne M. Brett}, - date = {1984}, - journaltitle = {Journal of Applied Psychology}, - title = {Mediators, moderators, and tests for mediation}, - doi = {10.1037/0021-9010.69.2.307}, - number = {2}, - pages = {307--321}, - volume = {69}, - abstract = {Discusses mediation relations in causal terms. Influences of an antecedent are transmitted to a consequence through an intervening mediator. 
Mediation relations may assume a number of functional forms, including nonadditive, nonlinear, and nonrecursive forms. Although mediation and moderation are distinguishable processes, with nonadditive forms (moderated mediation) a particular variable may be both a mediator and a moderator within a single set of functional relations. Current models for testing mediation relations in industrial and organizational psychology often involve an interplay between exploratory (correlational) statistical tests and causal inference. It is suggested that no middle ground exists between exploratory and confirmatory (causal) analysis and that attempts to explain how mediation processes occur require specified causal models.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Judd-Kenny-1981, - author = {Charles M. Judd and David A. Kenny}, - date = {1981-10}, - journaltitle = {Evaluation Review}, - title = {Process analysis}, - doi = {10.1177/0193841x8100500502}, - number = {5}, - pages = {602--619}, - volume = {5}, - abstract = {This article presents the rationale and procedures for conducting a process analysis in evaluation research. Such an analysis attempts to identify the process that mediates the effects of some treatment, by estimating the parameters of a causal chain between the treatment and some outcome variable. Two different procedures for estimating mediation are discussed. In addition we present procedures for examining whether a treatment exerts its effects, in part, by altering the mediating process that produces the outcome. Finally, the benefits of process analysis in evaluation research are underlined.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Micceri-1989, - author = {Theodore Micceri}, - date = {1989}, - journaltitle = {Psychological Bulletin}, - title = {The unicorn, the normal curve, and other improbable creatures}, - doi = {10.1037/0033-2909.105.1.156}, - number = {1}, - pages = {156--166}, - volume = {105}, - abstract = {An investigation of the distributional characteristics of 440 large-sample achievement and psychometric measures found all to be significantly nonnormal at the alpha .01 significance level. Several classes of contamination were found, including tail weights from the uniform to the double exponential, exponential-level asymmetry, severe digit preferences, multimodalities, and modes external to the mean/median interval. Thus, the underlying tenets of normality-assuming statistics appear fallacious for these commonly used types of data. However, findings here also fail to support the types of distributions used in most prior robustness research suggesting the failure of such statistics under nonnormal conditions. A reevaluation of the statistical robustness literature appears appropriate in light of these findings.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Sobel-1982, - author = {Michael E. Sobel}, - date = {1982}, - journaltitle = {Sociological Methodology}, - title = {Asymptotic confidence intervals for indirect effects in structural equation models}, - doi = {10.2307/270723}, - pages = {290}, - volume = {13}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1986, - author = {Michael E. 
Sobel}, - date = {1986}, - journaltitle = {Sociological Methodology}, - title = {Some new results on indirect effects and their standard errors in covariance structure models}, - doi = {10.2307/270922}, - pages = {159}, - volume = {16}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1987, - author = {Michael E. Sobel}, - date = {1987-08}, - journaltitle = {Sociological Methods {\&} Research}, - title = {Direct and indirect effects in linear structural equation models}, - doi = {10.1177/0049124187016001006}, - number = {1}, - pages = {155--176}, - volume = {16}, - abstract = {This article discusses total indirect effects in linear structural equation models. First, I define these effects. Second, I show how the delta method may be used to obtain the standard errors of the sample estimates of these effects and test hypotheses about the magnitudes of the indirect effects. To keep matters simple, I focus throughout on a particularly simple linear structural equation system; for a treatment of the general case, see Sobel (1986). To illustrate the ideas and results, a detailed example is presented.}, - publisher = {{SAGE} Publications}, -} - -@Article{Venzon-Moolgavkar-1988, - author = {D. J. Venzon and S. H. Moolgavkar}, - date = {1988}, - journaltitle = {Applied Statistics}, - title = {A method for computing profile-likelihood-based confidence intervals}, - doi = {10.2307/2347496}, - number = {1}, - pages = {87}, - volume = {37}, - abstract = {The method of constructing confidence regions based on the generalised likelihood ratio statistic is well known for parameter vectors. A similar construction of a confidence interval for a single entry of a vector can be implemented by repeatedly maximising over the other parameters. We present an algorithm for finding these confidence interval endpoints that requires less computation. It employs a modified Newton-Raphson iteration to solve a system of equations that defines the endpoints.}, - publisher = {{JSTOR}}, - keywords = {confidence intervals, profile likelihood}, -} - -@Article{White-1980, - author = {Halbert White}, - date = {1980-05}, - journaltitle = {Econometrica}, - title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity}, - doi = {10.2307/1912934}, - number = {4}, - pages = {817--838}, - volume = {48}, - abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. 
The test has an appealing least squares interpretation.}, - publisher = {{JSTOR}}, -} - -@Book{Cohen-1988, - author = {Jacob Cohen}, - date = {1988}, - title = {Statistical power analysis for the behavioral sciences}, - doi = {10.4324/9780203771587}, - edition = {2}, - isbn = {9780203771587}, - publisher = {Routledge}, - library = {HA29 .C66 1988}, - keywords = {Social sciences--Statistical methods, Probabilities, Statistical power analysis}, - addendum = {https://lccn.loc.gov/88012110}, - abstract = {Statistical Power Analysis is a nontechnical guide to power analysis in research planning that provides users of applied statistics with the tools they need for more effective analysis. The Second Edition includes: \begin{itemize} \item a chapter covering power analysis in set correlation and multivariate methods; \item a chapter considering effect size, psychometric reliability, and the efficacy of ``qualifying'' dependent variables and; \item expanded power and sample size tables for multiple regression/correlation. \end{itemize}}, -} - -@Book{NationalResearchCouncil-1982, - author = {{National Research Council}}, - date = {1982-01}, - title = {An assessment of research-doctorate programs in the {United States}: Social and behavioral sciences}, - doi = {10.17226/9781}, - location = {Washington, D.C.}, - publisher = {National Academies Press}, - annotation = {data}, -} - -@Book{Rubin-1987, - author = {Donald B. Rubin}, - date = {1987-06}, - title = {Multiple imputation for nonresponse in surveys}, - doi = {10.1002/9780470316696}, - isbn = {9780470316696}, - location = {New York}, - publisher = {John Wiley {\&} Sons, Inc.}, - library = {HA31.2 .R83 1987}, - keywords = {Multiple imputation (Statistics), Nonresponse (Statistics), Social surveys--Response rate}, - addendum = {https://lccn.loc.gov/86028935}, - annotation = {Lib-Missing-Data-Books}, - abstract = {Demonstrates how nonresponse in sample surveys and censuses can be handled by replacing each missing value with two or more multiple imputations. Clearly illustrates the advantages of modern computing to handle such surveys, and demonstrates the benefit of this statistical technique for researchers who must analyze them. Also presents the background for Bayesian and frequentist theory. After establishing that only standard complete-data methods are needed to analyze a multiply-imputed set, the text evaluates procedures in general circumstances, outlining specific procedures for creating imputations in both the ignorable and nonignorable cases. Examples and exercises reinforce ideas, and the interplay of Bayesian and frequentist ideas presents a unified picture of modern statistics.}, -} - -@Article{Bollen-Stine-1990, - author = {Kenneth A. Bollen and Robert Stine}, - date = {1990}, - journaltitle = {Sociological Methodology}, - title = {Direct and indirect effects: Classical and bootstrap estimates of variability}, - doi = {10.2307/271084}, - pages = {115}, - volume = {20}, - abstract = {The decomposition of effects in structural equation models has been of considerable interest to social scientists. Finite-sample or asymptotic results for the sampling distribution of estimators of direct effects are widely available. Statistical inferences about indirect effects have relied exclusively on asymptotic methods which assume that the limiting distribution of the estimator is normal, with a standard error derived from the delta method. 
We examine bootstrap procedures as another way to generate standard errors and confidence intervals and to estimate the sampling distributions of estimators of direct and indirect effects. We illustrate the classical and the bootstrap methods with three empirical examples. We find that in a moderately large sample, the bootstrap distribution of an estimator is close to that assumed with the classical and delta methods but that in small samples, there are some differences. Bootstrap methods provide a check on the classical and delta methods when the latter are applied under less than ideal conditions.}, - publisher = {{JSTOR}}, -} - -@Article{Li-Raghunathan-Rubin-1991, - author = {K. H. Li and Trivellore Eachambadi Raghunathan and Donald B. Rubin}, - date = {1991-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {Large-sample significance levels from multiply imputed data using moment-based statistics and an {$F$} reference distribution}, - doi = {10.1080/01621459.1991.10475152}, - number = {416}, - pages = {1065--1073}, - volume = {86}, - abstract = {We present a procedure for computing significance levels from data sets whose missing values have been multiply imputed. This procedure uses moment-based statistics, $m \leq 3$ repeated imputations, and an F reference distribution. When $m = \infty$, we show first that our procedure is essentially the same as the ideal procedure in cases of practical importance and, second, that its deviations from the ideal are basically a function of the coefficient of variation of the canonical ratios of complete to observed information. For small $m$ our procedure's performance is largely governed by this coefficient of variation and the mean of these ratios. Using simulation techniques with small $m$, we compare our procedure's actual and nominal large-sample significance levels and conclude that it is essentially calibrated and thus represents a definite improvement over previously available procedures. Furthermore, we compare the large-sample power of the procedure as a function of $m$ and other factors, such as the dimensionality of the estimand and fraction of missing information, to provide guidance on the choice of the number of imputations; generally, we find the loss of power due to small $m$ to be quite modest in cases likely to occur in practice.}, - publisher = {Informa {UK} Limited}, - keywords = {imputation, missing data, nonresponse, tests of significance}, - annotation = {missing, missing-mi}, -} - -@InBook{Arbuckle-1996, - author = {James L. Arbuckle}, - booktitle = {Advanced structural equation modeling}, - date = {1996}, - title = {Full information estimation in the presence of incomplete data}, - doi = {10.4324/9781315827414}, - editor = {George A. Marcoulides and Randall E. Schumacker}, -} - -@Book{Davison-Hinkley-1997, - author = {Anthony Christopher Davison and David Victor Hinkley}, - publisher = {Cambridge University Press}, - title = {Bootstrap methods and their application}, - series = {Cambridge Series in Statistical and Probabilistic Mathematics}, - date = {1997}, - location = {Cambridge and New York, NY, USA }, - doi = {10.1017/CBO9780511802843}, - isbn = {9780521573917}, - library = {QA276.8 .D38 1997}, - keywords = {Bootstrap (Statistics)}, - addendum = {https://lccn.loc.gov/96030064}, - abstract = {Bootstrap methods are computer-intensive methods of statistical analysis, which use simulation to calculate standard errors, confidence intervals, and significance tests. 
The methods apply for any level of modelling, and so can be used for fully parametric, semiparametric, and completely nonparametric analysis. This 1997 book gives a broad and up-to-date coverage of bootstrap methods, with numerous applied examples, developed in a coherent way with the necessary theoretical basis. Applications include stratified data; finite populations; censored and missing data; linear, nonlinear, and smooth regression models; classification; time series and spatial problems. Special features of the book include: extensive discussion of significance tests and confidence intervals; material on various diagnostic methods; and methods for efficient computation, including improved Monte Carlo simulation. Each chapter includes both practical and theoretical exercises. S-Plus programs for implementing the methods described in the text are available from the supporting website.}, - annotation = {bootstrap}, -} - -@Book{Efron-Tibshirani-1993, - author = {Bradley Efron and Robert J. Tibshirani}, - publisher = {Chapman \& Hall}, - title = {An introduction to the bootstrap}, - series = {Monographs on statistics and applied probability ; 57}, - date = {1993}, - location = {New York}, - doi = {10.1201/9780429246593}, - isbn = {9780412042317}, - library = {QA276.8 .E3745 1993}, - addendum = {https://lccn.loc.gov/93004489}, - abstract = {Statistics is a subject of many uses and surprisingly few effective practitioners. The traditional road to statistical knowledge is blocked, for most, by a formidable wall of mathematics. The approach in An Introduction to the Bootstrap avoids that wall. It arms scientists and engineers, as well as statisticians, with the computational techniques they need to analyze and understand complicated data sets.}, - keywords = {Bootstrap (Statistics)}, -} - -@Book{Schafer-1997, - author = {Joseph L. Schafer}, - date = {1997-08}, - title = {Analysis of incomplete multivariate data}, - doi = {10.1201/9780367803025}, - isbn = {9780367803025}, - abstract = {The last two decades have seen enormous developments in statistical methods for incomplete data. The EM algorithm and its extensions, multiple imputation, and Markov Chain Monte Carlo provide a set of flexible and reliable tools from inference in large classes of missing-data problems. Yet, in practical terms, those developments have had surprisingly little impact on the way most data analysts handle missing values on a routine basis. - Analysis of Incomplete Multivariate Data helps bridge the gap between theory and practice, making these missing-data tools accessible to a broad audience. It presents a unified, Bayesian approach to the analysis of incomplete multivariate data, covering datasets in which the variables are continuous, categorical, or both. The focus is applied, where necessary, to help readers thoroughly understand the statistical properties of those methods, and the behavior of the accompanying algorithms. - All techniques are illustrated with real data examples, with extended discussion and practical advice. All of the algorithms described in this book have been implemented by the author for general use in the statistical languages S and S Plus. The software is available free of charge on the Internet.}, - publisher = {Chapman and Hall/CRC}, -} - -@Article{Bauer-Preacher-Gil-2006, - author = {Daniel J. Bauer and Kristopher J. Preacher and Karen M. 
Gil}, - date = {2006}, - journaltitle = {Psychological Methods}, - title = {Conceptualizing and testing random indirect effects and moderated mediation in multilevel models: New procedures and recommendations}, - doi = {10.1037/1082-989x.11.2.142}, - number = {2}, - pages = {142--163}, - volume = {11}, - abstract = {The authors propose new procedures for evaluating direct, indirect, and total effects in multilevel models when all relevant variables are measured at Level 1 and all effects are random. Formulas are provided for the mean and variance of the indirect and total effects and for the sampling variances of the average indirect and total effects. Simulations show that the estimates are unbiased under most conditions. Confidence intervals based on a normal approximation or a simulated sampling distribution perform well when the random effects are normally distributed but less so when they are nonnormally distributed. These methods are further developed to address hypotheses of moderated mediation in the multilevel context. An example demonstrates the feasibility and usefulness of the proposed methods.}, - publisher = {American Psychological Association ({APA})}, - keywords = {multilevel model, hierarchical linear model, indirect effect, mediation, moderated mediation}, -} - -@Article{Cheung-2009a, - author = {Mike W.-L. Cheung}, - date = {2009-05}, - journaltitle = {Behavior Research Methods}, - title = {Comparison of methods for constructing confidence intervals of standardized indirect effects}, - doi = {10.3758/brm.41.2.425}, - number = {2}, - pages = {425--438}, - volume = {41}, - abstract = {Mediation models are often used as a means to explain the psychological mechanisms between an independent and a dependent variable in the behavioral and social sciences. A major limitation of the unstandardized indirect effect calculated from raw scores is that it cannot be interpreted as an effect-size measure. In contrast, the standardized indirect effect calculated from standardized scores can be a good candidate as a measure of effect size because it is scale invariant. In the present article, 11 methods for constructing the confidence intervals (CIs) of the standardized indirect effects were evaluated via a computer simulation. These included six Wald CIs, three bootstrap CIs, one likelihood-based CI, and the PRODCLIN CI. The results consistently showed that the percentile bootstrap, the bias-corrected bootstrap, and the likelihood-based approaches had the best coverage probability. Mplus, LISREL, and Mx syntax were included to facilitate the use of these preferred methods in applied settings. Future issues on the use of the standardized indirect effects are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, coverage probability, structural equation modeling approach}, -} - -@Article{Cheung-2009b, - author = {Mike W.-L. Cheung}, - date = {2009-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Constructing approximate confidence intervals for parameters with structural equation models}, - doi = {10.1080/10705510902751291}, - number = {2}, - pages = {267--294}, - volume = {16}, - abstract = {Confidence intervals (CIs) for parameters are usually constructed based on the estimated standard errors. These are known as Wald CIs. This article argues that likelihood-based CIs (CIs based on likelihood ratio statistics) are often preferred to Wald CIs. 
It shows how the likelihood-based CIs and the Wald CIs for many statistics and psychometric indexes can be constructed with the use of phantom variables (Rindskopf, 1984) in some of the current structural equation modeling (SEM) packages. The procedures to form CIs for the differences in correlation coefficients, squared multiple correlations, indirect effects, coefficient alphas, and reliability estimates are illustrated. A simulation study on the Pearson correlation is used to demonstrate the advantages of the likelihood-based CI over the Wald CI. Issues arising from this SEM approach and extensions of this approach are discussed.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Cheung-Lau-2007, - author = {Gordon W. Cheung and Rebecca S. Lau}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Testing mediation and suppression effects of latent variables}, - doi = {10.1177/1094428107300343}, - number = {2}, - pages = {296--325}, - volume = {11}, - abstract = {Because of the importance of mediation studies, researchers have been continuously searching for the best statistical test for mediation effect. The approaches that have been most commonly employed include those that use zero-order and partial correlation, hierarchical regression models, and structural equation modeling (SEM). This study extends MacKinnon and colleagues (MacKinnon, Lockwood, Hoffmann, West, \& Sheets, 2002; MacKinnon, Lockwood, \& Williams, 2004, MacKinnon, Warsi, \& Dwyer, 1995) works by conducting a simulation that examines the distribution of mediation and suppression effects of latent variables with SEM, and the properties of confidence intervals developed from eight different methods. Results show that SEM provides unbiased estimates of mediation and suppression effects, and that the bias-corrected bootstrap confidence intervals perform best in testing for mediation and suppression effects. Steps to implement the recommended procedures with Amos are presented.}, - publisher = {{SAGE} Publications}, - keywords = {mediating effects, suppression effects, structural equation modeling}, -} - -@Article{CribariNeto-Souza-Vasconcellos-2007, - author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, - date = {2007-08}, - journaltitle = {Communications in Statistics - Theory and Methods}, - title = {Inference under heteroskedasticity and leveraged data}, - doi = {10.1080/03610920601126589}, - number = {10}, - pages = {1877--1888}, - volume = {36}, - abstract = {We evaluate the finite-sample behavior of different heteroskedasticity-consistent covariance matrix estimators, under both constant and unequal error variances. We consider the estimator proposed by Halbert White (HC0), and also its variants known as HC2, HC3, and HC4; the latter was recently proposed by Cribari-Neto (2004). We propose a new covariance matrix estimator: HC5. It is the first consistent estimator to explicitly take into account the effect that the maximal leverage has on the associated inference. Our numerical results show that quasi-$t$ inference based on HC5 is typically more reliable than inference based on other covariance matrix estimators.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Fritz-MacKinnon-2007, - author = {Matthew S. Fritz and David P. 
MacKinnon}, - date = {2007-03}, - journaltitle = {Psychological Science}, - title = {Required sample size to detect the mediated effect}, - doi = {10.1111/j.1467-9280.2007.01882.x}, - number = {3}, - pages = {233--239}, - volume = {18}, - abstract = {Mediation models are widely used, and there are many tests of the mediated effect. One of the most common questions that researchers have when planning mediation studies is, ``How many subjects do I need to achieve adequate power when testing for mediation?'' This article presents the necessary sample sizes for six of the most common and the most recommended tests of mediation for various combinations of parameters, to provide a guide for researchers when designing studies or applying for grants.}, - publisher = {{SAGE} Publications}, - keywords = {bootstrap, collinearity, mediation analysis, power, tolerance}, -} - -@Article{Graham-Olchowski-Gilreath-2007, - author = {John W. Graham and Allison E. Olchowski and Tamika D. Gilreath}, - date = {2007-06}, - journaltitle = {Prevention Science}, - title = {How many imputations are really needed? Some practical clarifications of multiple imputation theory}, - doi = {10.1007/s11121-007-0070-9}, - number = {3}, - pages = {206--213}, - volume = {8}, - abstract = {Multiple imputation (MI) and full information maximum likelihood (FIML) are the two most common approaches to missing data analysis. In theory, MI and FIML are equivalent when identical models are tested using the same variables, and when m, the number of imputations performed with MI, approaches infinity. However, it is important to know how many imputations are necessary before MI and FIML are sufficiently equivalent in ways that are important to prevention scientists. MI theory suggests that small values of m, even on the order of three to five imputations, yield excellent results. Previous guidelines for sufficient m are based on relative efficiency, which involves the fraction of missing information ($\gamma$) for the parameter being estimated, and m. In the present study, we used a Monte Carlo simulation to test MI models across several scenarios in which $\gamma$ and m were varied. Standard errors and p-values for the regression coefficient of interest varied as a function of m, but not at the same rate as relative efficiency. Most importantly, statistical power for small effect sizes diminished as m became smaller, and the rate of this power falloff was much greater than predicted by changes in relative efficiency. Based on our findings, we recommend that researchers using MI should perform many more imputations than previously considered sufficient. These recommendations are based on $\gamma$, and take into consideration one's tolerance for a preventable power falloff (compared to FIML) due to using too few imputations.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {multiple imputation, number of imputations, full information maximum likelihood, missing data, statistical power}, -} - -@Article{MacKinnon-Fritz-Williams-etal-2007, - author = {David P. MacKinnon and Matthew S. Fritz and Jason Williams and Chondra M. 
Lockwood}, - date = {2007-08}, - journaltitle = {Behavior Research Methods}, - title = {Distribution of the product confidence limits for the indirect effect: Program {PRODCLIN}}, - doi = {10.3758/bf03193007}, - number = {3}, - pages = {384--389}, - volume = {39}, - abstract = {This article describes a program, PRODCLIN (distribution of the PRODuct Confidence Limits for INdirect effects), written for SAS, SPSS, and R, that computes confidence limits for the product of two normal random variables. The program is important because it can be used to obtain more accurate confidence limits for the indirect effect, as demonstrated in several recent articles (MacKinnon, Lockwood, \& Williams, 2004; Pituch, Whittaker, \& Stapleton, 2005). Tests of the significance of and confidence limits for indirect effects based on the distribution of the product method have more accurate Type I error rates and more power than other, more commonly used tests. Values for the two paths involved in the indirect effect and their standard errors are entered in the PRODCLIN program, and distribution of the product confidence limits are computed. Several examples are used to illustrate the PRODCLIN program. The PRODCLIN programs in rich text format may be downloaded from www.psychonomic.org/archive.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{MacKinnon-Lockwood-Hoffman-etal-2002, - author = {David P. MacKinnon and Chondra M. Lockwood and Jeanne M. Hoffman and Stephen G. West and Virgil Sheets}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {A comparison of methods to test mediation and other intervening variable effects}, - doi = {10.1037/1082-989x.7.1.83}, - number = {1}, - pages = {83--104}, - volume = {7}, - abstract = {A Monte Carlo study compared 14 methods to test the statistical significance of the intervening variable effect. An intervening variable (mediator) transmits the effect of an independent variable to a dependent variable. The commonly used R. M. Baron and D. A. Kenny (1986) approach has low statistical power. Two methods based on the distribution of the product and 2 difference-in-coefficients methods have the most accurate Type I error rates and greatest statistical power except in 1 important case in which Type I error rates are too high. The best balance of Type I error and statistical power across all cases is the test of the joint significance of the two effects comprising the intervening variable effect.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{MacKinnon-Lockwood-Williams-2004, - author = {David P. MacKinnon and Chondra M. Lockwood and Jason Williams}, - date = {2004-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {Confidence limits for the indirect effect: Distribution of the product and resampling methods}, - doi = {10.1207/s15327906mbr3901_4}, - number = {1}, - pages = {99--128}, - volume = {39}, - abstract = {The most commonly used method to test an indirect effect is to divide the estimate of the indirect effect by its standard error and compare the resulting z statistic with a critical value from the standard normal distribution. Confidence limits for the indirect effect are also typically based on critical values from the standard normal distribution. This article uses a simulation study to demonstrate that confidence limits are imbalanced because the distribution of the indirect effect is normal only in special cases. 
Two alternatives for improving the performance of confidence limits for the indirect effect are evaluated: (a) a method based on the distribution of the product of two normal random variables, and (b) resampling methods. In Study 1, confidence limits based on the distribution of the product are more accurate than methods based on an assumed normal distribution but confidence limits are still imbalanced. Study 2 demonstrates that more accurate confidence limits are obtained using resampling methods, with the bias-corrected bootstrap the best method overall.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, -} - -@Article{Peugh-Enders-2004, - author = {James L. Peugh and Craig K. Enders}, - date = {2004-12}, - journaltitle = {Review of Educational Research}, - title = {Missing data in educational research: A review of reporting practices and suggestions for improvement}, - doi = {10.3102/00346543074004525}, - number = {4}, - pages = {525--556}, - volume = {74}, - publisher = {American Educational Research Association ({AERA})}, - abstract = {Missing data analyses have received considerable recent attention in the methodological literature, and two ``modern'' methods, multiple imputation and maximum likelihood estimation, are recommended. The goals of this article are to (a) provide an overview of missing-data theory, maximum likelihood estimation, and multiple imputation; (b) conduct a methodological review of missing-data reporting practices in 23 applied research journals; and (c) provide a demonstration of multiple imputation and maximum likelihood estimation using the Longitudinal Study of American Youth data. The results indicated that explicit discussions of missing data increased substantially between 1999 and 2003, but the use of maximum likelihood estimation or multiple imputation was rare; the studies relied almost exclusively on listwise and pairwise deletion.}, - keywords = {EM algorithm, maximum likelihood estimation, missing data, multiple imputation, NORM}, -} - -@Article{Preacher-Hayes-2004, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2004-11}, - journaltitle = {Behavior Research Methods, Instruments, \& Computers}, - title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, - doi = {10.3758/bf03206553}, - number = {4}, - pages = {717--731}, - volume = {36}, - abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. 
Electronic copies of these macros may be downloaded from the Psychonomic Society’s Web archive at www.psychonomic.org/archive/.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {life satisfaction, indirect effect, mediation analysis, cognitive therapy, Sobel test}, -} - -@Article{Preacher-Hayes-2008, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2008-08}, - journaltitle = {Behavior Research Methods}, - title = {Asymptotic and resampling strategies for assessing and comparing indirect effects in multiple mediator models}, - doi = {10.3758/brm.40.3.879}, - number = {3}, - pages = {879--891}, - volume = {40}, - abstract = {Hypotheses involving mediation are common in the behavioral sciences. Mediation exists when a predictor affects a dependent variable indirectly through at least one intervening variable, or mediator. Methods to assess mediation involving multiple simultaneous mediators have received little attention in the methodological literature despite a clear need. We provide an overview of simple and multiple mediation and explore three approaches that can be used to investigate indirect processes, as well as methods for contrasting two or more mediators within a single model. We present an illustrative example, assessing and contrasting potential mediators of the relationship between the helpfulness of socialization agents and job satisfaction. We also provide SAS and SPSS macros, as well as Mplus and LISREL syntax, to facilitate the use of these methods in applications.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {indirect effect, structural equation modeling, residual covariance, total indirect effect, multiple mediator model}, -} - -@Article{Raghunathan-Lepkowski-Hoewyk-etal-2001, - author = {Trivellore E. Raghunathan and James M. Lepkowski and John Van Hoewyk and Peter Solenberger}, - date = {2001}, - journaltitle = {Survey Methodology}, - title = {A multivariate technique for multiply imputing missing values using a sequence of regression models}, - number = {1}, - pages = {85--95}, - volume = {27}, - abstract = {This article describes and evaluates a procedure for imputing missing values for a relatively complex data structure when the data are missing at random. The imputations are obtained by fitting a sequence of regression models and drawing values from the corresponding predictive distributions. The types of regression models used are linear, logistic, Poisson, generalized logit or a mixture of these depending on the type of variable being imputed. Two additional common features in the imputation process are incorporated: restriction to a relevant subpopulation for some variables and logical bounds or constraints for the imputed values. The restrictions involve subsetting the sample individuals that satisfy certain criteria while fitting the regression models. The bounds involve drawing values from a truncated predictive distribution. The development of this method was partly motivated by the analysis of two data sets which are used as illustrations. The sequential regression procedure is applied to perform multiple imputation analysis for the two applied problems. 
The sampling properties of inferences from multiply imputed data sets created using the sequential regression method are evaluated through simulated data sets.}, - keywords = {item nonresponse, missing at random, multiple imputation, nonignorable missing mechanism, regression, sampling properties and simulations}, -} - -@Article{Schafer-Graham-2002, - author = {Joseph L. Schafer and John W. Graham}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Missing data: Our view of the state of the art}, - doi = {10.1037/1082-989x.7.2.147}, - number = {2}, - pages = {147--177}, - volume = {7}, - abstract = {Statistical procedures for missing data have vastly improved, yet misconception and unsound practice still abound. The authors frame the missing-data problem, review methods, offer advice, and raise issues that remain unresolved. They clear up common misunderstandings regarding the missing at random (MAR) concept. They summarize the evidence against older procedures and, with few exceptions, discourage their use. They present, in both technical and practical language, 2 general approaches that come highly recommended: maximum likelihood (ML) and Bayesian multiple imputation (MI). Newer developments are discussed, including some for dealing with missing data that are not MAR. Although not yet in the mainstream, these procedures may eventually extend the ML and MI methods that currently represent the state of the art.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Serlin-2000, - author = {Ronald C. Serlin}, - date = {2000}, - journaltitle = {Psychological Methods}, - title = {Testing for robustness in {Monte Carlo} studies}, - doi = {10.1037/1082-989x.5.2.230}, - number = {2}, - pages = {230--240}, - volume = {5}, - abstract = {Monte Carlo studies provide the information needed to help researchers select appropriate analytical procedures under design conditions in which the underlying assumptions of the procedures are not met. In Monte Carlo studies, the 2 errors that one could commit involve (a) concluding that a statistical procedure is robust when it is not or (b) concluding that it is not robust when it is. In previous attempts to apply standard statistical design principles to Monte Carlo studies, the less severe of these errors has been wrongly designated the Type I error. In this article, a method is presented for controlling the appropriate Type I error rate; the determination of the number of iterations required in a Monte Carlo study to achieve desired power is described; and a confidence interval for a test's true Type I error rate is derived. A robustness criterion is also proposed that is a compromise between W. G. Cochran's (1952) and J. V. Bradley's (1978) criteria.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Shrout-Bolger-2002, - author = {Patrick E. Shrout and Niall Bolger}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Mediation in experimental and nonexperimental studies: New procedures and recommendations}, - doi = {10.1037/1082-989x.7.4.422}, - number = {4}, - pages = {422--445}, - volume = {7}, - publisher = {American Psychological Association ({APA})}, - abstract = {Mediation is said to occur when a causal effect of some variable $X$ on an outcome $Y$ is explained by some intervening variable $M$. The authors recommend that with small to moderate samples, bootstrap methods (B. Efron \& R. Tibshirani, 1993) be used to assess mediation. 
Bootstrap tests are powerful because they detect that the sampling distribution of the mediated effect is skewed away from 0. They argue that R. M. Baron and D. A. Kenny's (1986) recommendation of first testing the $X \to Y$ association for statistical significance should not be a requirement when there is a priori belief that the effect size is small or suppression is a possibility. Empirical examples and computer setups for bootstrap analyses are provided.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Taylor-MacKinnon-Tein-2007, - author = {Aaron B. Taylor and David P. MacKinnon and Jenn-Yun Tein}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Tests of the three-path mediated effect}, - doi = {10.1177/1094428107300344}, - number = {2}, - pages = {241--269}, - volume = {11}, - abstract = {In a three-path mediational model, two mediators intervene in a series between an independent and a dependent variable. Methods of testing for mediation in such a model are generalized from the more often used single-mediator model. Six such methods are introduced and compared in a Monte Carlo study in terms of their Type I error, power, and coverage. Based on its results, the joint significance test is preferred when only a hypothesis test is of interest. The percentile bootstrap and bias-corrected bootstrap are preferred when a confidence interval on the mediated effect is desired, with the latter having more power but also slightly inflated Type I error in some conditions.}, - publisher = {{SAGE} Publications}, - keywords = {mediation, bootstrapping}, -} - -@Article{vanBuuren-Brand-GroothuisOudshoorn-etal-2006, - author = {Stef {van Buuren} and J. P. L. Brand and C. G. M. Groothuis-Oudshoorn and Donald B. Rubin}, - date = {2006-12}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Fully conditional specification in multivariate imputation}, - doi = {10.1080/10629360600810434}, - number = {12}, - pages = {1049--1064}, - volume = {76}, - abstract = {The use of the Gibbs sampler with fully conditionally specified models, where the distribution of each variable given the other variables is the starting point, has become a popular method to create imputations in incomplete multivariate data. The theoretical weakness of this approach is that the specified conditional densities can be incompatible, and therefore the stationary distribution to which the Gibbs sampler attempts to converge may not exist. This study investigates practical consequences of this problem by means of simulation. Missing data are created under four different missing data mechanisms. Attention is given to the statistical behavior under compatible and incompatible models. The results indicate that multiple imputation produces essentially unbiased estimates with appropriate coverage in the simple cases investigated, even for the incompatible models. Of particular interest is that these results were produced using only five Gibbs iterations starting from a simple draw from observed marginal distributions. 
It thus appears that, despite the theoretical weaknesses, the actual performance of conditional model specification for multivariate imputation can be quite good, and therefore deserves further study.}, - publisher = {Informa {UK} Limited}, - keywords = {multivariate missing data, multiple imputation, distributional compatibility, Gibbs sampling, simulation, proper imputation}, -} - -@Article{Yuan-Bentler-2000, - author = {Ke-Hai Yuan and Peter M. Bentler}, - date = {2000-08}, - journaltitle = {Sociological Methodology}, - title = {Three likelihood-based methods for mean and covariance structure analysis with nonnormal missing data}, - doi = {10.1111/0081-1750.00078}, - number = {1}, - pages = {165--200}, - volume = {30}, - abstract = {Survey and longitudinal studies in the social and behavioral sciences generally contain missing data. Mean and covariance structure models play an important role in analyzing such data. Two promising methods for dealing with missing data are a direct maximum-likelihood and a two-stage approach based on the unstructured mean and covariance estimates obtained by the EM-algorithm. Typical assumptions under these two methods are ignorable nonresponse and normality of data. However, data sets in social and behavioral sciences are seldom normal, and experience with these procedures indicates that normal theory based methods for nonnormal data very often lead to incorrect model evaluations. By dropping the normal distribution assumption, we develop more accurate procedures for model inference. Based on the theory of generalized estimating equations, a way to obtain consistent standard errors of the two-stage estimates is given. The asymptotic efficiencies of different estimators are compared under various assumptions. We also propose a minimum chi-square approach and show that the estimator obtained by this approach is asymptotically at least as efficient as the two likelihood-based estimators for either normal or nonnormal data. The major contribution of this paper is that for each estimator, we give a test statistic whose asymptotic distribution is chisquare as long as the underlying sampling distribution enjoys finite fourth-order moments. We also give a characterization for each of the two likelihood ratio test statistics when the underlying distribution is nonnormal. Modifications to the likelihood ratio statistics are also given. Our working assumption is that the missing data mechanism is missing completely at random. Examples and Monte Carlo studies indicate that, for commonly encountered nonnormal distributions, the procedures developed in this paper are quite reliable even for samples with missing data that are missing at random.}, - publisher = {{SAGE} Publications}, -} - -@Book{MacKinnon-2008, - author = {David P. MacKinnon}, - series = {Multivariate applications}, - date = {2008}, - title = {Introduction to statistical mediation analysis}, - doi = {10.4324/9780203809556}, - isbn = {9780805864298}, - location = {Hoboken}, - pages = {488}, - library = {QA278.2 .M29 2008}, - addendum = {https://lccn.loc.gov/2007011793}, - abstract = {This volume introduces the statistical, methodological, and conceptual aspects of mediation analysis. Applications from health, social, and developmental psychology, sociology, communication, exercise science, and epidemiology are emphasized throughout. Single-mediator, multilevel, and longitudinal models are reviewed. 
The author's goal is to help the reader apply mediation analysis to their own data and understand its limitations. - Each chapter features an overview, numerous worked examples, a summary, and exercises (with answers to the odd numbered questions). The accompanying downloadable resources contain outputs described in the book from SAS, SPSS, LISREL, EQS, MPLUS, and CALIS, and a program to simulate the model. The notation used is consistent with existing literature on mediation in psychology. - The book opens with a review of the types of research questions the mediation model addresses. Part II describes the estimation of mediation effects including assumptions, statistical tests, and the construction of confidence limits. Advanced models including mediation in path analysis, longitudinal models, multilevel data, categorical variables, and mediation in the context of moderation are then described. The book closes with a discussion of the limits of mediation analysis, additional approaches to identifying mediating variables, and future directions. - Introduction to Statistical Mediation Analysis is intended for researchers and advanced students in health, social, clinical, and developmental psychology as well as communication, public health, nursing, epidemiology, and sociology. Some exposure to a graduate level research methods or statistics course is assumed. The overview of mediation analysis and the guidelines for conducting a mediation analysis will be appreciated by all readers.}, - publisher = {Erlbaum Psych Press}, - keywords = {Mediation (Statistics)}, -} - -@Book{Venables-Ripley-2002, - author = {W. N. Venables and B. D. Ripley}, - date = {2002}, - title = {Modern applied statistics with {S}}, - doi = {10.1007/978-0-387-21706-2}, - publisher = {Springer New York}, -} - -@Article{Biesanz-Falk-Savalei-2010, - author = {Jeremy C. Biesanz and Carl F. Falk and Victoria Savalei}, - date = {2010-08}, - journaltitle = {Multivariate Behavioral Research}, - title = {Assessing mediational models: Testing and interval estimation for indirect effects}, - doi = {10.1080/00273171.2010.498292}, - number = {4}, - pages = {661--701}, - volume = {45}, - abstract = {Theoretical models specifying indirect or mediated effects are common in the social sciences. An indirect effect exists when an independent variable's influence on the dependent variable is mediated through an intervening variable. Classic approaches to assessing such mediational hypotheses (Baron \& Kenny, 1986; Sobel, 1982) have in recent years been supplemented by computationally intensive methods such as bootstrapping, the distribution of the product methods, and hierarchical Bayesian Markov chain Monte Carlo (MCMC) methods. These different approaches for assessing mediation are illustrated using data from Dunn, Biesanz, Human, and Finn (2007). However, little is known about how these methods perform relative to each other, particularly in more challenging situations, such as with data that are incomplete and/or nonnormal. This article presents an extensive Monte Carlo simulation evaluating a host of approaches for assessing mediation. We examine Type I error rates, power, and coverage. We study normal and nonnormal data as well as complete and incomplete data. In addition, we adapt a method, recently proposed in statistical literature, that does not rely on confidence intervals (CIs) to test the null hypothesis of no indirect effect. 
The results suggest that the new inferential method--the partial posterior p value--slightly outperforms existing ones in terms of maintaining Type I error rates while maximizing power, especially with incomplete data. Among confidence interval approaches, the bias-corrected accelerated (BCa) bootstrapping approach often has inflated Type I error rates and inconsistent coverage and is not recommended. In contrast, the bootstrapped percentile confidence interval and the hierarchical Bayesian MCMC method perform best overall, maintaining Type I error rates, exhibiting reasonable power, and producing stable and accurate coverage rates.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Blanca-Arnau-LopezMontiel-etal-2013, - author = {Mar\'\ia J. Blanca and Jaume Arnau and Dolores L{\'o}pez-Montiel and Roser Bono and Rebecca Bendayan}, - date = {2013-05}, - journaltitle = {Methodology}, - title = {Skewness and kurtosis in real data samples}, - doi = {10.1027/1614-2241/a000057}, - number = {2}, - pages = {78--84}, - volume = {9}, - abstract = {Parametric statistics are based on the assumption of normality. Recent findings suggest that Type I error and power can be adversely affected when data are non-normal. This paper aims to assess the distributional shape of real data by examining the values of the third and fourth central moments as a measurement of skewness and kurtosis in small samples. The analysis concerned 693 distributions with a sample size ranging from 10 to 30. Measures of cognitive ability and of other psychological variables were included. The results showed that skewness ranged between -2.49 and 2.33. The values of kurtosis ranged between -1.92 and 7.41. Considering skewness and kurtosis together the results indicated that only 5.5\% of distributions were close to expected values under normality. Although extreme contamination does not seem to be very frequent, the findings are consistent with previous research suggesting that normality is not the rule with real data.}, - publisher = {Hogrefe Publishing Group}, -} - -@Article{Boettiger-Eddelbuettel-2017, - author = {Carl Boettiger and Dirk Eddelbuettel}, - date = {2017}, - journaltitle = {The R Journal}, - title = {An introduction to {Rocker}: Docker containers for {R}}, - doi = {10.32614/rj-2017-065}, - number = {2}, - pages = {527}, - volume = {9}, - abstract = {We describe the Rocker project, which provides a widely-used suite of Docker images with customized R environments for particular tasks. We discuss how this suite is organized, and how these tools can increase portability, scaling, reproducibility, and convenience of R users and developers.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-docker-rocker}, -} - -@Article{Chow-Ho-Hamaker-etal-2010, - author = {Sy-Miin Chow and Moon-ho R. Ho and Ellen L. Hamaker and Conor V. Dolan}, - date = {2010-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Equivalence and differences between structural equation modeling and state-space modeling techniques}, - doi = {10.1080/10705511003661553}, - number = {2}, - pages = {303--332}, - volume = {17}, - abstract = {State-space modeling techniques have been compared to structural equation modeling (SEM) techniques in various contexts but their unique strengths have often been overshadowed by their similarities to SEM.
In this article, we provide a comprehensive discussion of these 2 approaches' similarities and differences through analytic comparisons and numerical simulations, with a focus on their use in representing intraindividual dynamics and interindividual differences. To demonstrate the respective strengths and weaknesses of the 2 approaches in representing these 2 aspects, we simulated data under (a) a cross-sectional common factor model, (b) a latent difference score model with random effects in intercept and slope, and (c) a bivariate dynamic factor analysis model with auto- and cross-regression parameters. Possible ways in which SEM and state-space modeling can be utilized as complementary tools in representing human developmental and other related processes are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {ild, sem, ssm}, -} - -@Article{Deboeck-Preacher-2015, - author = {Pascal R. Deboeck and Kristopher J. Preacher}, - date = {2015-06}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {No Need to be Discrete: A Method for Continuous Time Mediation Analysis}, - doi = {10.1080/10705511.2014.973960}, - number = {1}, - pages = {61--75}, - volume = {23}, - abstract = {Mediation is one concept that has shaped numerous theories. The list of problems associated with mediation models, however, has been growing. Mediation models based on cross-sectional data can produce unexpected estimates, so much so that making longitudinal or causal inferences is inadvisable. Even longitudinal mediation models have faults, as parameter estimates produced by these models are specific to the lag between observations, leading to much debate over appropriate lag selection. Using continuous time models (CTMs) rather than commonly employed discrete time models, one can estimate lag-independent parameters. We demonstrate methodology that allows for continuous time mediation analyses, with attention to concepts such as indirect and direct effects, partial mediation, the effect of lag, and the lags at which relations become maximal. A simulation compares common longitudinal mediation methods with CTMs. Reanalysis of a published covariance matrix demonstrates that CTMs can be fit to data used in longitudinal mediation studies.}, - publisher = {Informa {UK} Limited}, - keywords = {continuous time models, cross-lagged panel model, exact discrete model, longitudinal mediation, mediation}, - annotation = {ild, ild-mediation}, -} - -@Article{Dudgeon-2017, - author = {Paul Dudgeon}, - date = {2017-03}, - journaltitle = {Psychometrika}, - title = {Some improvements in confidence intervals for standardized regression coefficients}, - doi = {10.1007/s11336-017-9563-z}, - number = {4}, - pages = {928--951}, - volume = {82}, - keywords = {standardized regression coefficients, robust confidence intervals, non-normality}, - abstract = {Yuan and Chan (Psychometrika 76:670–690, 2011. doi:10.1007/S11336-011-9224-6) derived consistent confidence intervals for standardized regression coefficients under fixed and random score assumptions. Jones and Waller (Psychometrika 80:365–378, 2015. doi:10.1007/S11336-013-9380-Y) extended these developments to circumstances where data are non-normal by examining confidence intervals based on Browne's (Br J Math Stat Psychol 37:62–83, 1984. doi:10.1111/j.2044-8317.1984.tb00789.x) asymptotic distribution-free (ADF) theory. 
Seven different heteroscedastic-consistent (HC) estimators were investigated in the current study as potentially better solutions for constructing confidence intervals on standardized regression coefficients under non-normality. Normal theory, ADF, and HC estimators were evaluated in a Monte Carlo simulation. Findings confirmed the superiority of the HC3 (MacKinnon and White, J Econ 35:305–325, 1985. doi:10.1016/0304-4076(85)90158-7) and HC5 (Cribari-Neto and Da Silva, Adv Stat Anal 95:129–146, 2011. doi:10.1007/s10182-010-0141-2) interval estimators over Jones and Waller's ADF estimator under all conditions investigated, as well as over the normal theory method. The HC5 estimator was more robust in a restricted set of conditions over the HC3 estimator. Some possible extensions of HC estimators to other effect size measures are considered for future developments.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Eddelbuettel-Francois-2011, - author = {Dirk Eddelbuettel and Romain Fran{\c c}ois}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{Rcpp}: Seamless {R} and {C++} integration}, - doi = {10.18637/jss.v040.i08}, - number = {8}, - volume = {40}, - abstract = {The Rcpp package simplifies integrating C++ code with R. It provides a consistent C++ class hierarchy that maps various types of R objects (vectors, matrices, functions, environments, ...) to dedicated C++ classes. Object interchange between R and C++ is managed by simple, flexible and extensible concepts which include broad support for C++ Standard Template Library idioms. C++ code can both be compiled, linked and loaded on the fly, or added via packages. Flexible error and exception code handling is provided. Rcpp substantially lowers the barrier for programmers wanting to combine C++ code with R.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages}, -} - -@Article{Hayes-Scharkow-2013, - author = {Andrew F. Hayes and Michael Scharkow}, - date = {2013-08}, - journaltitle = {Psychological Science}, - title = {The relative trustworthiness of inferential tests of the indirect effect in statistical mediation analysis}, - doi = {10.1177/0956797613480187}, - number = {10}, - pages = {1918--1927}, - volume = {24}, - abstract = {A content analysis of 2 years of Psychological Science articles reveals inconsistencies in how researchers make inferences about indirect effects when conducting a statistical mediation analysis. In this study, we examined the frequency with which popularly used tests disagree, whether the method an investigator uses makes a difference in the conclusion he or she will reach, and whether there is a most trustworthy test that can be recommended to balance practical and performance considerations. We found that tests agree much more frequently than they disagree, but disagreements are more common when an indirect effect exists than when it does not. We recommend the bias-corrected bootstrap confidence interval as the most trustworthy test if power is of utmost concern, although it can be slightly liberal in some circumstances. Investigators concerned about Type I errors should choose the Monte Carlo confidence interval or the distribution-of-the-product approach, which rarely disagree. The percentile bootstrap confidence interval is a good compromise test.}, - publisher = {{SAGE} Publications}, -} - -@Article{Hunter-2017, - author = {Michael D. 
Hunter}, - date = {2017-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {State Space Modeling in an Open Source, Modular, Structural Equation Modeling Environment}, - doi = {10.1080/10705511.2017.1369354}, - number = {2}, - pages = {307--324}, - volume = {25}, - abstract = {State space models (SSMs) are introduced in the context of structural equation modeling (SEM). In particular, the OpenMx implementation of SSMs using the Kalman filter and prediction error decomposition is discussed. In reflection of modularity, the implementation uses the same full information maximum likelihood missing data procedures for SSMs and SEMs. Similarly, generic OpenMx features such as likelihood ratio tests, profile likelihood confidence intervals, Hessian-based standard errors, definition variables, and the matrix algebra interface are all supported. Example scripts for specification of autoregressive models, multiple lag models (VAR(p)), multiple lag moving average models (VARMA(p, q)), multiple subject models, and latent growth models are provided. Additionally, latent variable calculation based on the Kalman filter and raw data generation based on a model are all included. Finally, future work for extending SSMs to allow for random effects and for presenting them in diagrams is discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {state space model, software, Kalman filter, OpenMx}, - annotation = {ild, ild-software, sem, sem-software, ssm, ssm-software}, -} - -@Article{Jones-Waller-2013a, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013}, - journaltitle = {Psychological Methods}, - title = {Computing confidence intervals for standardized regression coefficients.}, - doi = {10.1037/a0033269}, - number = {4}, - pages = {435--453}, - volume = {18}, - abstract = {With fixed predictors, the standard method (Cohen, Cohen, West, \& Aiken, 2003, p. 86; Harris, 2001, p. 80; Hays, 1994, p. 709) for computing confidence intervals (CIs) for standardized regression coefficients fails to account for the sampling variability of the criterion standard deviation. With random predictors, this method also fails to account for the sampling variability of the predictor standard deviations. Nevertheless, under some conditions the standard method will produce CIs with accurate coverage rates. To delineate these conditions, we used a Monte Carlo simulation to compute empirical CI coverage rates in samples drawn from 36 populations with a wide range of data characteristics. We also computed the empirical CI coverage rates for 4 alternative methods that have been discussed in the literature: noncentrality interval estimation, the delta method, the percentile bootstrap, and the bias-corrected and accelerated bootstrap. Our results showed that for many data-parameter configurations--for example, sample size, predictor correlations, coefficient of determination ($R^2$), orientation of $\beta$ with respect to the eigenvectors of the predictor correlation matrix, $R_X$--the standard method produced coverage rates that were close to their expected values. However, when population $R^2$ was large and when $\beta$ approached the last eigenvector of $R_X$, then the standard method coverage rates were frequently below the nominal rate (sometimes by a considerable amount). In these conditions, the delta method and the 2 bootstrap procedures were consistently accurate. Results using noncentrality interval estimation were inconsistent. 
In light of these findings, we recommend that researchers use the delta method to evaluate the sampling variability of standardized regression coefficients.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Jones-Waller-2015, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2015-06}, - journaltitle = {Psychometrika}, - title = {The Normal-Theory and Asymptotic Distribution-Free ({ADF}) Covariance Matrix of Standardized Regression Coefficients: Theoretical Extensions and Finite Sample Behavior}, - doi = {10.1007/s11336-013-9380-y}, - number = {2}, - pages = {365--378}, - volume = {80}, - abstract = {Yuan and Chan (Psychometrika, 76, 670–690, 2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a method for computing this covariance matrix from correlations. Next, we describe an asymptotic distribution-free (ADF; Browne in British Journal of Mathematical and Statistical Psychology, 37, 62–83, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with nonnormal data in moderate-to-large samples using both simulated and real-data examples. R code (R Development Core Team, 2012) is available from the authors or through the Psychometrika online repository for supplementary materials.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {standardized-regression, standardized-regression-hc}, -} - -@Article{Koopman-Howe-Hollenbeck-etal-2015, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck and Hock-Peng Sin}, - date = {2015}, - journaltitle = {Journal of Applied Psychology}, - title = {Small sample mediation testing: Misplaced confidence in bootstrapped confidence intervals}, - doi = {10.1037/a0036635}, - number = {1}, - pages = {194--202}, - volume = {100}, - abstract = {Bootstrapping is an analytical tool commonly used in psychology to test the statistical significance of the indirect effect in mediation models. Bootstrapping proponents have particularly advocated for its use for samples of 20-80 cases. This advocacy has been heeded, especially in the Journal of Applied Psychology, as researchers are increasingly utilizing bootstrapping to test mediation with samples in this range. We discuss reasons to be concerned with this escalation, and in a simulation study focused specifically on this range of sample sizes, we demonstrate not only that bootstrapping has insufficient statistical power to provide a rigorous hypothesis test in most conditions but also that bootstrapping has a tendency to exhibit an inflated Type I error rate. We then extend our simulations to investigate an alternative empirical resampling method as well as a Bayesian approach and demonstrate that they exhibit comparable statistical power to bootstrapping in small samples without the associated inflated Type I error. Implications for researchers testing mediation hypotheses in small samples are presented. For researchers wishing to use these methods in their own research, we have provided R syntax in the online supplemental materials.}, - publisher = {American Psychological Association ({APA})}, - keywords = {mediation, bootstrapping, permutation, Bayes}, -} - -@Article{Kurtzer-Sochat-Bauer-2017, - author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. 
Bauer}, - date = {2017-05}, - journaltitle = {{PLOS} {ONE}}, - title = {{Singularity}: Scientific containers for mobility of compute}, - doi = {10.1371/journal.pone.0177459}, - editor = {Attila Gursoy}, - number = {5}, - pages = {e0177459}, - volume = {12}, - publisher = {Public Library of Science ({PLoS})}, - annotation = {container, container-singularity}, -} - -@Article{Kwan-Chan-2011, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2011-04}, - journaltitle = {Behavior Research Methods}, - title = {Comparing standardized coefficients in structural equation modeling: A model reparameterization approach}, - doi = {10.3758/s13428-011-0088-6}, - number = {3}, - pages = {730--745}, - volume = {43}, - abstract = {We propose a two-stage method for comparing standardized coefficients in structural equation modeling (SEM). At stage 1, we transform the original model of interest into the standardized model by model reparameterization, so that the model parameters appearing in the standardized model are equivalent to the standardized parameters of the original model. At stage 2, we impose appropriate linear equality constraints on the standardized model and use a likelihood ratio test to make statistical inferences about the equality of standardized coefficients. Unlike other existing methods for comparing standardized coefficients, the proposed method does not require specific modeling features (e.g., specification of nonlinear constraints), which are available only in certain SEM software programs. Moreover, this method allows researchers to compare two or more standardized coefficients simultaneously in a standard and convenient way. Three real examples are given to illustrate the proposed method, using EQS, a popular SEM software program. Results show that the proposed method performs satisfactorily for testing the equality of standardized coefficients.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Kwan-Chan-2014, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2014-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Comparing squared multiple correlation coefficients using structural equation modeling}, - doi = {10.1080/10705511.2014.882673}, - number = {2}, - pages = {225--238}, - volume = {21}, - abstract = {In social science research, a common topic in multiple regression analysis is to compare the squared multiple correlation coefficients in different populations. Existing methods based on asymptotic theories (Olkin \& Finn, 1995) and bootstrapping (Chan, 2009) are available but these can only handle a 2-group comparison. Another method based on structural equation modeling (SEM) has been proposed recently. However, this method has three disadvantages. First, it requires the user to explicitly specify the sample R2 as a function in terms of the basic SEM model parameters, which is sometimes troublesome and error prone. Second, it requires the specification of nonlinear constraints, which is not available in some popular SEM software programs. Third, it is for a 2-group comparison primarily. In this article, a 2-stage SEM method is proposed as an alternative. Unlike all other existing methods, the proposed method is simple to use, and it does not require any specific programming features such as the specification of nonlinear constraints. More important, the method allows a simultaneous comparison of 3 or more groups. 
A real example is given to illustrate the proposed method using EQS, a popular SEM software program.}, - keywords = {squared multiple correlation coefficients, structural equation modeling, model reparameterization, multi-sample analysis}, - publisher = {Informa {UK} Limited}, -} - -@Article{Merkel-2014, - author = {Dirk Merkel}, - date = {2014}, - journaltitle = {Linux Journal}, - title = {{Docker}: Lightweight {Linux} containers for consistent development and deployment}, - number = {239}, - pages = {2}, - volume = {2014}, - url = {https://www.linuxjournal.com/content/docker-lightweight-linux-containers-consistent-development-and-deployment}, - annotation = {container, container-docker}, -} - -@Article{Neale-Hunter-Pritikin-etal-2015, - author = {Michael C. Neale and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Timothy R. Brick and Robert M. Kirkpatrick and Ryne Estabrook and Timothy C. Bates and Hermine H. Maes and Steven M. Boker}, - date = {2015-01}, - journaltitle = {Psychometrika}, - title = {{OpenMx} 2.0: Extended Structural Equation and Statistical Modeling}, - doi = {10.1007/s11336-014-9435-8}, - number = {2}, - pages = {535--549}, - volume = {81}, - abstract = {The new software package OpenMx 2.0 for structural equation and other statistical modeling is introduced and its features are described. OpenMx is evolving in a modular direction and now allows a mix-and-match computational approach that separates model expectations from fit functions and optimizers. Major backend architectural improvements include a move to swappable open-source optimizers such as the newly written CSOLNP. Entire new methodologies such as item factor analysis and state space modeling have been implemented. New model expectation functions including support for the expression of models in LISREL syntax and a simplified multigroup expectation function are available. Ease-of-use improvements include helper functions to standardize model parameters and compute their Jacobian-based standard errors, access to model components through standard R \$ mechanisms, and improved tab completion from within the R Graphical User Interface.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Ou-Hunter-Chow-2019, - author = {Lu Ou and Michael D. Hunter and Sy-Miin Chow}, - date = {2019}, - journaltitle = {The R Journal}, - title = {What's for {dynr}: A package for linear and nonlinear dynamic modeling in {R}}, - doi = {10.32614/rj-2019-012}, - number = {1}, - pages = {91}, - volume = {11}, - abstract = {Intensive longitudinal data in the behavioral sciences are often noisy, multivariate in nature, and may involve multiple units undergoing regime switches by showing discontinuities interspersed with continuous dynamics. Despite increasing interest in using linear and nonlinear differential/difference equation models with regime switches, there has been a scarcity of software packages that are fast and freely accessible. We have created an R package called dynr that can handle a broad class of linear and nonlinear discrete- and continuous-time models, with regime-switching properties and linear Gaussian measurement functions, in C, while maintaining simple and easy-to-learn model specification functions in R.
We present the mathematical and computational bases used by the dynr R package, and present two illustrative examples to demonstrate the unique features of dynr.}, - publisher = {The R Foundation}, - annotation = {ild, ild-software, r, r-packages}, -} - -@Article{Preacher-Selig-2012, - author = {Kristopher J. Preacher and James P. Selig}, - date = {2012-04}, - journaltitle = {Communication Methods and Measures}, - title = {Advantages of Monte Carlo Confidence Intervals for Indirect Effects}, - doi = {10.1080/19312458.2012.679848}, - number = {2}, - pages = {77--98}, - volume = {6}, - abstract = {Monte Carlo simulation is a useful but underutilized method of constructing confidence intervals for indirect effects in mediation analysis. The Monte Carlo confidence interval method has several distinct advantages over rival methods. Its performance is comparable to other widely accepted methods of interval construction, it can be used when only summary data are available, it can be used in situations where rival methods (e.g., bootstrapping and distribution of the product methods) are difficult or impossible, and it is not as computer-intensive as some other methods. In this study we discuss Monte Carlo confidence intervals for indirect effects, report the results of a simulation study comparing their performance to that of competing methods, demonstrate the method in applied examples, and discuss several software options for implementation in applied settings.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Rosseel-2012, - author = {Yves Rosseel}, - date = {2012}, - journaltitle = {Journal of Statistical Software}, - title = {{lavaan}: An {R} package for structural equation modeling}, - doi = {10.18637/jss.v048.i02}, - number = {2}, - volume = {48}, - abstract = {Structural equation modeling (SEM) is a vast field and widely used by many applied researchers in the social and behavioral sciences. Over the years, many software packages for structural equation modeling have been developed, both free and commercial. However, perhaps the best state-of-the-art software packages in this field are still closed-source and/or commercial. The R package lavaan has been developed to provide applied researchers, teachers, and statisticians, a free, fully open-source, but commercial-quality package for latent variable modeling. This paper explains the aims behind the development of the package, gives an overview of its most important features, and provides some examples to illustrate how lavaan works in practice.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Schouten-Lugtig-Vink-2018, - author = {Rianne Margaretha Schouten and Peter Lugtig and Gerko Vink}, - date = {2018-07}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Generating missing values for simulation purposes: A multivariate amputation procedure}, - doi = {10.1080/00949655.2018.1491577}, - number = {15}, - pages = {2909--2930}, - volume = {88}, - abstract = {Missing data form a ubiquitous problem in scientific research, especially since most statistical analyses require complete data. To evaluate the performance of methods dealing with missing data, researchers perform simulation studies. An important aspect of these studies is the generation of missing values in a simulated, complete data set: the amputation procedure. 
We investigated the methodological validity and statistical nature of both the current amputation practice and a newly developed and implemented multivariate amputation procedure. We found that the current way of practice may not be appropriate for the generation of intuitive and reliable missing data problems. The multivariate amputation procedure, on the other hand, generates reliable amputations and allows for a proper regulation of missing data problems. The procedure has additional features to generate any missing data scenario precisely as intended. Hence, the multivariate amputation procedure is an efficient method to accurately evaluate missing data methodology.}, - publisher = {Informa {UK} Limited}, - keywords = {missing data, multiple imputation, multivariate amputation, evaluation}, -} - -@Article{Tofighi-Kelley-2019, - author = {Davood Tofighi and Ken Kelley}, - date = {2019-06}, - journaltitle = {Multivariate Behavioral Research}, - title = {Indirect effects in sequential mediation models: Evaluating methods for hypothesis testing and confidence interval formation}, - doi = {10.1080/00273171.2019.1618545}, - number = {2}, - pages = {188--210}, - volume = {55}, - abstract = {Complex mediation models, such as a two-mediator sequential model, have become more prevalent in the literature. To test an indirect effect in a two-mediator model, we conducted a large-scale Monte Carlo simulation study of the Type I error, statistical power, and confidence interval coverage rates of 10 frequentist and Bayesian confidence/credible intervals (CIs) for normally and nonnormally distributed data. The simulation included never-studied methods and conditions (e.g., Bayesian CI with flat and weakly informative prior methods, two model-based bootstrap methods, and two nonnormality conditions) as well as understudied methods (e.g., profile-likelihood, Monte Carlo with maximum likelihood standard error [MC-ML] and robust standard error [MC-Robust]). The popular BC bootstrap showed inflated Type I error rates and CI under-coverage. We recommend different methods depending on the purpose of the analysis. For testing the null hypothesis of no mediation, we recommend MC-ML, profile-likelihood, and two Bayesian methods. To report a CI, if data has a multivariate normal distribution, we recommend MC-ML, profile-likelihood, and the two Bayesian methods; otherwise, for multivariate nonnormal data we recommend the percentile bootstrap. We argue that the best method for testing hypotheses is not necessarily the best method for CI construction, which is consistent with the findings we present.}, - keywords = {indirect effect, confidence interval, sequential mediation, Bayesian credible interval}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bayesian, mediation-bootstrap, mediation-lb, mediation-montecarlo, semmcci}, -} - -@Article{Tofighi-MacKinnon-2015, - author = {Davood Tofighi and David P. MacKinnon}, - date = {2015-08}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{Monte Carlo} confidence intervals for complex functions of indirect effects}, - doi = {10.1080/10705511.2015.1057284}, - number = {2}, - pages = {194--205}, - volume = {23}, - abstract = {One challenge in mediation analysis is to generate a confidence interval (CI) with high coverage and power that maintains a nominal significance level for any well-defined function of indirect and direct effects in the general context of structural equation modeling (SEM). 
This study discusses a proposed Monte Carlo extension that finds the CIs for any well-defined function of the coefficients of SEM such as the product of $k$ coefficients and the ratio of the contrasts of indirect effects, using the Monte Carlo method. Finally, we conduct a small-scale simulation study to compare CIs produced by the Monte Carlo, nonparametric bootstrap, and asymptotic-delta methods. Based on our simulation study, we recommend researchers use the Monte Carlo method to test a complex function of indirect effects.}, - keywords = {confidence interval, mediation analysis, Monte Carlo}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-delta, mediation-montecarlo, semmcci}, -} - -@Article{vanBuuren-GroothuisOudshoorn-2011, - author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{mice}: Multivariate Imputation by Chained Equations in {R}}, - doi = {10.18637/jss.v045.i03}, - number = {3}, - volume = {45}, - abstract = {The R package mice imputes incomplete multivariate data by chained equations. The software mice 1.0 appeared in the year 2000 as an S-PLUS library, and in 2001 as an R package. mice 1.0 introduced predictor selection, passive imputation and automatic pooling. This article documents mice, which extends the functionality of mice 1.0 in several ways. In mice, the analysis of imputed data is made completely general, whereas the range of models under which pooling works is substantially extended. mice adds new functionality for imputing multilevel data, automatic predictor selection, data handling, post-processing imputed values, specialized pooling routines, model selection tools, and diagnostic graphs. Imputation of categorical data is improved in order to bypass problems caused by perfect prediction. Special attention is paid to transformations, sum scores, indices and interactions using passive imputation, and to the proper setup of the predictor matrix. mice can be downloaded from the Comprehensive R Archive Network. This article provides a hands-on, stepwise approach to solve applied incomplete data problems.}, - publisher = {Foundation for Open Access Statistic}, - keywords = {MICE, multiple imputation, chained equations, fully conditional specification, Gibbs sampler, predictor selection, passive imputation, R}, -} - -@Article{Wu-Jia-2013, - author = {Wei Wu and Fan Jia}, - date = {2013-09}, - journaltitle = {Multivariate Behavioral Research}, - title = {A new procedure to test mediation with missing data through nonparametric bootstrapping and multiple imputation}, - doi = {10.1080/00273171.2013.816235}, - number = {5}, - pages = {663--691}, - volume = {48}, - abstract = {This article proposes a new procedure to test mediation with the presence of missing data by combining nonparametric bootstrapping with multiple imputation (MI). This procedure performs MI first and then bootstrapping for each imputed data set. The proposed procedure is more computationally efficient than the procedure that performs bootstrapping first and then MI for each bootstrap sample. The validity of the procedure is evaluated using a simulation study under different sample size, missing data mechanism, missing data proportion, and shape of distribution conditions. The result suggests that the proposed procedure performs comparably to the procedure that combines bootstrapping with full information maximum likelihood under most conditions. 
However, caution needs to be taken when using this procedure to handle missing not-at-random or nonnormal data.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Yuan-Chan-2011, - author = {Ke-Hai Yuan and Wai Chan}, - date = {2011-08}, - journaltitle = {Psychometrika}, - title = {Biases and Standard Errors of Standardized Regression Coefficients}, - doi = {10.1007/s11336-011-9224-6}, - number = {4}, - pages = {670--690}, - volume = {76}, - abstract = {The paper obtains consistent standard errors (SE) and biases of order O(1/n) for the sample standardized regression coefficients with both random and given predictors. Analytical results indicate that the formulas for SEs given in popular text books are consistent only when the population value of the regression coefficient is zero. The sample standardized regression coefficients are also biased in general, although it should not be a concern in practice when the sample size is not too small. Monte Carlo results imply that, for both standardized and unstandardized sample regression coefficients, SE estimates based on asymptotics tend to under-predict the empirical ones at smaller sample sizes.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {asymptotics, bias, consistency, Monte Carlo}, - annotation = {standardized-regression, standardized-regression-delta, standardized-regression-normal, standardized-regression-adf}, -} - -@Article{Yzerbyt-Muller-Batailler-etal-2018, - author = {Vincent Yzerbyt and Dominique Muller and C{\a'e}dric Batailler and Charles M. Judd}, - date = {2018-12}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {New recommendations for testing indirect effects in mediational models: The need to report and test component paths}, - doi = {10.1037/pspa0000132}, - number = {6}, - pages = {929--943}, - volume = {115}, - abstract = {In light of current concerns with replicability and reporting false-positive effects in psychology, we examine Type I errors and power associated with 2 distinct approaches for the assessment of mediation, namely the component approach (testing individual parameter estimates in the model) and the index approach (testing a single mediational index). We conduct simulations that examine both approaches and show that the most commonly used tests under the index approach risk inflated Type I errors compared with the joint-significance test inspired by the component approach. We argue that the tendency to report only a single mediational index is worrisome for this reason and also because it is often accompanied by a failure to critically examine the individual causal paths underlying the mediational model. We recommend testing individual components of the indirect effect to argue for the presence of an indirect effect and then using other recommended procedures to calculate the size of that effect. Beyond simple mediation, we show that our conclusions also apply in cases of within-participant mediation and moderated mediation. 
We also provide a new R-package that allows for an easy implementation of our recommendations.}, - publisher = {American Psychological Association ({APA})}, - keywords = {indirect effects, mediation, joint-significance, bootstrap}, -} - -@Article{Zhang-Wang-2012, - author = {Zhiyong Zhang and Lijuan Wang}, - date = {2012-12}, - journaltitle = {Psychometrika}, - title = {Methods for mediation analysis with missing data}, - doi = {10.1007/s11336-012-9301-5}, - number = {1}, - pages = {154--184}, - volume = {78}, - abstract = {Despite wide applications of both mediation models and missing data techniques, formal discussion of mediation analysis with missing data is still rare. We introduce and compare four approaches to dealing with missing data in mediation analysis including listwise deletion, pairwise deletion, multiple imputation (MI), and a two-stage maximum likelihood (TS-ML) method. An R package bmem is developed to implement the four methods for mediation analysis with missing data in the structural equation modeling framework, and two real examples are used to illustrate the application of the four methods. The four methods are evaluated and compared under MCAR, MAR, and MNAR missing data mechanisms through simulation studies. Both MI and TS-ML perform well for MCAR and MAR data regardless of the inclusion of auxiliary variables and for AV-MNAR data with auxiliary variables. Although listwise deletion and pairwise deletion have low power and large parameter estimation bias in many studied conditions, they may provide useful information for exploring missing mechanisms.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, missing data, MI, TS-ML, bootstrap, auxiliary variables}, -} - -@Book{Eddelbuettel-2013, - author = {Dirk Eddelbuettel}, - date = {2013}, - title = {Seamless {R} and {C++} integration with {Rcpp}}, - doi = {10.1007/978-1-4614-6868-4}, - isbn = {978-1-4614-6868-4}, - publisher = {Springer New York}, - abstract = {Illustrates a range of statistical computations in R using the Rcpp package. Provides a general introduction to extending R with C++ code. Features an appendix for R users new to the C++ programming language Rcpp packages are presented in the context of useful application case studies.}, - annotation = {r, r-packages}, -} - -@Book{Enders-2010, - author = {Craig K. Enders}, - date = {2010-05-31}, - title = {Applied missing data analysis}, - isbn = {9781606236390}, - pagetotal = {377}, - library = {HA29 .E497 2010}, - addendum = {https://lccn.loc.gov/2010008465}, - abstract = {Walking readers step by step through complex concepts, this book translates missing data techniques into something that applied researchers and graduate students can understand and utilize in their own research. Enders explains the rationale and procedural details for maximum likelihood estimation, Bayesian estimation, multiple imputation, and models for handling missing not at random (MNAR) data. Easy-to-follow examples and small simulated data sets illustrate the techniques and clarify the underlying principles. The companion website (www.appliedmissingdata.com) includes data files and syntax for the examples in the book as well as up-to-date information on software. 
The book is accessible to substantive researchers while providing a level of detail that will satisfy quantitative specialists.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Missing observations (Statistics), Social sciences--Research--Methodology}, -} - -@InBook{Koopman-Howe-Hollenbeck-2014, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck}, - booktitle = {More statistical and methodological myths and urban legends: Doctrine, verity and fable in organizational and social sciences}, - date = {2014}, - title = {Pulling the {Sobel} test up by its bootstraps}, - bookauthor = {Charles E. Lance and Robert J. Vandenberg}, - isbn = {9780203775851}, - pages = {224--243}, - doi = {10.4324/9780203775851}, - abstract = {In the domain of building and testing theory, mediation relationships are among the most important that can be proposed. Mediation helps to explicate our theoretical models (Leavitt, Mitchell, \& Peterson, 2010) and addresses the fundamental question of why two constructs are related (Whetten, 1989). One of the better-known methods for testing mediation is commonly referred to as the ``Sobel test,'' named for the researcher who derived a standard error (Sobel, 1982) to test the significance of the indirect effect. Recently, a number of different research teams (e.g., Preacher \& Hayes, 2004; Shrout \& Bolger, 2002) have criticized the Sobel test because this standard error requires an assumption of normality for the indirect effect sampling distribution. This distribution tends to be positively skewed (i.e., not normal), particularly in small samples, and so this assumption can be problematic (Preacher \& Hayes, 2004; Stone \& Sobel, 1990). As a result, the statistical power of the Sobel test may be lessened in these contexts (Preacher \& Hayes, 2004; Shrout \& Bolger, 2002). In light of this concern, some scholars have advocated instead for the use of bootstrapping to test the significance of the indirect effect (e.g., Shrout \& Bolger, 2002). Bootstrapping requires no a priori assumption about the shape of the sampling distribution because this distribution is empirically estimated using a resampling procedure (Efron \& Tibshirani, 1993). As a result, departures from normality are less troublesome when creating a confidence interval for the indirect effect. For this reason, bootstrapping is now widely believed to be inherently superior to the Sobel test when testing the significance of the indirect effect in organizational research. Our position is that this belief constitutes an urban legend. As with all statistical urban legends, there is an underlying kernel of truth to the belief that bootstrapping is superior to the Sobel test. However, as we discuss in this chapter, there are several reasons to be concerned with a broad belief in the superiority of bootstrapping. We begin with a brief overview of mediation testing focusing on the Sobel test and bootstrapping and then explain the underlying kernel of truth that has propelled bootstrapping to the forefront of mediation testing in organizational research. Subsequently, we discuss four areas of concern that cast doubt on the belief of the inherent superiority of bootstrapping. Finally, we conclude with recommendations concerning the future of mediation testing in organizational research.}, - publisher = {Routledge/Taylor \& Francis Group}, -} - -@Book{Little-Rubin-2019, - author = {Roderick J. A. Little and Donald B.
Rubin}, - date = {2019-04}, - title = {Statistical analysis with missing data}, - doi = {10.1002/9781119482260}, - edition = {3}, - isbn = {9781119482260}, - library = {QA276}, - addendum = {https://lccn.loc.gov/2018061330}, - abstract = {An up-to-date, comprehensive treatment of a classic text on missing data in statistics. - The topic of missing data has gained considerable attention in recent decades. This new edition by two acknowledged experts on the subject offers an up-to-date account of practical methodology for handling missing data problems. Blending theory and application, authors Roderick Little and Donald Rubin review historical approaches to the subject and describe simple methods for multivariate analysis with missing values. They then provide a coherent theory for analysis of problems based on likelihoods derived from statistical models for the data and the missing data mechanism, and then they apply the theory to a wide range of important missing data problems. - Statistical Analysis with Missing Data, Third Edition starts by introducing readers to the subject and approaches toward solving it. It looks at the patterns and mechanisms that create the missing data, as well as a taxonomy of missing data. It then goes on to examine missing data in experiments, before discussing complete-case and available-case analysis, including weighting methods. The new edition expands its coverage to include recent work on topics such as nonresponse in sample surveys, causal inference, diagnostic methods, and sensitivity analysis, among a host of other topics. - \begin{itemize} \item An updated ``classic'' written by renowned authorities on the subject \item Features over 150 exercises (including many new ones) \item Covers recent work on important methods like multiple imputation, robust alternatives to weighting, and Bayesian methods \item Revises previous topics based on past student feedback and class experience \item Contains an updated and expanded bibliography \end{itemize} - The authors were awarded The Karl Pearson Prize in 2017 by the International Statistical Institute, for a research contribution that has had profound influence on statistical theory, methodology or applications. Their work ``has been no less than defining and transforming.'' (ISI) - Statistical Analysis with Missing Data, Third Edition is an ideal textbook for upper undergraduate and/or beginning graduate level students of the subject. It is also an excellent source of information for applied statisticians and practitioners in government and industry.}, - publisher = {Wiley}, - keywords = {Mathematical statistics, Mathematical statistics--Problems, exercises, etc., Missing observations (Statistics), Missing observations (Statistics)--Problems, exercises, etc.}, -} - -@Book{Pawitan-2013, - author = {Yudi Pawitan}, - date = {2013-01-17}, - title = {In all likelihood: Statistical modelling and inference using likelihood}, - isbn = {9780199671229}, - pagetotal = {544}, - abstract = {Based on a course in the theory of statistics this text concentrates on what can be achieved using the likelihood/Fisherian method of taking account of uncertainty when studying a statistical problem. It takes the concept of the likelihood as providing the best methods for unifying the demands of statistical modelling and the theory of inference. Every likelihood concept is illustrated by realistic examples, which are not compromised by computational problems.
Examples range from a simple comparison of two accident rates, to complex studies that require generalised linear or semiparametric modelling. - The emphasis is that the likelihood is not simply a device to produce an estimate, but an important tool for modelling. The book generally takes an informal approach, where most important results are established using heuristic arguments and motivated with realistic examples. With the currently available computing power, examples are not contrived to allow a closed analytical solution, and the book can concentrate on the statistical aspects of the data modelling. In addition to classical likelihood theory, the book covers many modern topics such as generalized linear models and mixed models, non parametric smoothing, robustness, the EM algorithm and empirical likelihood.}, - publisher = {Oxford University Press}, -} - -@Book{vanBuuren-2018, - author = {Stef {van Buuren}}, - date = {2018-07}, - title = {Flexible imputation of missing data}, - doi = {10.1201/9780429492259}, - edition = {2}, - isbn = {9780429492259}, - publisher = {Chapman and Hall/{CRC}}, - library = {QA278}, - addendum = {https://lccn.loc.gov/2019719619}, - abstract = {Missing data pose challenges to real-life data analysis. Simple ad-hoc fixes, like deletion or mean imputation, only work under highly restrictive conditions, which are often not met in practice. Multiple imputation replaces each missing value by multiple plausible values. The variability between these replacements reflects our ignorance of the true (but missing) value. Each of the completed data sets is then analyzed by standard methods, and the results are pooled to obtain unbiased estimates with correct confidence intervals. Multiple imputation is a general approach that also inspires novel solutions to old problems by reformulating the task at hand as a missing-data problem. - This is the second edition of a popular book on multiple imputation, focused on explaining the application of methods through detailed worked examples using the MICE package as developed by the author. This new edition incorporates the recent developments in this fast-moving field. - This class-tested book avoids mathematical and technical details as much as possible: formulas are accompanied by verbal statements that explain the formula in accessible terms. The book sharpens the reader’s intuition on how to think about missing data, and provides all the tools needed to execute a well-grounded quantitative analysis in the presence of missing data.}, - keywords = {Multivariate analysis, Multiple imputation (Statistics), Missing observations (Statistics)}, -} - -@InCollection{Zhang-Wang-Tong-2015, - author = {Zhiyong Zhang and Lijuan Wang and Xin Tong}, - booktitle = {Quantitative Psychology Research}, - date = {2015}, - title = {Mediation analysis with missing data through multiple imputation and bootstrap}, - doi = {10.1007/978-3-319-19977-1_24}, - pages = {341--355}, - abstract = {A method using multiple imputation and bootstrap for dealing with missing data in mediation analysis is introduced and implemented in both SAS and R. Through simulation studies, it is shown that the method performs well for both MCAR and MAR data without and with auxiliary variables. It is also shown that the method can work for MNAR data if auxiliary variables related to missingness are included. The application of the method is demonstrated through the analysis of a subset of data from the National Longitudinal Survey of Youth.
Mediation analysis with missing data can be conducted using the provided SAS macros and R package bmem.}, - publisher = {Springer International Publishing}, - keywords = {mediation analysis, missing data, multiple imputation, bootstrap}, -} - -@Report{Jones-Waller-2013b, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013-05-25}, - institution = {University of Minnesota-Twin Cities}, - title = {The normal-theory and asymptotic distribution-free ({ADF}) covariance matrix of standardized regression coefficients: Theoretical extensions and finite sample behavior}, - type = {techreport}, - url = {http://users.cla.umn.edu/~nwaller/downloads/techreports/TR052913.pdf}, - urldate = {2022-07-22}, - abstract = {Yuan and Chan (2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a new method for computing this covariance matrix from correlations. We then show that Yuan and Chan's original equations can also be used when only correlational data are available. Next, we describe an asymptotic distribution-free (ADF; Browne, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with non-normal data in moderate-to-large samples using both simulated and real-data examples. Finally, we provide R code (R Development Core Team, 2012) in an Appendix to make these methods accessible to applied researchers.}, -} - -@Manual{Muthen-Muthen-2017, - author = {Linda K. Muth{\a'e}n and Bengt O. Muth{\a'e}n}, - date = {2017}, - title = {{Mplus} user’s guide. {Eighth} edition}, - location = {Los Angeles, CA}, - publisher = {{Muth\'en} \& {Muth\'en}}, - annotation = {sem, sem-software}, -} - -@Article{Cheung-2021, - author = {Mike W.-L. Cheung}, - date = {2021-06}, - journaltitle = {Alcohol and Alcoholism}, - title = {Synthesizing indirect effects in mediation models with meta-analytic methods}, - doi = {10.1093/alcalc/agab044}, - number = {1}, - pages = {5--15}, - volume = {57}, - abstract = {Aims - A mediator is a variable that explains the underlying mechanism between an independent variable and a dependent variable. The indirect effect indicates the effect from the predictor to the outcome variable via the mediator. In contrast, the direct effect represents the predictor's effect on the outcome variable after controlling for the mediator. - Methods - A single study rarely provides enough evidence to answer research questions in a particular domain. Replications are generally recommended as the gold standard to conduct scientific research. When a sufficient number of studies have been conducted addressing similar research questions, a meta-analysis can be used to synthesize those studies' findings. - Results - The main objective of this paper is to introduce two frameworks for integrating studies using mediation analysis. The first framework involves calculating standardized indirect effects and direct effects and conducting a multivariate meta-analysis on those effect sizes. The second one uses meta-analytic structural equation modeling to synthesize correlation matrices and fit mediation models on the average correlation matrix. We illustrate these procedures on a real dataset using the R statistical platform. 
- Conclusion - This paper closes with some further directions for future studies.}, - publisher = {Oxford University Press ({OUP})}, - keywords = {heterogeneity, gold standard, outcome variable, datasets, mediation analysis}, -} - -@Article{Cheung-Pesigan-2023a, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {{FINDOUT}: Using either {SPSS} commands or graphical user interface to identify influential cases in structural equation modeling in {AMOS}}, - doi = {10.1080/00273171.2022.2148089}, - pages = {1--5}, - abstract = {The results in a structural equation modeling (SEM) analysis can be influenced by just a few observations, called influential cases. Tools have been developed for users of R to identify them. However, similar tools are not available for AMOS, which is also a popular SEM software package. We introduce the FINDOUT toolset, a group of SPSS extension commands, and an AMOS plugin, to identify influential cases and examine how these cases influence the results. The SPSS commands can be used either as syntax commands or as custom dialogs from pull-down menus, and the AMOS plugin can be run from AMOS pull-down menu. We believe these tools can help researchers to examine the robustness of their findings to influential cases.}, - publisher = {Informa {UK} Limited}, - keywords = {influential cases, outliers, structural equation modeling, AMOS, sensitivity analysis, SPSS}, -} - -@Article{Cheung-Pesigan-2023b, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-05}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{semlbci}: An {R} package for forming likelihood-based confidence intervals for parameter estimates, correlations, indirect effects, and other derived parameters}, - doi = {10.1080/10705511.2023.2183860}, - pages = {1--15}, - abstract = {There are three common types of confidence interval (CI) in structural equation modeling (SEM): Wald-type CI, bootstrapping CI, and likelihood-based CI (LBCI). LBCI has the following advantages: (1) it has better coverage probabilities and Type I error rate compared to Wald-type CI when the sample size is finite; (2) it correctly tests the null hypothesis of a parameter based on likelihood ratio chi-square difference test; (3) it is less computationally intensive than bootstrapping CI; and (4) it is invariant to transformations. However, LBCI is not available in many popular SEM software packages. We developed an R package, semlbci, for forming LBCI for parameters in models fitted by lavaan, a popular open-source SEM package, such that researchers have more options in forming CIs for parameters in SEM. 
The package supports both unstandardized and standardized estimates, derived parameters such as indirect effect, multisample models, and the robust LBCI proposed by Falk.}, - publisher = {Informa {UK} Limited}, - keywords = {confidence interval, likelihood-based confidence interval, robust method, structural equation modeling}, - annotation = {r, r-packages, sem, sem-software, lb}, -} - -@Article{Cheung-Pesigan-Vong-2022, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan and Weng Ngai Vong}, - date = {2022-03}, - journaltitle = {Behavior Research Methods}, - title = {{DIY} bootstrapping: Getting the nonparametric bootstrap confidence interval in {SPSS} for any statistics or function of statistics (when this bootstrapping is appropriate)}, - doi = {10.3758/s13428-022-01808-5}, - number = {2}, - pages = {474--490}, - volume = {55}, - abstract = {Researchers can generate bootstrap confidence intervals for some statistics in SPSS using the BOOTSTRAP command. However, this command can only be applied to selected procedures, and only to selected statistics in these procedures. We developed an extension command and prepared some sample syntax files based on existing approaches from the Internet to illustrate how researchers can (a) generate a large number of nonparametric bootstrap samples, (b) do desired analysis on all these samples, and (c) form the bootstrap confidence intervals for selected statistics using the OMS commands. We developed these tools to help researchers apply nonparametric bootstrapping to any statistics for which this method is appropriate, including statistics derived from other statistics, such as standardized effect size measures computed from the t test results. We also discussed how researchers can extend the tools for other statistics and scenarios they encounter.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {bootstrapping, effect sizes, confidence intervals}, -} - -@Article{Li-Oravecz-Zhou-etal-2022, - author = {Yanling Li and Zita Oravecz and Shuai Zhou and Yosef Bodovski and Ian J. Barnett and Guangqing Chi and Yuan Zhou and Naomi P. Friedman and Scott I. Vrieze and Sy-Miin Chow}, - date = {2022-01}, - journaltitle = {Psychometrika}, - title = {{Bayesian} forecasting with a regime-switching zero-inflated multilevel poisson regression model: An application to adolescent alcohol use with spatial covariates}, - doi = {10.1007/s11336-021-09831-9}, - number = {2}, - pages = {376--402}, - volume = {87}, - abstract = {In this paper, we present and evaluate a novel Bayesian regime-switching zero-inflated multilevel Poisson (RS-ZIMLP) regression model for forecasting alcohol use dynamics. The model partitions individuals’ data into two phases, known as regimes, with: (1) a zero-inflation regime that is used to accommodate high instances of zeros (non-drinking) and (2) a multilevel Poisson regression regime in which variations in individuals’ log-transformed average rates of alcohol use are captured by means of an autoregressive process with exogenous predictors and a person-specific intercept. The times at which individuals are in each regime are unknown, but may be estimated from the data. We assume that the regime indicator follows a first-order Markov process as related to exogenous predictors of interest. The forecast performance of the proposed model was evaluated using a Monte Carlo simulation study and further demonstrated using substance use and spatial covariate data from the Colorado Online Twin Study (CoTwins). 
Results showed that the proposed model yielded better forecast performance compared to a baseline model which predicted all cases as non-drinking and a reduced ZIMLP model without the RS structure, as indicated by higher AUC (the area under the receiver operating characteristic (ROC) curve) scores, and lower mean absolute errors (MAEs) and root-mean-square errors (RMSEs). The improvements in forecast performance were even more pronounced when we limited the comparisons to participants who showed at least one instance of transition to drinking. }, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Bayesian zero-inflated Poisson model, forecast, intensive longitudinal data, regime-switching, spatial data, substance use}, - annotation = {bayesian, ild}, -} - -@Article{McNeish-MacKinnon-2022, - author = {Daniel McNeish and David P. MacKinnon}, - date = {2022-12}, - journaltitle = {Psychological Methods}, - title = {Intensive longitudinal mediation in {Mplus}}, - doi = {10.1037/met0000536}, - abstract = {Much of the existing longitudinal mediation literature focuses on panel data where relatively few repeated measures are collected over a relatively broad timespan. However, technological advances in data collection (e.g., smartphones, wearables) have led to a proliferation of short duration, densely collected longitudinal data in behavioral research. These intensive longitudinal data differ in structure and focus relative to traditionally collected panel data. As a result, existing methodological resources do not necessarily extend to nuances present in the recent influx of intensive longitudinal data and designs. In this tutorial, we first cover potential limitations of traditional longitudinal mediation models to accommodate unique characteristics of intensive longitudinal data. Then, we discuss how recently developed dynamic structural equation models (DSEMs) may be well-suited for mediation modeling with intensive longitudinal data and can overcome some of the limitations associated with traditional approaches. We describe four increasingly complex intensive longitudinal mediation models: (a) stationary models where the indirect effect is constant over time and people, (b) person-specific models where the indirect effect varies across people, (c) dynamic models where the indirect effect varies across time, and (d) cross-classified models where the indirect effect varies across both time and people. We apply each model to a running example featuring a mobile health intervention designed to improve health behavior of individuals with binge eating disorder. 
In each example, we provide annotated Mplus code and interpretation of the output to guide empirical researchers through mediation modeling with this increasingly popular type of longitudinal data.}, - publisher = {American Psychological Association ({APA})}, - keywords = {intensive longitudinal data, time-series, mediation, EMA, daily diary}, - annotation = {ild, ild-mediation, ild-software}, -} - -@Article{Nust-Eddelbuettel-Bennett-etal-2020, - author = {Daniel N{\"u}st and Dirk Eddelbuettel and Dom Bennett and Robrecht Cannoodt and Dav Clark and Gergely Dar{\a'o}czi and Mark Edmondson and Colin Fay and Ellis Hughes and Lars Kjeldgaard and Sean Lopp and Ben Marwick and Heather Nolis and Jacqueline Nolis and Hong Ooi and Karthik Ram and Noam Ross and Lori Shepherd and P{\a'e}ter S{\a'o}lymos and Tyson Lee Swetnam and Nitesh Turaga and Charlotte {Van Petegem} and Jason Williams and Craig Willis and Nan Xiao}, - date = {2020}, - journaltitle = {The R Journal}, - title = {The {Rockerverse}: Packages and applications for containerisation with {R}}, - doi = {10.32614/rj-2020-007}, - number = {1}, - pages = {437}, - volume = {12}, - abstract = {The Rocker Project provides widely used Docker images for R across different application scenarios. This article surveys downstream projects that build upon the Rocker Project images and presents the current state of R packages for managing Docker images and controlling containers. These use cases cover diverse topics such as package development, reproducible research, collaborative work, cloud-based data processing, and production deployment of services. The variety of applications demonstrates the power of the Rocker Project specifically and containerisation in general. Across the diverse ways to use containers, we identified common themes: reproducible environments, scalability and efficiency, and portability across clouds. We conclude that the current growth and diversification of use cases is likely to continue its positive impact, but see the need for consolidating the Rockerverse ecosystem of packages, developing common practices for applications, and exploring alternative containerisation software.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-rocker}, -} - -@Article{Pesigan-Cheung-2020, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2020-12}, - journaltitle = {Frontiers in Psychology}, - title = {{SEM}-based methods to form confidence intervals for indirect effect: Still applicable given nonnormality, under certain conditions}, - doi = {10.3389/fpsyg.2020.571928}, - volume = {11}, - abstract = {A SEM-based approach using likelihood-based confidence interval (LBCI) has been proposed to form confidence intervals for unstandardized and standardized indirect effect in mediation models. However, when used with the maximum likelihood estimation, this approach requires that the variables are multivariate normally distributed. This can affect the LBCIs of unstandardized and standardized effect differently. In the present study, the robustness of this approach when the predictor is not normally distributed but the error terms are conditionally normal, which does not violate the distributional assumption of ordinary least squares (OLS) estimation, is compared to four other approaches: nonparametric bootstrapping, two variants of LBCI, LBCI assuming the predictor is fixed (LBCI-Fixed-X) and LBCI based on ADF estimation (LBCI-ADF), and Monte Carlo. 
A simulation study was conducted using a simple mediation model and a serial mediation model, manipulating the distribution of the predictor. The Monte Carlo method performed worst among the methods. LBCI and LBCI-Fixed-X had suboptimal performance when the distributions had high kurtosis and the population indirect effects were medium to large. In some conditions, the problem was severe even when the sample size was large. LBCI-ADF and nonparametric bootstrapping had coverage probabilities close to the nominal value in nearly all conditions, although the coverage probabilities were still suboptimal for the serial mediation model when the sample size was small with respect to the model. Implications of these findings in the context of this special case of nonnormal data were discussed.}, - publisher = {Frontiers Media {SA}}, - keywords = {mediation, nonnormal, confidence interval, structural equation modeling, bootstrapping}, -} - -@Article{Pesigan-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2023-08}, - journaltitle = {Behavior Research Methods}, - title = {{Monte Carlo} confidence intervals for the indirect effect with missing data}, - doi = {10.3758/s13428-023-02114-4}, - abstract = {Missing data is a common occurrence in mediation analysis. As a result, the methods used to construct confidence intervals around the indirect effect should consider missing data. Previous research has demonstrated that, for the indirect effect in data with complete cases, the Monte Carlo method performs as well as nonparametric bootstrap confidence intervals (see MacKinnon et al., Multivariate Behavioral Research, 39(1), 99–128, 2004; Preacher \& Selig, Communication Methods and Measures, 6(2), 77–98, 2012; Tofighi \& MacKinnon, Structural Equation Modeling: A Multidisciplinary Journal, 23(2), 194–205, 2015). In this manuscript, we propose a simple, fast, and accurate two-step approach for generating confidence intervals for the indirect effect, in the presence of missing data, based on the Monte Carlo method. In the first step, an appropriate method, for example, full-information maximum likelihood or multiple imputation, is used to estimate the parameters and their corresponding sampling variance-covariance matrix in a mediation model. In the second step, the sampling distribution of the indirect effect is simulated using estimates from the first step. A confidence interval is constructed from the resulting sampling distribution. A simulation study with various conditions is presented. Implications of the results for applied research are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Monte Carlo method, nonparametric bootstrap, indirect effect, mediation, missing completely at random, missing at random, full-information maximum likelihood, multiple imputation}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Pesigan-Sun-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Rong Wei Sun and Shu Fai Cheung}, - date = {2023-04}, - journaltitle = {Multivariate Behavioral Research}, - title = {{betaDelta} and {betaSandwich}: Confidence intervals for standardized regression coefficients in {R}}, - doi = {10.1080/00273171.2023.2201277}, - pages = {1--4}, - abstract = {The multivariate delta method was used by Yuan and Chan to estimate standard errors and confidence intervals for standardized regression coefficients. 
Jones and Waller extended the earlier work to situations where data are nonnormal by utilizing Browne’s asymptotic distribution-free (ADF) theory. Furthermore, Dudgeon developed standard errors and confidence intervals, employing heteroskedasticity-consistent (HC) estimators, that are robust to nonnormality with better performance in smaller sample sizes compared to Jones and Waller’s ADF technique. Despite these advancements, empirical research has been slow to adopt these methodologies. This can be a result of the dearth of user-friendly software programs to put these techniques to use. We present the betaDelta and the betaSandwich packages in the R statistical software environment in this manuscript. Both the normal-theory approach and the ADF approach put forth by Yuan and Chan and Jones and Waller are implemented by the betaDelta package. The HC approach proposed by Dudgeon is implemented by the betaSandwich package. The use of the packages is demonstrated with an empirical example. We think the packages will enable applied researchers to accurately assess the sampling variability of standardized regression coefficients.}, - publisher = {Informa {UK} Limited}, - keywords = {standardized regression coefficients, confidence intervals, delta method standard errors, heteroskedasticity-consistent standard errors, R package}, - annotation = {r, r-packages}, -} - -@Article{Savalei-Rosseel-2021, - author = {Victoria Savalei and Yves Rosseel}, - date = {2021-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Computational options for standard errors and test statistics with incomplete normal and nonnormal data in {SEM}}, - doi = {10.1080/10705511.2021.1877548}, - number = {2}, - pages = {163--181}, - volume = {29}, - abstract = {This article provides an overview of different computational options for inference following normal theory maximum likelihood (ML) estimation in structural equation modeling (SEM) with incomplete normal and nonnormal data. Complete data are covered as a special case. These computational options include whether the information matrix is observed or expected, whether the observed information matrix is estimated numerically or using an analytic asymptotic approximation, and whether the information matrix and the outer product matrix of the score vector are evaluated at the saturated or at the structured estimates. A variety of different standard errors and robust test statistics become possible by varying these options. We review the asymptotic properties of these computational variations, and we show how to obtain them using lavaan in R. We hope that this article will encourage methodologists to study the impact of the available computational options on the performance of standard errors and test statistics in SEM.}, - publisher = {Informa {UK} Limited}, - keywords = {incomplete data, nonnormal data, robust corrections, software implementation}, -} - -@Article{Tofighi-Kelley-2020, - author = {Davood Tofighi and Ken Kelley}, - date = {2020}, - journaltitle = {Psychological Methods}, - title = {Improved inference in mediation analysis: Introducing the model-based constrained optimization procedure}, - doi = {10.1037/met0000259}, - pages = {496--515}, - volume = {25}, - abstract = {Mediation analysis is an important approach for investigating causal pathways. 
One approach used in mediation analysis is the test of an indirect effect, which seeks to measure how the effect of an independent variable impacts an outcome variable through one or more mediators. However, in many situations the proposed tests of indirect effects, including popular confidence interval-based methods, tend to produce poor Type I error rates when mediation does not occur and, more generally, only allow dichotomous decisions of ``not significant'' or ``significant'' with regards to the statistical conclusion. To remedy these issues, we propose a new method, a likelihood ratio test (LRT), that uses non-linear constraints in what we term the model-based constrained optimization (MBCO) procedure. The MBCO procedure (a) offers a more robust Type I error rate than existing methods; (b) provides a p-value, which serves as a continuous measure of compatibility of data with the hypothesized null model (not just a dichotomous reject or fail-to-reject decision rule); (c) allows simple and complex hypotheses about mediation (i.e., one or more mediators; different mediational pathways), and (d) allows the mediation model to use observed or latent variables. The MBCO procedure is based on a structural equation modeling framework (even if latent variables are not specified) with specialized fitting routines, namely with the use of non-linear constraints. We advocate using the MBCO procedure to test hypotheses about an indirect effect in addition to reporting a confidence interval to capture uncertainty about the indirect effect because this combination transcends existing methods.}, - publisher = {{American Psychological Association ({APA})}}, -} - -@Article{Wang-Zhang-2020, - author = {Lijuan Wang and Qian Zhang}, - date = {2020-06}, - journaltitle = {Psychological Methods}, - title = {Investigating the impact of the time interval selection on autoregressive mediation modeling: Result interpretations, effect reporting, and temporal designs}, - doi = {10.1037/met0000235}, - number = {3}, - pages = {271--291}, - volume = {25}, - abstract = {This study investigates the impact of the time interval (the time passed between 2 consecutive measurements) selection on autoregressive mediation modeling (AMM). For a widely used autoregressive mediation model, via analytical derivations, we explained why and how the conventionally reported time-specific coefficient estimates (e.g., $\hat{a} \hat{b}$ and $\hat{c}^{\prime}$ ) and inference results in AMM provide limited information and can arrive in even misleading conclusions about direct and indirect effects over time. Furthermore, under the stationarity assumption, we proposed an approach to calculate the overall direct and indirect effect estimates over time and the time lag lengths at which they reach maxima, using AMM results. The derivation results revealed that the overall direct and indirect effect curves are asymptotically invariant to the time interval selection, under stationarity. With finite samples and thus sampling errors and potential computing problems, however, our simulation results revealed that the overall indirect effect curves were better recovered when the time interval is selected to be closer to half of the time lag length at which the overall indirect effect reaches its maximum. An R function and an R Shiny app were developed to obtain the overall direct and indirect effect curves over time and facilitate the time interval selection using AMM results. 
Our findings provide another look at the connections between AMM and continuous time mediation modeling and the connections are discussed.}, - publisher = {American Psychological Association ({APA})}, - keywords = {longitudinal mediation, autoregressive mediation modeling, time interval selection, time-specific indirect effect, overall indirect effect}, - annotation = {ild, ild-mediation}, -} - -@Book{Hayes-2022, - author = {Andrew F. Hayes}, - date = {2022}, - title = {Introduction to mediation, moderation, and conditional process analysis: A regression-based approach}, - series = {Methodology in the social sciences}, - edition = {3}, - isbn = {9781462549030}, - pages = {732}, - library = {HA31.3 .H39 2022}, - addendum = {https://lccn.loc.gov/2021031108}, - abstract = {Lauded for its easy-to-understand, conversational discussion of the fundamentals of mediation, moderation, and conditional process analysis, this book has been fully revised with 50\% new content, including sections on working with multicategorical antecedent variables, the use of PROCESS version 3 for SPSS and SAS for model estimation, and annotated PROCESS v3 outputs. Using the principles of ordinary least squares regression, Andrew F. Hayes carefully explains procedures for testing hypotheses about the conditions under and the mechanisms by which causal effects operate, as well as the moderation of such mechanisms. Hayes shows how to estimate and interpret direct, indirect, and conditional effects; probe and visualize interactions; test questions about moderated mediation; and report different types of analyses. Data for all the examples are available on the companion website (www.afhayes.com) along with links to download PROCESS.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Mediation (Statistics), Regression analysis}, -} - -@Manual{Arbuckle-2020, - author = {James L. Arbuckle}, - date = {2020}, - title = {Amos 27.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Manual{Arbuckle-2021, - author = {James L. Arbuckle}, - date = {2021}, - title = {Amos 28.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Report{Asparouhov-Muthen-2022, - author = {Tihomir Asparouhov and Bengt O. Muth{\a'e}n}, - date = {2022}, - title = {Multiple imputation with {Mplus}}, - type = {techreport}, - url = {http://www.statmodel.com/download/Imputations7.pdf}, - institution = {http://www.statmodel.com}, -} - -@Manual{Eddelbuettel-Francois-Allaire-etal-2023, - title = {{Rcpp}: Seamless {R} and {C++} Integration}, - author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Inaki Ucar and Douglas Bates and John Chambers}, - year = {2023}, - note = {R package version 1.0.11}, - url = {https://CRAN.R-project.org/package=Rcpp}, - annotation = {r, r-package}, -} - -@Manual{Jorgensen-Pornprasertmanit-Schoemann-etal-2022, - title = {{semTools}: Useful tools for structural equation modeling}, - author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel}, - year = {2022}, - note = {R package version 0.5-6}, - url = {https://CRAN.R-project.org/package=semTools}, -} - -@Misc{Kurtzer-cclerget-Bauer-etal-2021, - author = {Gregory M. 
Kurtzer and {cclerget} and Michael Bauer and Ian Kaneshiro and David Trudgian and David Godlove}, - date = {2021}, - title = {{hpcng/singularity: Singularity 3.7.3}}, - doi = {10.5281/ZENODO.1310023}, - copyright = {Open Access}, - publisher = {Zenodo}, - annotation = {container, container-singularity}, -} - -@Manual{RCoreTeam-2021, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2021}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2022, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2022}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2023, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2023}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{Waller-2022, - author = {Niels G. Waller}, - title = {{fungible}: Psychometric functions from the {Waller Lab}}, - year = {2022}, - note = {R package version 2.2.1}, - url = {https://CRAN.R-project.org/package=fungible}, - publisher = {The R Foundation}, - annotation = {r, r-package}, -} - -@PhdThesis{Pesigan-2022, - author = {Ivan Jacob Agaloos Pesigan}, - year = {2022}, - school = {University of Macau}, - title = {Confidence intervals for standardized coefficients: Applied to regression coefficients in primary studies and indirect effects in meta-analytic structural equation modeling}, - type = {phdthesis}, -} diff --git a/CITATION.cff b/CITATION.cff index 4a73989..c006f2a 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -40,7 +40,7 @@ preferred-citation: doi: 10.1080/00273171.2023.2201277 journal: Multivariate Behavioral Research notes: R package version 1.0.5.9000 -repository: https://packagemanager.rstudio.com/all/__linux__/focal/latest/ +repository: https://packagemanager.rstudio.com/all/__linux__/jammy/latest/ repository-code: https://github.com/jeksterslab/betaSandwich url: https://jeksterslab.github.io/betaSandwich/ contact: diff --git a/vignettes/vignettes.bib b/vignettes/vignettes.bib deleted file mode 100644 index cd21073..0000000 --- a/vignettes/vignettes.bib +++ /dev/null @@ -1,1450 +0,0 @@ -@Article{Craig-1936, - author = {Cecil C. Craig}, - date = {1936-03}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {On the frequency function of $xy$}, - doi = {10.1214/aoms/1177732541}, - number = {1}, - pages = {1--15}, - volume = {7}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Aroian-1947, - author = {Leo A. Aroian}, - date = {1947-06}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The probability function of the product of two normally distributed variables}, - doi = {10.1214/aoms/1177730442}, - number = {2}, - pages = {265--271}, - volume = {18}, - abstract = {Let $x$ and $y$ follow a normal bivariate probability function with means $\bar X, \bar Y$, standard deviations $\sigma_1, \sigma_2$, respectively, $r$ the coefficient of correlation, and $\rho_1 = \bar X/\sigma_1, \rho_2 = \bar Y/\sigma_2$. Professor C. C. 
Craig [1] has found the probability function of $z = xy/\sigma_1\sigma_2$ in closed form as the difference of two integrals. For purposes of numerical computation he has expanded this result in an infinite series involving powers of $z, \rho_1, \rho_2$, and Bessel functions of a certain type; in addition, he has determined the moments, semin-variants, and the moment generating function of $z$. However, for $\rho_1$ and $\rho_2$ large, as Craig points out, the series expansion converges very slowly. Even for $\rho_1$ and $\rho_2$ as small as 2, the expansion is unwieldy. We shall show that as $\rho_1$ and $\rho_2 \rightarrow \infty$, the probability function of $z$ approaches a normal curve and in case $r = 0$ the Type III function and the Gram-Charlier Type A series are excellent approximations to the $z$ distribution in the proper region. Numerical integration provides a substitute for the infinite series wherever the exact values of the probability function of $z$ are needed. Some extensions of the main theorem are given in section 5 and a practical problem involving the probability function of $z$ is solved.}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Cochran-1952, - author = {William G. Cochran}, - date = {1952-09}, - journaltitle = {The Annals of Mathematical Statistics}, - title = {The $\chi^{2}$ test of goodness of fit}, - doi = {10.1214/aoms/1177729380}, - number = {3}, - pages = {315--345}, - volume = {23}, - publisher = {Institute of Mathematical Statistics}, - abstract = {This paper contains an expository discussion of the chi square test of goodness of fit, intended for the student and user of statistical theory rather than for the expert. Part I describes the historical development of the distribution theory on which the test rests. Research bearing on the practical application of the test--in particular on the minimum expected number per class and the construction of classes--is discussed in Part II. Some varied opinions about the extent to which the test actually is useful to the scientist are presented in Part III. Part IV outlines a number of tests that have been proposed as substitutes for the chi square test (the $\omega^2$ test, the smooth test, the likelihood ratio test) and Part V a number of supplementary tests (the run test, tests based on low moments, subdivision of chi square into components).}, - publisher = {Institute of Mathematical Statistics}, -} - -@Article{Goodman-1960, - author = {Leo A. Goodman}, - date = {1960-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {On the exact variance of products}, - doi = {10.1080/01621459.1960.10483369}, - number = {292}, - pages = {708--713}, - volume = {55}, - abstract = {A simple exact formula for the variance of the product of two random variables, say, x and y, is given as a function of the means and central product-moments of x and y. The usual approximate variance formula for xy is compared with this exact formula; e.g., we note, in the special case where x and y are independent, that the ``variance'' computed by the approximate formula is less than the exact variance, and that the accuracy of the approximation depends on the sum of the reciprocals of the squared coefficients of variation of x and y. The case where x and y need not be independent is also studied, and exact variance formulas are presented for several different ``product estimates.'' (The usefulness of exact formulas becomes apparent when the variances of these estimates are compared.) 
When x and y are independent, simple unbiased estimates of these exact variances are suggested; in the more general case, consistent estimates are presented.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Bradley-1978, - author = {James V. Bradley}, - date = {1978-11}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Robustness?}, - doi = {10.1111/j.2044-8317.1978.tb00581.x}, - number = {2}, - pages = {144--152}, - volume = {31}, - publisher = {Wiley}, - annotation = {robustness}, - abstract = {The actual behaviour of the probability of a Type I error under assumption violation is quite complex, depending upon a wide variety of interacting factors. Yet allegations of robustness tend to ignore its highly particularistic nature and neglect to mention important qualifying conditions. The result is often a vast overgeneralization which nevertheless is difficult to refute since a standard quantitative definition of what constitutes robustness does not exist. Yet under any halfway reasonable quantitative definition, many of the most prevalent claims of robustness would be demonstrably false. Therefore robustness is a highly questionable concept.}, -} - -@Article{Rubin-1976, - author = {Donald B. Rubin}, - date = {1976}, - journaltitle = {Biometrika}, - title = {Inference and missing data}, - doi = {10.1093/biomet/63.3.581}, - number = {3}, - pages = {581--592}, - volume = {63}, - publisher = {Oxford University Press ({OUP})}, - abstract = {When making sampling distribution inferences about the parameter of the data, $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are `missing at random' and the observed data are `observed at random', but these inferences are generally conditional on the observed pattern of missing data. When making direct-likelihood or Bayesian inferences about $\theta$, it is appropriate to ignore the process that causes missing data if the missing data are missing at random and the parameter of the missing data process is `distinct' from $\theta$. These conditions are the weakest general conditions under which ignoring the process that causes missing data always leads to correct inferences.}, - publisher = {Oxford University Press ({OUP})}, -} - -@Article{Baron-Kenny-1986, - author = {Reuben M. Baron and David A. Kenny}, - date = {1986}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {The moderator-mediator variable distinction in social psychological research: Conceptual, strategic, and statistical considerations}, - doi = {10.1037/0022-3514.51.6.1173}, - number = {6}, - pages = {1173--1182}, - volume = {51}, - abstract = {In this article, we attempt to distinguish between the properties of moderator and mediator variables at a number of levels. First, we seek to make theorists and researchers aware of the importance of not using the terms moderator and mediator interchangeably by carefully elaborating, both conceptually and strategically, the many ways in which moderators and mediators differ. We then go beyond this largely pedagogical function and delineate the conceptual and strategic implications of making use of such distinctions with regard to a wide range of phenomena, including control and stress, attitudes, and personality traits. 
We also provide a specific compendium of analytic procedures appropriate for making the most effective use of the moderator and mediator distinction, both separately and in terms of a broader causal system that includes both moderators and mediators.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Browne-1984, - author = {Michael W. Browne}, - date = {1984-05}, - journaltitle = {British Journal of Mathematical and Statistical Psychology}, - title = {Asymptotically distribution-free methods for the analysis of covariance structures}, - doi = {10.1111/j.2044-8317.1984.tb00789.x}, - number = {1}, - pages = {62--83}, - volume = {37}, - abstract = {Methods for obtaining tests of fit of structural models for covariance matrices and estimator standard error which are asymptotically distribution free are derived. Modifications to standard normal theory tests and standard errors which make them applicable to the wider class of elliptical distributions are provided. A random sampling experiment to investigate some of the proposed methods is described.}, - publisher = {Wiley}, -} - -@Article{Efron-1987, - author = {Bradley Efron}, - date = {1987-03}, - journaltitle = {Journal of the American Statistical Association}, - title = {Better bootstrap confidence intervals}, - doi = {10.1080/01621459.1987.10478410}, - number = {397}, - pages = {171--185}, - volume = {82}, - abstract = {We consider the problem of setting approximate confidence intervals for a single parameter $\theta$ in a multiparameter family. The standard approximate intervals based on maximum likelihood theory, $\hat{\theta} \pm \hat{\sigma} z^{\left( \alpha \right)}$, can be quite misleading. In practice, tricks based on transformations, bias corrections, and so forth, are often used to improve their accuracy. The bootstrap confidence intervals discussed in this article automatically incorporate such tricks without requiring the statistician to think them through for each new application, at the price of a considerable increase in computational effort. The new intervals incorporate an improvement over previously suggested methods, which results in second-order correctness in a wide variety of problems. In addition to parametric families, bootstrap intervals are also developed for nonparametric situations.}, - publisher = {Informa {UK} Limited}, - keywords = {resampling methods, approximate confidence intervals, transformations, nonparametric intervals, second-order theory, skewness corrections}, -} - -@Article{Efron-1988, - author = {Bradley Efron}, - date = {1988}, - journaltitle = {Psychological Bulletin}, - title = {Bootstrap confidence intervals: Good or bad?}, - doi = {10.1037/0033-2909.104.2.293}, - number = {2}, - pages = {293--296}, - volume = {104}, - abstract = {The bootstrap is a nonparametric technique for estimating standard errors and approximate confidence intervals. Rasmussen has used a simulation experiment to suggest that bootstrap confidence intervals perform very poorly in the estimation of a correlation coefficient. Part of Rasmussen's simulation is repeated. A careful look at the results shows the bootstrap intervals performing quite well. Some remarks are made concerning the virtues and defects of bootstrap intervals in general.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{James-Brett-1984, - author = {Lawrence R. James and Jeanne M. 
Brett}, - date = {1984}, - journaltitle = {Journal of Applied Psychology}, - title = {Mediators, moderators, and tests for mediation}, - doi = {10.1037/0021-9010.69.2.307}, - number = {2}, - pages = {307--321}, - volume = {69}, - abstract = {Discusses mediation relations in causal terms. Influences of an antecedent are transmitted to a consequence through an intervening mediator. Mediation relations may assume a number of functional forms, including nonadditive, nonlinear, and nonrecursive forms. Although mediation and moderation are distinguishable processes, with nonadditive forms (moderated mediation) a particular variable may be both a mediator and a moderator within a single set of functional relations. Current models for testing mediation relations in industrial and organizational psychology often involve an interplay between exploratory (correlational) statistical tests and causal inference. It is suggested that no middle ground exists between exploratory and confirmatory (causal) analysis and that attempts to explain how mediation processes occur require specified causal models.}, - publisher = {American Psychological Association ({APA})}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Judd-Kenny-1981, - author = {Charles M. Judd and David A. Kenny}, - date = {1981-10}, - journaltitle = {Evaluation Review}, - title = {Process analysis}, - doi = {10.1177/0193841x8100500502}, - number = {5}, - pages = {602--619}, - volume = {5}, - abstract = {This article presents the rationale and procedures for conducting a process analysis in evaluation research. Such an analysis attempts to identify the process that mediates the effects of some treatment, by estimating the parameters of a causal chain between the treatment and some outcome variable. Two different procedures for estimating mediation are discussed. In addition we present procedures for examining whether a treatment exerts its effects, in part, by altering the mediating process that produces the outcome. Finally, the benefits of process analysis in evaluation research are underlined.}, - publisher = {{SAGE} Publications}, - annotation = {mediation, mediation-causalsteps}, -} - -@Article{Micceri-1989, - author = {Theodore Micceri}, - date = {1989}, - journaltitle = {Psychological Bulletin}, - title = {The unicorn, the normal curve, and other improbable creatures}, - doi = {10.1037/0033-2909.105.1.156}, - number = {1}, - pages = {156--166}, - volume = {105}, - abstract = {An investigation of the distributional characteristics of 440 large-sample achievement and psychometric measures found all to be significantly nonnormal at the alpha .01 significance level. Several classes of contamination were found, including tail weights from the uniform to the double exponential, exponential-level asymmetry, severe digit preferences, multimodalities, and modes external to the mean/median interval. Thus, the underlying tenets of normality-assuming statistics appear fallacious for these commonly used types of data. However, findings here also fail to support the types of distributions used in most prior robustness research suggesting the failure of such statistics under nonnormal conditions. A reevaluation of the statistical robustness literature appears appropriate in light of these findings.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Sobel-1982, - author = {Michael E. 
Sobel}, - date = {1982}, - journaltitle = {Sociological Methodology}, - title = {Asymptotic confidence intervals for indirect effects in structural equation models}, - doi = {10.2307/270723}, - pages = {290}, - volume = {13}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1986, - author = {Michael E. Sobel}, - date = {1986}, - journaltitle = {Sociological Methodology}, - title = {Some new results on indirect effects and their standard errors in covariance structure models}, - doi = {10.2307/270922}, - pages = {159}, - volume = {16}, - publisher = {{JSTOR}}, -} - -@Article{Sobel-1987, - author = {Michael E. Sobel}, - date = {1987-08}, - journaltitle = {Sociological Methods {\&} Research}, - title = {Direct and indirect effects in linear structural equation models}, - doi = {10.1177/0049124187016001006}, - number = {1}, - pages = {155--176}, - volume = {16}, - abstract = {This article discusses total indirect effects in linear structural equation models. First, I define these effects. Second, I show how the delta method may be used to obtain the standard errors of the sample estimates of these effects and test hypotheses about the magnitudes of the indirect effects. To keep matters simple, I focus throughout on a particularly simple linear structural equation system; for a treatment of the general case, see Sobel (1986). To illustrate the ideas and results, a detailed example is presented.}, - publisher = {{SAGE} Publications}, -} - -@Article{Venzon-Moolgavkar-1988, - author = {D. J. Venzon and S. H. Moolgavkar}, - date = {1988}, - journaltitle = {Applied Statistics}, - title = {A method for computing profile-likelihood-based confidence intervals}, - doi = {10.2307/2347496}, - number = {1}, - pages = {87}, - volume = {37}, - abstract = {The method of constructing confidence regions based on the generalised likelihood ratio statistic is well known for parameter vectors. A similar construction of a confidence interval for a single entry of a vector can be implemented by repeatedly maximising over the other parameters. We present an algorithm for finding these confidence interval endpoints that requires less computation. It employs a modified Newton-Raphson iteration to solve a system of equations that defines the endpoints.}, - publisher = {{JSTOR}}, - keywords = {confidence intervals, profile likelihood}, -} - -@Article{White-1980, - author = {Halbert White}, - date = {1980-05}, - journaltitle = {Econometrica}, - title = {A heteroskedasticity-consistent covariance matrix estimator and a direct test for heteroskedasticity}, - doi = {10.2307/1912934}, - number = {4}, - pages = {817--838}, - volume = {48}, - abstract = {This paper presents a parameter covariance matrix estimator which is consistent even when the disturbances of a linear regression model are heteroskedastic. This estimator does not depend on a formal model of the structure of the heteroskedasticity. By comparing the elements of the new estimator to those of the usual covariance estimator, one obtains a direct test for heteroskedasticity, since in the absence of heteroskedasticity, the two estimators will be approximately equal, but will generally diverge otherwise. 
The test has an appealing least squares interpretation.}, - publisher = {{JSTOR}}, -} - -@Book{Cohen-1988, - author = {Jacob Cohen}, - date = {1988}, - title = {Statistical power analysis for the behavioral sciences}, - doi = {10.4324/9780203771587}, - edition = {2}, - isbn = {9780203771587}, - publisher = {Routledge}, - library = {HA29 .C66 1988}, - keywords = {Social sciences--Statistical methods, Probabilities, Statistical power analysis}, - addendum = {https://lccn.loc.gov/88012110}, - abstract = {Statistical Power Analysis is a nontechnical guide to power analysis in research planning that provides users of applied statistics with the tools they need for more effective analysis. The Second Edition includes: \begin{itemize} \item a chapter covering power analysis in set correlation and multivariate methods; \item a chapter considering effect size, psychometric reliability, and the efficacy of ``qualifying'' dependent variables and; \item expanded power and sample size tables for multiple regression/correlation. \end{itemize}}, -} - -@Book{NationalResearchCouncil-1982, - author = {{National Research Council}}, - date = {1982-01}, - title = {An assessment of research-doctorate programs in the {United States}: Social and behavioral sciences}, - doi = {10.17226/9781}, - location = {Washington, D.C.}, - publisher = {National Academies Press}, - annotation = {data}, -} - -@Book{Rubin-1987, - author = {Donald B. Rubin}, - date = {1987-06}, - title = {Multiple imputation for nonresponse in surveys}, - doi = {10.1002/9780470316696}, - isbn = {9780470316696}, - location = {New York}, - publisher = {John Wiley {\&} Sons, Inc.}, - library = {HA31.2 .R83 1987}, - keywords = {Multiple imputation (Statistics), Nonresponse (Statistics), Social surveys--Response rate}, - addendum = {https://lccn.loc.gov/86028935}, - annotation = {Lib-Missing-Data-Books}, - abstract = {Demonstrates how nonresponse in sample surveys and censuses can be handled by replacing each missing value with two or more multiple imputations. Clearly illustrates the advantages of modern computing to handle such surveys, and demonstrates the benefit of this statistical technique for researchers who must analyze them. Also presents the background for Bayesian and frequentist theory. After establishing that only standard complete-data methods are needed to analyze a multiply-imputed set, the text evaluates procedures in general circumstances, outlining specific procedures for creating imputations in both the ignorable and nonignorable cases. Examples and exercises reinforce ideas, and the interplay of Bayesian and frequentist ideas presents a unified picture of modern statistics.}, -} - -@Article{Bollen-Stine-1990, - author = {Kenneth A. Bollen and Robert Stine}, - date = {1990}, - journaltitle = {Sociological Methodology}, - title = {Direct and indirect effects: Classical and bootstrap estimates of variability}, - doi = {10.2307/271084}, - pages = {115}, - volume = {20}, - abstract = {The decomposition of effects in structural equation models has been of considerable interest to social scientists. Finite-sample or asymptotic results for the sampling distribution of estimators of direct effects are widely available. Statistical inferences about indirect effects have relied exclusively on asymptotic methods which assume that the limiting distribution of the estimator is normal, with a standard error derived from the delta method. 
We examine bootstrap procedures as another way to generate standard errors and confidence intervals and to estimate the sampling distributions of estimators of direct and indirect effects. We illustrate the classical and the bootstrap methods with three empirical examples. We find that in a moderately large sample, the bootstrap distribution of an estimator is close to that assumed with the classical and delta methods but that in small samples, there are some differences. Bootstrap methods provide a check on the classical and delta methods when the latter are applied under less than ideal conditions.}, - publisher = {{JSTOR}}, -} - -@Article{Li-Raghunathan-Rubin-1991, - author = {K. H. Li and Trivellore Eachambadi Raghunathan and Donald B. Rubin}, - date = {1991-12}, - journaltitle = {Journal of the American Statistical Association}, - title = {Large-sample significance levels from multiply imputed data using moment-based statistics and an {$F$} reference distribution}, - doi = {10.1080/01621459.1991.10475152}, - number = {416}, - pages = {1065--1073}, - volume = {86}, - abstract = {We present a procedure for computing significance levels from data sets whose missing values have been multiply imputed data. This procedure uses moment-based statistics, $m \leq 3$ repeated imputations, and an F reference distribution. When $m = \infty$, we show first that our procedure is essentially the same as the ideal procedure in cases of practical importance and, second, that its deviations from the ideal are basically a function of the coefficient of variation of the canonical ratios of complete to observed information. For small $m$ our procedure's performance is largely governed by this coefficient of variation and the mean of these ratios. Using simulation techniques with small $m$, we compare our procedure's actual and nominal large-sample significance levels and conclude that it is essentially calibrated and thus represents a definite improvement over previously available procedures. Furthermore, we compare the large-sample power of the procedure as a function of $m$ and other factors, such as the dimensionality of the estimand and fraction of missing information, to provide guidance on the choice of the number of imputations; generally, we find the loss of power due to small $m$ to be quite modest in cases likely to occur in practice.}, - publisher = {Informa {UK} Limited}, - keywords = {imputation, missing data, nonresponse, tests of significance}, - annotation = {missing, missing-mi}, -} - -@InBook{Arbuckle-1996, - author = {James L. Arbuckle}, - booktitle = {Advanced structural equation modeling}, - date = {1996}, - title = {Full information estimation in the presence of incomplete data}, - doi = {10.4324/9781315827414}, - editor = {George A. Marcoulides and Randall E. Schumacker}, -} - -@Book{Davison-Hinkley-1997, - author = {Anthony Christopher Davison and David Victor Hinkley}, - publisher = {Cambridge University Press}, - title = {Bootstrap methods and their application}, - series = {Cambridge Series in Statistical and Probabilistic Mathematics}, - date = {1997}, - location = {Cambridge and New York, NY, USA }, - doi = {10.1017/CBO9780511802843}, - isbn = {9780521573917}, - library = {QA276.8 .D38 1997}, - keywords = {Bootstrap (Statistics)}, - addendum = {https://lccn.loc.gov/96030064}, - abstract = {Bootstrap methods are computer-intensive methods of statistical analysis, which use simulation to calculate standard errors, confidence intervals, and significance tests. 
The methods apply for any level of modelling, and so can be used for fully parametric, semiparametric, and completely nonparametric analysis. This 1997 book gives a broad and up-to-date coverage of bootstrap methods, with numerous applied examples, developed in a coherent way with the necessary theoretical basis. Applications include stratified data; finite populations; censored and missing data; linear, nonlinear, and smooth regression models; classification; time series and spatial problems. Special features of the book include: extensive discussion of significance tests and confidence intervals; material on various diagnostic methods; and methods for efficient computation, including improved Monte Carlo simulation. Each chapter includes both practical and theoretical exercises. S-Plus programs for implementing the methods described in the text are available from the supporting website.}, - annotation = {bootstrap}, -} - -@Book{Efron-Tibshirani-1993, - author = {Bradley Efron and Robert J. Tibshirani}, - publisher = {Chapman \& Hall}, - title = {An introduction to the bootstrap}, - series = {Monographs on statistics and applied probability ; 57}, - date = {1993}, - location = {New York}, - doi = {10.1201/9780429246593}, - isbn = {9780412042317}, - library = {QA276.8 .E3745 1993}, - addendum = {https://lccn.loc.gov/93004489}, - abstract = {Statistics is a subject of many uses and surprisingly few effective practitioners. The traditional road to statistical knowledge is blocked, for most, by a formidable wall of mathematics. The approach in An Introduction to the Bootstrap avoids that wall. It arms scientists and engineers, as well as statisticians, with the computational techniques they need to analyze and understand complicated data sets.}, - keywords = {Bootstrap (Statistics)}, -} - -@Book{Schafer-1997, - author = {Joseph L. Schafer}, - date = {1997-08}, - title = {Analysis of incomplete multivariate data}, - doi = {10.1201/9780367803025}, - isbn = {9780367803025}, - abstract = {The last two decades have seen enormous developments in statistical methods for incomplete data. The EM algorithm and its extensions, multiple imputation, and Markov Chain Monte Carlo provide a set of flexible and reliable tools for inference in large classes of missing-data problems. Yet, in practical terms, those developments have had surprisingly little impact on the way most data analysts handle missing values on a routine basis. - Analysis of Incomplete Multivariate Data helps bridge the gap between theory and practice, making these missing-data tools accessible to a broad audience. It presents a unified, Bayesian approach to the analysis of incomplete multivariate data, covering datasets in which the variables are continuous, categorical, or both. The focus is applied, where necessary, to help readers thoroughly understand the statistical properties of those methods, and the behavior of the accompanying algorithms. - All techniques are illustrated with real data examples, with extended discussion and practical advice. All of the algorithms described in this book have been implemented by the author for general use in the statistical languages S and S Plus. The software is available free of charge on the Internet.}, - publisher = {Chapman and Hall/CRC}, -} - -@Article{Bauer-Preacher-Gil-2006, - author = {Daniel J. Bauer and Kristopher J. Preacher and Karen M. 
Gil}, - date = {2006}, - journaltitle = {Psychological Methods}, - title = {Conceptualizing and testing random indirect effects and moderated mediation in multilevel models: New procedures and recommendations}, - doi = {10.1037/1082-989x.11.2.142}, - number = {2}, - pages = {142--163}, - volume = {11}, - abstract = {The authors propose new procedures for evaluating direct, indirect, and total effects in multilevel models when all relevant variables are measured at Level 1 and all effects are random. Formulas are provided for the mean and variance of the indirect and total effects and for the sampling variances of the average indirect and total effects. Simulations show that the estimates are unbiased under most conditions. Confidence intervals based on a normal approximation or a simulated sampling distribution perform well when the random effects are normally distributed but less so when they are nonnormally distributed. These methods are further developed to address hypotheses of moderated mediation in the multilevel context. An example demonstrates the feasibility and usefulness of the proposed methods.}, - publisher = {American Psychological Association ({APA})}, - keywords = {multilevel model, hierarchical linear model, indirect effect, mediation, moderated mediation}, -} - -@Article{Cheung-2009a, - author = {Mike W.-L. Cheung}, - date = {2009-05}, - journaltitle = {Behavior Research Methods}, - title = {Comparison of methods for constructing confidence intervals of standardized indirect effects}, - doi = {10.3758/brm.41.2.425}, - number = {2}, - pages = {425--438}, - volume = {41}, - abstract = {Mediation models are often used as a means to explain the psychological mechanisms between an independent and a dependent variable in the behavioral and social sciences. A major limitation of the unstandardized indirect effect calculated from raw scores is that it cannot be interpreted as an effect-size measure. In contrast, the standardized indirect effect calculated from standardized scores can be a good candidate as a measure of effect size because it is scale invariant. In the present article, 11 methods for constructing the confidence intervals (CIs) of the standardized indirect effects were evaluated via a computer simulation. These included six Wald CIs, three bootstrap CIs, one likelihood-based CI, and the PRODCLIN CI. The results consistently showed that the percentile bootstrap, the bias-corrected bootstrap, and the likelihood-based approaches had the best coverage probability. Mplus, LISREL, and Mx syntax were included to facilitate the use of these preferred methods in applied settings. Future issues on the use of the standardized indirect effects are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, coverage probability, structural equation modeling approach}, -} - -@Article{Cheung-2009b, - author = {Mike W.-L. Cheung}, - date = {2009-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Constructing approximate confidence intervals for parameters with structural equation models}, - doi = {10.1080/10705510902751291}, - number = {2}, - pages = {267--294}, - volume = {16}, - abstract = {Confidence intervals (CIs) for parameters are usually constructed based on the estimated standard errors. These are known as Wald CIs. This article argues that likelihood-based CIs (CIs based on likelihood ratio statistics) are often preferred to Wald CIs. 
It shows how the likelihood-based CIs and the Wald CIs for many statistics and psychometric indexes can be constructed with the use of phantom variables (Rindskopf, 1984) in some of the current structural equation modeling (SEM) packages. The procedures to form CIs for the differences in correlation coefficients, squared multiple correlations, indirect effects, coefficient alphas, and reliability estimates are illustrated. A simulation study on the Pearson correlation is used to demonstrate the advantages of the likelihood-based CI over the Wald CI. Issues arising from this SEM approach and extensions of this approach are discussed.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Cheung-Lau-2007, - author = {Gordon W. Cheung and Rebecca S. Lau}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Testing mediation and suppression effects of latent variables}, - doi = {10.1177/1094428107300343}, - number = {2}, - pages = {296--325}, - volume = {11}, - abstract = {Because of the importance of mediation studies, researchers have been continuously searching for the best statistical test for mediation effect. The approaches that have been most commonly employed include those that use zero-order and partial correlation, hierarchical regression models, and structural equation modeling (SEM). This study extends MacKinnon and colleagues (MacKinnon, Lockwood, Hoffmann, West, \& Sheets, 2002; MacKinnon, Lockwood, \& Williams, 2004, MacKinnon, Warsi, \& Dwyer, 1995) works by conducting a simulation that examines the distribution of mediation and suppression effects of latent variables with SEM, and the properties of confidence intervals developed from eight different methods. Results show that SEM provides unbiased estimates of mediation and suppression effects, and that the bias-corrected bootstrap confidence intervals perform best in testing for mediation and suppression effects. Steps to implement the recommended procedures with Amos are presented.}, - publisher = {{SAGE} Publications}, - keywords = {mediating effects, suppression effects, structural equation modeling}, -} - -@Article{CribariNeto-Souza-Vasconcellos-2007, - author = {Francisco Cribari-Neto and Tatiene C. Souza and Klaus L. P. Vasconcellos}, - date = {2007-08}, - journaltitle = {Communications in Statistics - Theory and Methods}, - title = {Inference under heteroskedasticity and leveraged data}, - doi = {10.1080/03610920601126589}, - number = {10}, - pages = {1877--1888}, - volume = {36}, - abstract = {We evaluate the finite-sample behavior of different heteroskedasticity-consistent covariance matrix estimators, under both constant and unequal error variances. We consider the estimator proposed by Halbert White (HC0), and also its variants known as HC2, HC3, and HC4; the latter was recently proposed by Cribari-Neto (2004). We propose a new covariance matrix estimator: HC5. It is the first consistent estimator to explicitly take into account the effect that the maximal leverage has on the associated inference. Our numerical results show that quasi-$t$ inference based on HC5 is typically more reliable than inference based on other covariance matrix estimators.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Fritz-MacKinnon-2007, - author = {Matthew S. Fritz and David P. 
MacKinnon}, - date = {2007-03}, - journaltitle = {Psychological Science}, - title = {Required sample size to detect the mediated effect}, - doi = {10.1111/j.1467-9280.2007.01882.x}, - number = {3}, - pages = {233--239}, - volume = {18}, - abstract = {Mediation models are widely used, and there are many tests of the mediated effect. One of the most common questions that researchers have when planning mediation studies is, ``How many subjects do I need to achieve adequate power when testing for mediation?'' This article presents the necessary sample sizes for six of the most common and the most recommended tests of mediation for various combinations of parameters, to provide a guide for researchers when designing studies or applying for grants.}, - publisher = {{SAGE} Publications}, - keywords = {bootstrap, collinearity, mediation analysis, power, tolerance}, -} - -@Article{Graham-Olchowski-Gilreath-2007, - author = {John W. Graham and Allison E. Olchowski and Tamika D. Gilreath}, - date = {2007-06}, - journaltitle = {Prevention Science}, - title = {How many imputations are really needed? Some practical clarifications of multiple imputation theory}, - doi = {10.1007/s11121-007-0070-9}, - number = {3}, - pages = {206--213}, - volume = {8}, - abstract = {Multiple imputation (MI) and full information maximum likelihood (FIML) are the two most common approaches to missing data analysis. In theory, MI and FIML are equivalent when identical models are tested using the same variables, and when m, the number of imputations performed with MI, approaches infinity. However, it is important to know how many imputations are necessary before MI and FIML are sufficiently equivalent in ways that are important to prevention scientists. MI theory suggests that small values of m, even on the order of three to five imputations, yield excellent results. Previous guidelines for sufficient m are based on relative efficiency, which involves the fraction of missing information ($\gamma$) for the parameter being estimated, and m. In the present study, we used a Monte Carlo simulation to test MI models across several scenarios in which $\gamma$ and m were varied. Standard errors and p-values for the regression coefficient of interest varied as a function of m, but not at the same rate as relative efficiency. Most importantly, statistical power for small effect sizes diminished as m became smaller, and the rate of this power falloff was much greater than predicted by changes in relative efficiency. Based on our findings, we recommend that researchers using MI should perform many more imputations than previously considered sufficient. These recommendations are based on $\gamma$, and take into consideration one's tolerance for a preventable power falloff (compared to FIML) due to using too few imputations.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {multiple imputation, number of imputations, full information maximum likelihood, missing data, statistical power}, -} - -@Article{MacKinnon-Fritz-Williams-etal-2007, - author = {David P. MacKinnon and Matthew S. Fritz and Jason Williams and Chondra M. 
Lockwood}, - date = {2007-08}, - journaltitle = {Behavior Research Methods}, - title = {Distribution of the product confidence limits for the indirect effect: Program {PRODCLIN}}, - doi = {10.3758/bf03193007}, - number = {3}, - pages = {384--389}, - volume = {39}, - abstract = {This article describes a program, PRODCLIN (distribution of the PRODuct Confidence Limits for INdirect effects), written for SAS, SPSS, and R, that computes confidence limits for the product of two normal random variables. The program is important because it can be used to obtain more accurate confidence limits for the indirect effect, as demonstrated in several recent articles (MacKinnon, Lockwood, \& Williams, 2004; Pituch, Whittaker, \& Stapleton, 2005). Tests of the significance of and confidence limits for indirect effects based on the distribution of the product method have more accurate Type I error rates and more power than other, more commonly used tests. Values for the two paths involved in the indirect effect and their standard errors are entered in the PRODCLIN program, and distribution of the product confidence limits are computed. Several examples are used to illustrate the PRODCLIN program. The PRODCLIN programs in rich text format may be downloaded from www.psychonomic.org/archive.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{MacKinnon-Lockwood-Hoffman-etal-2002, - author = {David P. MacKinnon and Chondra M. Lockwood and Jeanne M. Hoffman and Stephen G. West and Virgil Sheets}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {A comparison of methods to test mediation and other intervening variable effects}, - doi = {10.1037/1082-989x.7.1.83}, - number = {1}, - pages = {83--104}, - volume = {7}, - abstract = {A Monte Carlo study compared 14 methods to test the statistical significance of the intervening variable effect. An intervening variable (mediator) transmits the effect of an independent variable to a dependent variable. The commonly used R. M. Baron and D. A. Kenny (1986) approach has low statistical power. Two methods based on the distribution of the product and 2 difference-in-coefficients methods have the most accurate Type I error rates and greatest statistical power except in 1 important case in which Type I error rates are too high. The best balance of Type I error and statistical power across all cases is the test of the joint significance of the two effects comprising the intervening variable effect.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{MacKinnon-Lockwood-Williams-2004, - author = {David P. MacKinnon and Chondra M. Lockwood and Jason Williams}, - date = {2004-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {Confidence limits for the indirect effect: Distribution of the product and resampling methods}, - doi = {10.1207/s15327906mbr3901_4}, - number = {1}, - pages = {99--128}, - volume = {39}, - abstract = {The most commonly used method to test an indirect effect is to divide the estimate of the indirect effect by its standard error and compare the resulting z statistic with a critical value from the standard normal distribution. Confidence limits for the indirect effect are also typically based on critical values from the standard normal distribution. This article uses a simulation study to demonstrate that confidence limits are imbalanced because the distribution of the indirect effect is normal only in special cases. 
Two alternatives for improving the performance of confidence limits for the indirect effect are evaluated: (a) a method based on the distribution of the product of two normal random variables, and (b) resampling methods. In Study 1, confidence limits based on the distribution of the product are more accurate than methods based on an assumed normal distribution but confidence limits are still imbalanced. Study 2 demonstrates that more accurate confidence limits are obtained using resampling methods, with the bias-corrected bootstrap the best method overall.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-montecarlo, mediation-prodclin}, -} - -@Article{Peugh-Enders-2004, - author = {James L. Peugh and Craig K. Enders}, - date = {2004-12}, - journaltitle = {Review of Educational Research}, - title = {Missing data in educational research: A review of reporting practices and suggestions for improvement}, - doi = {10.3102/00346543074004525}, - number = {4}, - pages = {525--556}, - volume = {74}, - publisher = {American Educational Research Association ({AERA})}, - abstract = {Missing data analyses have received considerable recent attention in the methodological literature, and two ``modern'' methods, multiple imputation and maximum likelihood estimation, are recommended. The goals of this article are to (a) provide an overview of missing-data theory, maximum likelihood estimation, and multiple imputation; (b) conduct a methodological review of missing-data reporting practices in 23 applied research journals; and (c) provide a demonstration of multiple imputation and maximum likelihood estimation using the Longitudinal Study of American Youth data. The results indicated that explicit discussions of missing data increased substantially between 1999 and 2003, but the use of maximum likelihood estimation or multiple imputation was rare; the studies relied almost exclusively on listwise and pairwise deletion.}, - keywords = {EM algorithm, maximum likelihood estimation, missing data, multiple imputation, NORM}, -} - -@Article{Preacher-Hayes-2004, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2004-11}, - journaltitle = {Behavior Research Methods, Instruments, \& Computers}, - title = {{SPSS} and {SAS} procedures for estimating indirect effects in simple mediation models}, - doi = {10.3758/bf03206553}, - number = {4}, - pages = {717--731}, - volume = {36}, - abstract = {Researchers often conduct mediation analysis in order to indirectly assess the effect of a proposed cause on some outcome through a proposed mediator. The utility of mediation analysis stems from its ability to go beyond the merely descriptive to a more functional understanding of the relationships among variables. A necessary component of mediation is a statistically and practically significant indirect effect. Although mediation hypotheses are frequently explored in psychological research, formal significance tests of indirect effects are rarely conducted. After a brief overview of mediation, we argue the importance of directly testing the significance of indirect effects and provide SPSS and SAS macros that facilitate estimation of the indirect effect with a normal theory approach and a bootstrap approach to obtaining confidence intervals, as well as the traditional approach advocated by Baron and Kenny (1986). We hope that this discussion and the macros will enhance the frequency of formal mediation tests in the psychology literature. 
Electronic copies of these macros may be downloaded from the Psychonomic Society’s Web archive at www.psychonomic.org/archive/.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {life satisfaction, indirect effect, mediation analysis, cognitive therapy, Sobel test}, -} - -@Article{Preacher-Hayes-2008, - author = {Kristopher J. Preacher and Andrew F. Hayes}, - date = {2008-08}, - journaltitle = {Behavior Research Methods}, - title = {Asymptotic and resampling strategies for assessing and comparing indirect effects in multiple mediator models}, - doi = {10.3758/brm.40.3.879}, - number = {3}, - pages = {879--891}, - volume = {40}, - abstract = {Hypotheses involving mediation are common in the behavioral sciences. Mediation exists when a predictor affects a dependent variable indirectly through at least one intervening variable, or mediator. Methods to assess mediation involving multiple simultaneous mediators have received little attention in the methodological literature despite a clear need. We provide an overview of simple and multiple mediation and explore three approaches that can be used to investigate indirect processes, as well as methods for contrasting two or more mediators within a single model. We present an illustrative example, assessing and contrasting potential mediators of the relationship between the helpfulness of socialization agents and job satisfaction. We also provide SAS and SPSS macros, as well as Mplus and LISREL syntax, to facilitate the use of these methods in applications.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {indirect effect, structural equation modeling, residual covariance, total indirect effect, multiple mediator model}, -} - -@Article{Raghunathan-Lepkowski-Hoewyk-etal-2001, - author = {Trivellore E. Raghunathan and James M. Lepkowski and John Van Hoewyk and Peter Solenberger}, - date = {2001}, - journaltitle = {Survey Methodology}, - title = {A multivariate technique for multiply imputing missing values using a sequence of regression models}, - number = {1}, - pages = {85--95}, - volume = {27}, - abstract = {This article describes and evaluates a procedure for imputing missing values for a relatively complex data structure when the data are missing at random. The imputations are obtained by fitting a sequence of regression models and drawing values from the corresponding predictive distributions. The types of regression models used are linear, logistic, Poisson, generalized logit or a mixture of these depending on the type of variable being imputed. Two additional common features in the imputation process are incorporated: restriction to a relevant subpopulation for some variables and logical bounds or constraints for the imputed values. The restrictions involve subsetting the sample individuals that satisfy certain criteria while fitting the regression models. The bounds involve drawing values from a truncated predictive distribution. The development of this method was partly motivated by the analysis of two data sets which are used as illustrations. The sequential regression procedure is applied to perform multiple imputation analysis for the two applied problems. 
The sampling properties of inferences from multiply imputed data sets created using the sequential regression method are evaluated through simulated data sets.}, - keywords = {item nonresponse, missing at random, multiple imputation, nonignorable missing mechanism, regression, sampling properties and simulations}, -} - -@Article{Schafer-Graham-2002, - author = {Joseph L. Schafer and John W. Graham}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Missing data: Our view of the state of the art}, - doi = {10.1037/1082-989x.7.2.147}, - number = {2}, - pages = {147--177}, - volume = {7}, - abstract = {Statistical procedures for missing data have vastly improved, yet misconception and unsound practice still abound. The authors frame the missing-data problem, review methods, offer advice, and raise issues that remain unresolved. They clear up common misunderstandings regarding the missing at random (MAR) concept. They summarize the evidence against older procedures and, with few exceptions, discourage their use. They present, in both technical and practical language, 2 general approaches that come highly recommended: maximum likelihood (ML) and Bayesian multiple imputation (MI). Newer developments are discussed, including some for dealing with missing data that are not MAR. Although not yet in the mainstream, these procedures may eventually extend the ML and MI methods that currently represent the state of the art.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Serlin-2000, - author = {Ronald C. Serlin}, - date = {2000}, - journaltitle = {Psychological Methods}, - title = {Testing for robustness in {Monte Carlo} studies}, - doi = {10.1037/1082-989x.5.2.230}, - number = {2}, - pages = {230--240}, - volume = {5}, - abstract = {Monte Carlo studies provide the information needed to help researchers select appropriate analytical procedures under design conditions in which the underlying assumptions of the procedures are not met. In Monte Carlo studies, the 2 errors that one could commit involve (a) concluding that a statistical procedure is robust when it is not or (b) concluding that it is not robust when it is. In previous attempts to apply standard statistical design principles to Monte Carlo studies, the less severe of these errors has been wrongly designated the Type I error. In this article, a method is presented for controlling the appropriate Type I error rate; the determination of the number of iterations required in a Monte Carlo study to achieve desired power is described; and a confidence interval for a test's true Type I error rate is derived. A robustness criterion is also proposed that is a compromise between W. G. Cochran's (1952) and J. V. Bradley's (1978) criteria.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Shrout-Bolger-2002, - author = {Patrick E. Shrout and Niall Bolger}, - date = {2002}, - journaltitle = {Psychological Methods}, - title = {Mediation in experimental and nonexperimental studies: New procedures and recommendations}, - doi = {10.1037/1082-989x.7.4.422}, - number = {4}, - pages = {422--445}, - volume = {7}, - publisher = {American Psychological Association ({APA})}, - abstract = {Mediation is said to occur when a causal effect of some variable $X$ on an outcome $Y$ is explained by some intervening variable $M$. The authors recommend that with small to moderate samples, bootstrap methods (B. Efron \& R. Tibshirani, 1993) be used to assess mediation. 
Bootstrap tests are powerful because they detect that the sampling distribution of the mediated effect is skewed away from 0. They argue that R. M. Baron and D. A. Kenny's (1986) recommendation of first testing the $X \to Y$ association for statistical significance should not be a requirement when there is a priori belief that the effect size is small or suppression is a possibility. Empirical examples and computer setups for bootstrap analyses are provided.}, -} - -@Article{Taylor-MacKinnon-Tein-2007, - author = {Aaron B. Taylor and David P. MacKinnon and Jenn-Yun Tein}, - date = {2007-07}, - journaltitle = {Organizational Research Methods}, - title = {Tests of the three-path mediated effect}, - doi = {10.1177/1094428107300344}, - number = {2}, - pages = {241--269}, - volume = {11}, - abstract = {In a three-path mediational model, two mediators intervene in a series between an independent and a dependent variable. Methods of testing for mediation in such a model are generalized from the more often used single-mediator model. Six such methods are introduced and compared in a Monte Carlo study in terms of their Type I error, power, and coverage. Based on its results, the joint significance test is preferred when only a hypothesis test is of interest. The percentile bootstrap and bias-corrected bootstrap are preferred when a confidence interval on the mediated effect is desired, with the latter having more power but also slightly inflated Type I error in some conditions.}, - publisher = {{SAGE} Publications}, - keywords = {mediation, bootstrapping}, -} - -@Article{vanBuuren-Brand-GroothuisOudshoorn-etal-2006, - author = {Stef {van Buuren} and J. P. L. Brand and C. G. M. Groothuis-Oudshoorn and Donald B. Rubin}, - date = {2006-12}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Fully conditional specification in multivariate imputation}, - doi = {10.1080/10629360600810434}, - number = {12}, - pages = {1049--1064}, - volume = {76}, - abstract = {The use of the Gibbs sampler with fully conditionally specified models, where the distribution of each variable given the other variables is the starting point, has become a popular method to create imputations in incomplete multivariate data. The theoretical weakness of this approach is that the specified conditional densities can be incompatible, and therefore the stationary distribution to which the Gibbs sampler attempts to converge may not exist. This study investigates practical consequences of this problem by means of simulation. Missing data are created under four different missing data mechanisms. Attention is given to the statistical behavior under compatible and incompatible models. The results indicate that multiple imputation produces essentially unbiased estimates with appropriate coverage in the simple cases investigated, even for the incompatible models. Of particular interest is that these results were produced using only five Gibbs iterations starting from a simple draw from observed marginal distributions. 
It thus appears that, despite the theoretical weaknesses, the actual performance of conditional model specification for multivariate imputation can be quite good, and therefore deserves further study.}, - publisher = {Informa {UK} Limited}, - keywords = {multivariate missing data, multiple imputation, distributional compatibility, Gibbs sampling, simulation, proper imputation}, -} - -@Article{Yuan-Bentler-2000, - author = {Ke-Hai Yuan and Peter M. Bentler}, - date = {2000-08}, - journaltitle = {Sociological Methodology}, - title = {Three likelihood-based methods for mean and covariance structure analysis with nonnormal missing data}, - doi = {10.1111/0081-1750.00078}, - number = {1}, - pages = {165--200}, - volume = {30}, - abstract = {Survey and longitudinal studies in the social and behavioral sciences generally contain missing data. Mean and covariance structure models play an important role in analyzing such data. Two promising methods for dealing with missing data are a direct maximum-likelihood and a two-stage approach based on the unstructured mean and covariance estimates obtained by the EM-algorithm. Typical assumptions under these two methods are ignorable nonresponse and normality of data. However, data sets in social and behavioral sciences are seldom normal, and experience with these procedures indicates that normal theory based methods for nonnormal data very often lead to incorrect model evaluations. By dropping the normal distribution assumption, we develop more accurate procedures for model inference. Based on the theory of generalized estimating equations, a way to obtain consistent standard errors of the two-stage estimates is given. The asymptotic efficiencies of different estimators are compared under various assumptions. We also propose a minimum chi-square approach and show that the estimator obtained by this approach is asymptotically at least as efficient as the two likelihood-based estimators for either normal or nonnormal data. The major contribution of this paper is that for each estimator, we give a test statistic whose asymptotic distribution is chi-square as long as the underlying sampling distribution enjoys finite fourth-order moments. We also give a characterization for each of the two likelihood ratio test statistics when the underlying distribution is nonnormal. Modifications to the likelihood ratio statistics are also given. Our working assumption is that the missing data mechanism is missing completely at random. Examples and Monte Carlo studies indicate that, for commonly encountered nonnormal distributions, the procedures developed in this paper are quite reliable even for samples with missing data that are missing at random.}, - publisher = {{SAGE} Publications}, -} - -@Book{MacKinnon-2008, - author = {David P. MacKinnon}, - series = {Multivariate applications}, - date = {2008}, - title = {Introduction to statistical mediation analysis}, - doi = {10.4324/9780203809556}, - isbn = {9780805864298}, - location = {Hoboken}, - pages = {488}, - library = {QA278.2 .M29 2008}, - addendum = {https://lccn.loc.gov/2007011793}, - abstract = {This volume introduces the statistical, methodological, and conceptual aspects of mediation analysis. Applications from health, social, and developmental psychology, sociology, communication, exercise science, and epidemiology are emphasized throughout. Single-mediator, multilevel, and longitudinal models are reviewed. 
The author's goal is to help the reader apply mediation analysis to their own data and understand its limitations. - Each chapter features an overview, numerous worked examples, a summary, and exercises (with answers to the odd numbered questions). The accompanying downloadable resources contain outputs described in the book from SAS, SPSS, LISREL, EQS, MPLUS, and CALIS, and a program to simulate the model. The notation used is consistent with existing literature on mediation in psychology. - The book opens with a review of the types of research questions the mediation model addresses. Part II describes the estimation of mediation effects including assumptions, statistical tests, and the construction of confidence limits. Advanced models including mediation in path analysis, longitudinal models, multilevel data, categorical variables, and mediation in the context of moderation are then described. The book closes with a discussion of the limits of mediation analysis, additional approaches to identifying mediating variables, and future directions. - Introduction to Statistical Mediation Analysis is intended for researchers and advanced students in health, social, clinical, and developmental psychology as well as communication, public health, nursing, epidemiology, and sociology. Some exposure to a graduate level research methods or statistics course is assumed. The overview of mediation analysis and the guidelines for conducting a mediation analysis will be appreciated by all readers.}, - publisher = {Erlbaum Psych Press}, - keywords = {Mediation (Statistics)}, -} - -@Book{Venables-Ripley-2002, - author = {W. N. Venables and B. D. Ripley}, - date = {2002}, - title = {Modern applied statistics with {S}}, - doi = {10.1007/978-0-387-21706-2}, - publisher = {Springer New York}, -} - -@Article{Biesanz-Falk-Savalei-2010, - author = {Jeremy C. Biesanz and Carl F. Falk and Victoria Savalei}, - date = {2010-08}, - journaltitle = {Multivariate Behavioral Research}, - title = {Assessing mediational models: Testing and interval estimation for indirect effects}, - doi = {10.1080/00273171.2010.498292}, - number = {4}, - pages = {661--701}, - volume = {45}, - abstract = {Theoretical models specifying indirect or mediated effects are common in the social sciences. An indirect effect exists when an independent variable's influence on the dependent variable is mediated through an intervening variable. Classic approaches to assessing such mediational hypotheses (Baron \& Kenny, 1986; Sobel, 1982) have in recent years been supplemented by computationally intensive methods such as bootstrapping, the distribution of the product methods, and hierarchical Bayesian Markov chain Monte Carlo (MCMC) methods. These different approaches for assessing mediation are illustrated using data from Dunn, Biesanz, Human, and Finn (2007). However, little is known about how these methods perform relative to each other, particularly in more challenging situations, such as with data that are incomplete and/or nonnormal. This article presents an extensive Monte Carlo simulation evaluating a host of approaches for assessing mediation. We examine Type I error rates, power, and coverage. We study normal and nonnormal data as well as complete and incomplete data. In addition, we adapt a method, recently proposed in statistical literature, that does not rely on confidence intervals (CIs) to test the null hypothesis of no indirect effect. 
The results suggest that the new inferential method--the partial posterior p value--slightly outperforms existing ones in terms of maintaining Type I error rates while maximizing power, especially with incomplete data. Among confidence interval approaches, the bias-corrected accelerated (BCa) bootstrapping approach often has inflated Type I error rates and inconsistent coverage and is not recommended. In contrast, the bootstrapped percentile confidence interval and the hierarchical Bayesian MCMC method perform best overall, maintaining Type I error rates, exhibiting reasonable power, and producing stable and accurate coverage rates.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Blanca-Arnau-LopezMontiel-etal-2013, - author = {Mar{\'\i}a J. Blanca and Jaume Arnau and Dolores L{\'o}pez-Montiel and Roser Bono and Rebecca Bendayan}, - date = {2013-05}, - journaltitle = {Methodology}, - title = {Skewness and kurtosis in real data samples}, - doi = {10.1027/1614-2241/a000057}, - number = {2}, - pages = {78--84}, - volume = {9}, - abstract = {Parametric statistics are based on the assumption of normality. Recent findings suggest that Type I error and power can be adversely affected when data are non-normal. This paper aims to assess the distributional shape of real data by examining the values of the third and fourth central moments as a measurement of skewness and kurtosis in small samples. The analysis concerned 693 distributions with a sample size ranging from 10 to 30. Measures of cognitive ability and of other psychological variables were included. The results showed that skewness ranged between -2.49 and 2.33. The values of kurtosis ranged between -1.92 and 7.41. Considering skewness and kurtosis together the results indicated that only 5.5\% of distributions were close to expected values under normality. Although extreme contamination does not seem to be very frequent, the findings are consistent with previous research suggesting that normality is not the rule with real data.}, - publisher = {Hogrefe Publishing Group}, -} - -@Article{Boettiger-Eddelbuettel-2017, - author = {Carl Boettiger and Dirk Eddelbuettel}, - date = {2017}, - journaltitle = {The R Journal}, - title = {An introduction to {Rocker}: Docker containers for {R}}, - doi = {10.32614/rj-2017-065}, - number = {2}, - pages = {527}, - volume = {9}, - abstract = {We describe the Rocker project, which provides a widely-used suite of Docker images with customized R environments for particular tasks. We discuss how this suite is organized, and how these tools can increase portability, scaling, reproducibility, and convenience of R users and developers.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-docker-rocker}, -} - -@Article{Chow-Ho-Hamaker-etal-2010, - author = {Sy-Miin Chow and Moon-ho R. Ho and Ellen L. Hamaker and Conor V. Dolan}, - date = {2010-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Equivalence and differences between structural equation modeling and state-space modeling techniques}, - doi = {10.1080/10705511003661553}, - number = {2}, - pages = {303--332}, - volume = {17}, - abstract = {State-space modeling techniques have been compared to structural equation modeling (SEM) techniques in various contexts but their unique strengths have often been overshadowed by their similarities to SEM. 
In this article, we provide a comprehensive discussion of these 2 approaches' similarities and differences through analytic comparisons and numerical simulations, with a focus on their use in representing intraindividual dynamics and interindividual differences. To demonstrate the respective strengths and weaknesses of the 2 approaches in representing these 2 aspects, we simulated data under (a) a cross-sectional common factor model, (b) a latent difference score model with random effects in intercept and slope, and (c) a bivariate dynamic factor analysis model with auto- and cross-regression parameters. Possible ways in which SEM and state-space modeling can be utilized as complementary tools in representing human developmental and other related processes are discussed.}, - publisher = {Informa {UK} Limited}, - annotation = {ild, sem, ssm}, -} - -@Article{Deboeck-Preacher-2015, - author = {Pascal R. Deboeck and Kristopher J. Preacher}, - date = {2015-06}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {No Need to be Discrete: A Method for Continuous Time Mediation Analysis}, - doi = {10.1080/10705511.2014.973960}, - number = {1}, - pages = {61--75}, - volume = {23}, - abstract = {Mediation is one concept that has shaped numerous theories. The list of problems associated with mediation models, however, has been growing. Mediation models based on cross-sectional data can produce unexpected estimates, so much so that making longitudinal or causal inferences is inadvisable. Even longitudinal mediation models have faults, as parameter estimates produced by these models are specific to the lag between observations, leading to much debate over appropriate lag selection. Using continuous time models (CTMs) rather than commonly employed discrete time models, one can estimate lag-independent parameters. We demonstrate methodology that allows for continuous time mediation analyses, with attention to concepts such as indirect and direct effects, partial mediation, the effect of lag, and the lags at which relations become maximal. A simulation compares common longitudinal mediation methods with CTMs. Reanalysis of a published covariance matrix demonstrates that CTMs can be fit to data used in longitudinal mediation studies.}, - publisher = {Informa {UK} Limited}, - keywords = {continuous time models, cross-lagged panel model, exact discrete model, longitudinal mediation, mediation}, - annotation = {ild, ild-mediation}, -} - -@Article{Dudgeon-2017, - author = {Paul Dudgeon}, - date = {2017-03}, - journaltitle = {Psychometrika}, - title = {Some improvements in confidence intervals for standardized regression coefficients}, - doi = {10.1007/s11336-017-9563-z}, - number = {4}, - pages = {928--951}, - volume = {82}, - keywords = {standardized regression coefficients, robust confidence intervals, non-normality}, - abstract = {Yuan and Chan (Psychometrika 76:670–690, 2011. doi:10.1007/S11336-011-9224-6) derived consistent confidence intervals for standardized regression coefficients under fixed and random score assumptions. Jones and Waller (Psychometrika 80:365–378, 2015. doi:10.1007/S11336-013-9380-Y) extended these developments to circumstances where data are non-normal by examining confidence intervals based on Browne's (Br J Math Stat Psychol 37:62–83, 1984. doi:10.1111/j.2044-8317.1984.tb00789.x) asymptotic distribution-free (ADF) theory. 
Seven different heteroscedastic-consistent (HC) estimators were investigated in the current study as potentially better solutions for constructing confidence intervals on standardized regression coefficients under non-normality. Normal theory, ADF, and HC estimators were evaluated in a Monte Carlo simulation. Findings confirmed the superiority of the HC3 (MacKinnon and White, J Econ 35:305–325, 1985. doi:10.1016/0304-4076(85)90158-7) and HC5 (Cribari-Neto and Da Silva, Adv Stat Anal 95:129–146, 2011. doi:10.1007/s10182-010-0141-2) interval estimators over Jones and Waller's ADF estimator under all conditions investigated, as well as over the normal theory method. The HC5 estimator was more robust in a restricted set of conditions over the HC3 estimator. Some possible extensions of HC estimators to other effect size measures are considered for future developments.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Eddelbuettel-Francois-2011, - author = {Dirk Eddelbuettel and Romain Fran{\c c}ois}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{Rcpp}: Seamless {R} and {C++} integration}, - doi = {10.18637/jss.v040.i08}, - number = {8}, - volume = {40}, - abstract = {The Rcpp package simplifies integrating C++ code with R. It provides a consistent C++ class hierarchy that maps various types of R objects (vectors, matrices, functions, environments, ...) to dedicated C++ classes. Object interchange between R and C++ is managed by simple, flexible and extensible concepts which include broad support for C++ Standard Template Library idioms. C++ code can both be compiled, linked and loaded on the fly, or added via packages. Flexible error and exception code handling is provided. Rcpp substantially lowers the barrier for programmers wanting to combine C++ code with R.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages}, -} - -@Article{Hayes-Scharkow-2013, - author = {Andrew F. Hayes and Michael Scharkow}, - date = {2013-08}, - journaltitle = {Psychological Science}, - title = {The relative trustworthiness of inferential tests of the indirect effect in statistical mediation analysis}, - doi = {10.1177/0956797613480187}, - number = {10}, - pages = {1918--1927}, - volume = {24}, - abstract = {A content analysis of 2 years of Psychological Science articles reveals inconsistencies in how researchers make inferences about indirect effects when conducting a statistical mediation analysis. In this study, we examined the frequency with which popularly used tests disagree, whether the method an investigator uses makes a difference in the conclusion he or she will reach, and whether there is a most trustworthy test that can be recommended to balance practical and performance considerations. We found that tests agree much more frequently than they disagree, but disagreements are more common when an indirect effect exists than when it does not. We recommend the bias-corrected bootstrap confidence interval as the most trustworthy test if power is of utmost concern, although it can be slightly liberal in some circumstances. Investigators concerned about Type I errors should choose the Monte Carlo confidence interval or the distribution-of-the-product approach, which rarely disagree. The percentile bootstrap confidence interval is a good compromise test.}, - publisher = {{SAGE} Publications}, -} - -@Article{Hunter-2017, - author = {Michael D. 
Hunter}, - date = {2017-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {State Space Modeling in an Open Source, Modular, Structural Equation Modeling Environment}, - doi = {10.1080/10705511.2017.1369354}, - number = {2}, - pages = {307--324}, - volume = {25}, - abstract = {State space models (SSMs) are introduced in the context of structural equation modeling (SEM). In particular, the OpenMx implementation of SSMs using the Kalman filter and prediction error decomposition is discussed. In reflection of modularity, the implementation uses the same full information maximum likelihood missing data procedures for SSMs and SEMs. Similarly, generic OpenMx features such as likelihood ratio tests, profile likelihood confidence intervals, Hessian-based standard errors, definition variables, and the matrix algebra interface are all supported. Example scripts for specification of autoregressive models, multiple lag models (VAR(p)), multiple lag moving average models (VARMA(p, q)), multiple subject models, and latent growth models are provided. Additionally, latent variable calculation based on the Kalman filter and raw data generation based on a model are all included. Finally, future work for extending SSMs to allow for random effects and for presenting them in diagrams is discussed.}, - publisher = {Informa {UK} Limited}, - keywords = {state space model, software, Kalman filter, OpenMx}, - annotation = {ild, ild-software, sem, sem-software, ssm, ssm-software}, -} - -@Article{Jones-Waller-2013a, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013}, - journaltitle = {Psychological Methods}, - title = {Computing confidence intervals for standardized regression coefficients.}, - doi = {10.1037/a0033269}, - number = {4}, - pages = {435--453}, - volume = {18}, - abstract = {With fixed predictors, the standard method (Cohen, Cohen, West, \& Aiken, 2003, p. 86; Harris, 2001, p. 80; Hays, 1994, p. 709) for computing confidence intervals (CIs) for standardized regression coefficients fails to account for the sampling variability of the criterion standard deviation. With random predictors, this method also fails to account for the sampling variability of the predictor standard deviations. Nevertheless, under some conditions the standard method will produce CIs with accurate coverage rates. To delineate these conditions, we used a Monte Carlo simulation to compute empirical CI coverage rates in samples drawn from 36 populations with a wide range of data characteristics. We also computed the empirical CI coverage rates for 4 alternative methods that have been discussed in the literature: noncentrality interval estimation, the delta method, the percentile bootstrap, and the bias-corrected and accelerated bootstrap. Our results showed that for many data-parameter configurations--for example, sample size, predictor correlations, coefficient of determination ($R^2$), orientation of $\beta$ with respect to the eigenvectors of the predictor correlation matrix, $R_X$--the standard method produced coverage rates that were close to their expected values. However, when population $R^2$ was large and when $\beta$ approached the last eigenvector of $R_X$, then the standard method coverage rates were frequently below the nominal rate (sometimes by a considerable amount). In these conditions, the delta method and the 2 bootstrap procedures were consistently accurate. Results using noncentrality interval estimation were inconsistent. 
In light of these findings, we recommend that researchers use the delta method to evaluate the sampling variability of standardized regression coefficients.}, - publisher = {American Psychological Association ({APA})}, -} - -@Article{Jones-Waller-2015, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2015-06}, - journaltitle = {Psychometrika}, - title = {The Normal-Theory and Asymptotic Distribution-Free ({ADF}) Covariance Matrix of Standardized Regression Coefficients: Theoretical Extensions and Finite Sample Behavior}, - doi = {10.1007/s11336-013-9380-y}, - number = {2}, - pages = {365--378}, - volume = {80}, - abstract = {Yuan and Chan (Psychometrika, 76, 670–690, 2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a method for computing this covariance matrix from correlations. Next, we describe an asymptotic distribution-free (ADF; Browne in British Journal of Mathematical and Statistical Psychology, 37, 62–83, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with nonnormal data in moderate-to-large samples using both simulated and real-data examples. R code (R Development Core Team, 2012) is available from the authors or through the Psychometrika online repository for supplementary materials.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {standardized-regression, standardized-regression-hc}, -} - -@Article{Koopman-Howe-Hollenbeck-etal-2015, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck and Hock-Peng Sin}, - date = {2015}, - journaltitle = {Journal of Applied Psychology}, - title = {Small sample mediation testing: Misplaced confidence in bootstrapped confidence intervals}, - doi = {10.1037/a0036635}, - number = {1}, - pages = {194--202}, - volume = {100}, - abstract = {Bootstrapping is an analytical tool commonly used in psychology to test the statistical significance of the indirect effect in mediation models. Bootstrapping proponents have particularly advocated for its use for samples of 20-80 cases. This advocacy has been heeded, especially in the Journal of Applied Psychology, as researchers are increasingly utilizing bootstrapping to test mediation with samples in this range. We discuss reasons to be concerned with this escalation, and in a simulation study focused specifically on this range of sample sizes, we demonstrate not only that bootstrapping has insufficient statistical power to provide a rigorous hypothesis test in most conditions but also that bootstrapping has a tendency to exhibit an inflated Type I error rate. We then extend our simulations to investigate an alternative empirical resampling method as well as a Bayesian approach and demonstrate that they exhibit comparable statistical power to bootstrapping in small samples without the associated inflated Type I error. Implications for researchers testing mediation hypotheses in small samples are presented. For researchers wishing to use these methods in their own research, we have provided R syntax in the online supplemental materials.}, - publisher = {American Psychological Association ({APA})}, - keywords = {mediation, bootstrapping, permutation, Bayes}, -} - -@Article{Kurtzer-Sochat-Bauer-2017, - author = {Gregory M. Kurtzer and Vanessa Sochat and Michael W. 
Bauer}, - date = {2017-05}, - journaltitle = {{PLOS} {ONE}}, - title = {{Singularity}: Scientific containers for mobility of compute}, - doi = {10.1371/journal.pone.0177459}, - editor = {Attila Gursoy}, - number = {5}, - pages = {e0177459}, - volume = {12}, - publisher = {Public Library of Science ({PLoS})}, - annotation = {container, container-singularity}, -} - -@Article{Kwan-Chan-2011, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2011-04}, - journaltitle = {Behavior Research Methods}, - title = {Comparing standardized coefficients in structural equation modeling: A model reparameterization approach}, - doi = {10.3758/s13428-011-0088-6}, - number = {3}, - pages = {730--745}, - volume = {43}, - abstract = {We propose a two-stage method for comparing standardized coefficients in structural equation modeling (SEM). At stage 1, we transform the original model of interest into the standardized model by model reparameterization, so that the model parameters appearing in the standardized model are equivalent to the standardized parameters of the original model. At stage 2, we impose appropriate linear equality constraints on the standardized model and use a likelihood ratio test to make statistical inferences about the equality of standardized coefficients. Unlike other existing methods for comparing standardized coefficients, the proposed method does not require specific modeling features (e.g., specification of nonlinear constraints), which are available only in certain SEM software programs. Moreover, this method allows researchers to compare two or more standardized coefficients simultaneously in a standard and convenient way. Three real examples are given to illustrate the proposed method, using EQS, a popular SEM software program. Results show that the proposed method performs satisfactorily for testing the equality of standardized coefficients.}, - publisher = {Springer Science and Business Media {LLC}}, -} - -@Article{Kwan-Chan-2014, - author = {Joyce L. Y. Kwan and Wai Chan}, - date = {2014-04}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Comparing squared multiple correlation coefficients using structural equation modeling}, - doi = {10.1080/10705511.2014.882673}, - number = {2}, - pages = {225--238}, - volume = {21}, - abstract = {In social science research, a common topic in multiple regression analysis is to compare the squared multiple correlation coefficients in different populations. Existing methods based on asymptotic theories (Olkin \& Finn, 1995) and bootstrapping (Chan, 2009) are available but these can only handle a 2-group comparison. Another method based on structural equation modeling (SEM) has been proposed recently. However, this method has three disadvantages. First, it requires the user to explicitly specify the sample R2 as a function in terms of the basic SEM model parameters, which is sometimes troublesome and error prone. Second, it requires the specification of nonlinear constraints, which is not available in some popular SEM software programs. Third, it is for a 2-group comparison primarily. In this article, a 2-stage SEM method is proposed as an alternative. Unlike all other existing methods, the proposed method is simple to use, and it does not require any specific programming features such as the specification of nonlinear constraints. More important, the method allows a simultaneous comparison of 3 or more groups. 
A real example is given to illustrate the proposed method using EQS, a popular SEM software program.}, - keywords = {squared multiple correlation coefficients, structural equation modeling, model reparameterization, multi-sample analysis}, - publisher = {Informa {UK} Limited}, -} - -@Article{Merkel-2014, - author = {Dirk Merkel}, - date = {2014}, - journaltitle = {Linux Journal}, - title = {{Docker}: Lightweight {Linux} containers for consistent development and deployment}, - number = {239}, - pages = {2}, - volume = {2014}, - url = {https://www.linuxjournal.com/content/docker-lightweight-linux-containers-consistent-development-and-deployment}, - annotation = {container, container-docker}, -} - -@Article{Neale-Hunter-Pritikin-etal-2015, - author = {Michael C. Neale and Michael D. Hunter and Joshua N. Pritikin and Mahsa Zahery and Timothy R. Brick and Robert M. Kirkpatrick and Ryne Estabrook and Timothy C. Bates and Hermine H. Maes and Steven M. Boker}, - date = {2015-01}, - journaltitle = {Psychometrika}, - title = {{OpenMx} 2.0: Extended Structural Equation and Statistical Modeling}, - doi = {10.1007/s11336-014-9435-8}, - number = {2}, - pages = {535--549}, - volume = {81}, - abstract = {The new software package OpenMx 2.0 for structural equation and other statistical modeling is introduced and its features are described. OpenMx is evolving in a modular direction and now allows a mix-and-match computational approach that separates model expectations from fit functions and optimizers. Major backend architectural improvements include a move to swappable open-source optimizers such as the newly written CSOLNP. Entire new methodologies such as item factor analysis and state space modeling have been implemented. New model expectation functions including support for the expression of models in LISREL syntax and a simplified multigroup expectation function are available. Ease-of-use improvements include helper functions to standardize model parameters and compute their Jacobian-based standard errors, access to model components through standard R \$ mechanisms, and improved tab completion from within the R Graphical User Interface.}, - publisher = {Springer Science and Business Media {LLC}}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Ou-Hunter-Chow-2019, - author = {Lu Ou and Michael D. Hunter and Sy-Miin Chow}, - date = {2019}, - journaltitle = {The R Journal}, - title = {What's for {dynr}: A package for linear and nonlinear dynamic modeling in {R}}, - doi = {10.32614/rj-2019-012}, - number = {1}, - pages = {91}, - volume = {11}, - abstract = {Intensive longitudinal data in the behavioral sciences are often noisy, multivariate in nature, and may involve multiple units undergoing regime switches by showing discontinuities interspersed with continuous dynamics. Despite increasing interest in using linear and nonlinear differential/difference equation models with regime switches, there has been a scarcity of software packages that are fast and freely accessible. We have created an R package called dynr that can handle a broad class of linear and nonlinear discrete- and continuous-time models, with regime-switching properties and linear Gaussian measurement functions, in C, while maintaining simple and easy-to-learn model specification functions in R.
We present the mathematical and computational bases used by the dynr R package, and present two illustrative examples to demonstrate the unique features of dynr.}, - publisher = {The R Foundation}, - annotation = {ild, ild-software, r, r-packages}, -} - -@Article{Preacher-Selig-2012, - author = {Kristopher J. Preacher and James P. Selig}, - date = {2012-04}, - journaltitle = {Communication Methods and Measures}, - title = {Advantages of Monte Carlo Confidence Intervals for Indirect Effects}, - doi = {10.1080/19312458.2012.679848}, - number = {2}, - pages = {77--98}, - volume = {6}, - abstract = {Monte Carlo simulation is a useful but underutilized method of constructing confidence intervals for indirect effects in mediation analysis. The Monte Carlo confidence interval method has several distinct advantages over rival methods. Its performance is comparable to other widely accepted methods of interval construction, it can be used when only summary data are available, it can be used in situations where rival methods (e.g., bootstrapping and distribution of the product methods) are difficult or impossible, and it is not as computer-intensive as some other methods. In this study we discuss Monte Carlo confidence intervals for indirect effects, report the results of a simulation study comparing their performance to that of competing methods, demonstrate the method in applied examples, and discuss several software options for implementation in applied settings.}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Rosseel-2012, - author = {Yves Rosseel}, - date = {2012}, - journaltitle = {Journal of Statistical Software}, - title = {{lavaan}: An {R} package for structural equation modeling}, - doi = {10.18637/jss.v048.i02}, - number = {2}, - volume = {48}, - abstract = {Structural equation modeling (SEM) is a vast field and widely used by many applied researchers in the social and behavioral sciences. Over the years, many software packages for structural equation modeling have been developed, both free and commercial. However, perhaps the best state-of-the-art software packages in this field are still closed-source and/or commercial. The R package lavaan has been developed to provide applied researchers, teachers, and statisticians, a free, fully open-source, but commercial-quality package for latent variable modeling. This paper explains the aims behind the development of the package, gives an overview of its most important features, and provides some examples to illustrate how lavaan works in practice.}, - publisher = {Foundation for Open Access Statistic}, - annotation = {r, r-packages, sem, sem-software}, -} - -@Article{Schouten-Lugtig-Vink-2018, - author = {Rianne Margaretha Schouten and Peter Lugtig and Gerko Vink}, - date = {2018-07}, - journaltitle = {Journal of Statistical Computation and Simulation}, - title = {Generating missing values for simulation purposes: A multivariate amputation procedure}, - doi = {10.1080/00949655.2018.1491577}, - number = {15}, - pages = {2909--2930}, - volume = {88}, - abstract = {Missing data form a ubiquitous problem in scientific research, especially since most statistical analyses require complete data. To evaluate the performance of methods dealing with missing data, researchers perform simulation studies. An important aspect of these studies is the generation of missing values in a simulated, complete data set: the amputation procedure. 
We investigated the methodological validity and statistical nature of both the current amputation practice and a newly developed and implemented multivariate amputation procedure. We found that the current way of practice may not be appropriate for the generation of intuitive and reliable missing data problems. The multivariate amputation procedure, on the other hand, generates reliable amputations and allows for a proper regulation of missing data problems. The procedure has additional features to generate any missing data scenario precisely as intended. Hence, the multivariate amputation procedure is an efficient method to accurately evaluate missing data methodology.}, - publisher = {Informa {UK} Limited}, - keywords = {missing data, multiple imputation, multivariate amputation, evaluation}, -} - -@Article{Tofighi-Kelley-2019, - author = {Davood Tofighi and Ken Kelley}, - date = {2019-06}, - journaltitle = {Multivariate Behavioral Research}, - title = {Indirect effects in sequential mediation models: Evaluating methods for hypothesis testing and confidence interval formation}, - doi = {10.1080/00273171.2019.1618545}, - number = {2}, - pages = {188--210}, - volume = {55}, - abstract = {Complex mediation models, such as a two-mediator sequential model, have become more prevalent in the literature. To test an indirect effect in a two-mediator model, we conducted a large-scale Monte Carlo simulation study of the Type I error, statistical power, and confidence interval coverage rates of 10 frequentist and Bayesian confidence/credible intervals (CIs) for normally and nonnormally distributed data. The simulation included never-studied methods and conditions (e.g., Bayesian CI with flat and weakly informative prior methods, two model-based bootstrap methods, and two nonnormality conditions) as well as understudied methods (e.g., profile-likelihood, Monte Carlo with maximum likelihood standard error [MC-ML] and robust standard error [MC-Robust]). The popular BC bootstrap showed inflated Type I error rates and CI under-coverage. We recommend different methods depending on the purpose of the analysis. For testing the null hypothesis of no mediation, we recommend MC-ML, profile-likelihood, and two Bayesian methods. To report a CI, if data has a multivariate normal distribution, we recommend MC-ML, profile-likelihood, and the two Bayesian methods; otherwise, for multivariate nonnormal data we recommend the percentile bootstrap. We argue that the best method for testing hypotheses is not necessarily the best method for CI construction, which is consistent with the findings we present.}, - keywords = {indirect effect, confidence interval, sequential mediation, Bayesian credible interval}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bayesian, mediation-bootstrap, mediation-lb, mediation-montecarlo, semmcci}, -} - -@Article{Tofighi-MacKinnon-2015, - author = {Davood Tofighi and David P. MacKinnon}, - date = {2015-08}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{Monte Carlo} confidence intervals for complex functions of indirect effects}, - doi = {10.1080/10705511.2015.1057284}, - number = {2}, - pages = {194--205}, - volume = {23}, - abstract = {One challenge in mediation analysis is to generate a confidence interval (CI) with high coverage and power that maintains a nominal significance level for any well-defined function of indirect and direct effects in the general context of structural equation modeling (SEM). 
This study discusses a proposed Monte Carlo extension that finds the CIs for any well-defined function of the coefficients of SEM such as the product of $k$ coefficients and the ratio of the contrasts of indirect effects, using the Monte Carlo method. Finally, we conduct a small-scale simulation study to compare CIs produced by the Monte Carlo, nonparametric bootstrap, and asymptotic-delta methods. Based on our simulation study, we recommend researchers use the Monte Carlo method to test a complex function of indirect effects.}, - keywords = {confidence interval, mediation analysis, Monte Carlo}, - publisher = {Informa {UK} Limited}, - annotation = {mediation, mediation-bootstrap, mediation-delta, mediation-montecarlo, semmcci}, -} - -@Article{vanBuuren-GroothuisOudshoorn-2011, - author = {Stef {van Buuren} and Karin Groothuis-Oudshoorn}, - date = {2011}, - journaltitle = {Journal of Statistical Software}, - title = {{mice}: Multivariate Imputation by Chained Equations in {R}}, - doi = {10.18637/jss.v045.i03}, - number = {3}, - volume = {45}, - abstract = {The R package mice imputes incomplete multivariate data by chained equations. The software mice 1.0 appeared in the year 2000 as an S-PLUS library, and in 2001 as an R package. mice 1.0 introduced predictor selection, passive imputation and automatic pooling. This article documents mice, which extends the functionality of mice 1.0 in several ways. In mice, the analysis of imputed data is made completely general, whereas the range of models under which pooling works is substantially extended. mice adds new functionality for imputing multilevel data, automatic predictor selection, data handling, post-processing imputed values, specialized pooling routines, model selection tools, and diagnostic graphs. Imputation of categorical data is improved in order to bypass problems caused by perfect prediction. Special attention is paid to transformations, sum scores, indices and interactions using passive imputation, and to the proper setup of the predictor matrix. mice can be downloaded from the Comprehensive R Archive Network. This article provides a hands-on, stepwise approach to solve applied incomplete data problems.}, - publisher = {Foundation for Open Access Statistic}, - keywords = {MICE, multiple imputation, chained equations, fully conditional specification, Gibbs sampler, predictor selection, passive imputation, R}, -} - -@Article{Wu-Jia-2013, - author = {Wei Wu and Fan Jia}, - date = {2013-09}, - journaltitle = {Multivariate Behavioral Research}, - title = {A new procedure to test mediation with missing data through nonparametric bootstrapping and multiple imputation}, - doi = {10.1080/00273171.2013.816235}, - number = {5}, - pages = {663--691}, - volume = {48}, - abstract = {This article proposes a new procedure to test mediation with the presence of missing data by combining nonparametric bootstrapping with multiple imputation (MI). This procedure performs MI first and then bootstrapping for each imputed data set. The proposed procedure is more computationally efficient than the procedure that performs bootstrapping first and then MI for each bootstrap sample. The validity of the procedure is evaluated using a simulation study under different sample size, missing data mechanism, missing data proportion, and shape of distribution conditions. The result suggests that the proposed procedure performs comparably to the procedure that combines bootstrapping with full information maximum likelihood under most conditions. 
However, caution needs to be taken when using this procedure to handle missing not-at-random or nonnormal data.}, - publisher = {Informa {UK} Limited}, -} - -@Article{Yuan-Chan-2011, - author = {Ke-Hai Yuan and Wai Chan}, - date = {2011-08}, - journaltitle = {Psychometrika}, - title = {Biases and Standard Errors of Standardized Regression Coefficients}, - doi = {10.1007/s11336-011-9224-6}, - number = {4}, - pages = {670--690}, - volume = {76}, - abstract = {The paper obtains consistent standard errors (SE) and biases of order O(1/n) for the sample standardized regression coefficients with both random and given predictors. Analytical results indicate that the formulas for SEs given in popular text books are consistent only when the population value of the regression coefficient is zero. The sample standardized regression coefficients are also biased in general, although it should not be a concern in practice when the sample size is not too small. Monte Carlo results imply that, for both standardized and unstandardized sample regression coefficients, SE estimates based on asymptotics tend to under-predict the empirical ones at smaller sample sizes.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {asymptotics, bias, consistency, Monte Carlo}, - annotation = {standardized-regression, standardized-regression-delta, standardized-regression-normal, standardized-regression-adf}, -} - -@Article{Yzerbyt-Muller-Batailler-etal-2018, - author = {Vincent Yzerbyt and Dominique Muller and C{\a'e}dric Batailler and Charles M. Judd}, - date = {2018-12}, - journaltitle = {Journal of Personality and Social Psychology}, - title = {New recommendations for testing indirect effects in mediational models: The need to report and test component paths}, - doi = {10.1037/pspa0000132}, - number = {6}, - pages = {929--943}, - volume = {115}, - abstract = {In light of current concerns with replicability and reporting false-positive effects in psychology, we examine Type I errors and power associated with 2 distinct approaches for the assessment of mediation, namely the component approach (testing individual parameter estimates in the model) and the index approach (testing a single mediational index). We conduct simulations that examine both approaches and show that the most commonly used tests under the index approach risk inflated Type I errors compared with the joint-significance test inspired by the component approach. We argue that the tendency to report only a single mediational index is worrisome for this reason and also because it is often accompanied by a failure to critically examine the individual causal paths underlying the mediational model. We recommend testing individual components of the indirect effect to argue for the presence of an indirect effect and then using other recommended procedures to calculate the size of that effect. Beyond simple mediation, we show that our conclusions also apply in cases of within-participant mediation and moderated mediation. 
We also provide a new R-package that allows for an easy implementation of our recommendations.}, - publisher = {American Psychological Association ({APA})}, - keywords = {indirect effects, mediation, joint-significance, bootstrap}, -} - -@Article{Zhang-Wang-2012, - author = {Zhiyong Zhang and Lijuan Wang}, - date = {2012-12}, - journaltitle = {Psychometrika}, - title = {Methods for mediation analysis with missing data}, - doi = {10.1007/s11336-012-9301-5}, - number = {1}, - pages = {154--184}, - volume = {78}, - abstract = {Despite wide applications of both mediation models and missing data techniques, formal discussion of mediation analysis with missing data is still rare. We introduce and compare four approaches to dealing with missing data in mediation analysis including listwise deletion, pairwise deletion, multiple imputation (MI), and a two-stage maximum likelihood (TS-ML) method. An R package bmem is developed to implement the four methods for mediation analysis with missing data in the structural equation modeling framework, and two real examples are used to illustrate the application of the four methods. The four methods are evaluated and compared under MCAR, MAR, and MNAR missing data mechanisms through simulation studies. Both MI and TS-ML perform well for MCAR and MAR data regardless of the inclusion of auxiliary variables and for AV-MNAR data with auxiliary variables. Although listwise deletion and pairwise deletion have low power and large parameter estimation bias in many studied conditions, they may provide useful information for exploring missing mechanisms.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {mediation analysis, missing data, MI, TS-ML, bootstrap, auxiliary variables}, -} - -@Book{Eddelbuettel-2013, - author = {Dirk Eddelbuettel}, - date = {2013}, - title = {Seamless {R} and {C++} integration with {Rcpp}}, - doi = {10.1007/978-1-4614-6868-4}, - isbn = {978-1-4614-6868-4}, - publisher = {Springer New York}, - abstract = {Illustrates a range of statistical computations in R using the Rcpp package. Provides a general introduction to extending R with C++ code. Features an appendix for R users new to the C++ programming language. Rcpp packages are presented in the context of useful application case studies.}, - annotation = {r, r-packages}, -} - -@Book{Enders-2010, - author = {Craig K. Enders}, - date = {2010-05-31}, - title = {Applied missing data analysis}, - isbn = {9781606236390}, - pagetotal = {377}, - library = {HA29 .E497 2010}, - addendum = {https://lccn.loc.gov/2010008465}, - abstract = {Walking readers step by step through complex concepts, this book translates missing data techniques into something that applied researchers and graduate students can understand and utilize in their own research. Enders explains the rationale and procedural details for maximum likelihood estimation, Bayesian estimation, multiple imputation, and models for handling missing not at random (MNAR) data. Easy-to-follow examples and small simulated data sets illustrate the techniques and clarify the underlying principles. The companion website (www.appliedmissingdata.com) includes data files and syntax for the examples in the book as well as up-to-date information on software.
The book is accessible to substantive researchers while providing a level of detail that will satisfy quantitative specialists.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Missing observations (Statistics), Social sciences--Research--Methodology}, -} - -@InBook{Koopman-Howe-Hollenbeck-2014, - author = {Joel Koopman and Michael Howe and John R. Hollenbeck}, - booktitle = {More statistical and methodological myths and urban legends: Doctrine, verity and fable in organizational and social sciences}, - date = {2014}, - title = {Pulling the {Sobel} test up by its bootstraps}, - bookauthor = {Charles E. Lance and Robert J. Vandenberg}, - isbn = {9780203775851}, - pages = {224--243}, - doi = {10.4324/9780203775851}, - abstract = {In the domain of building and testing theory, mediation relationships are among the most important that can be proposed. Mediation helps to explicate our theoretical models (Leavitt, Mitchell, \& Peterson, 2010) and addresses the fundamental question of why two constructs are related (Whetten, 1989). One of the better-known methods for testing mediation is commonly referred to as the ``Sobel test,'' named for the researcher who derived a standard error (Sobel, 1982) to test the significance of the indirect effect. Recently, a number of different research teams (e.g., Preacher \& Hayes, 2004; Shrout \& Bolger, 2002) have criticized the Sobel test because this standard error requires an assumption of normality for the indirect effect sampling distribution. This distribution tends to be positively skewed (i.e., not normal), particularly in small samples, and so this assumption can be problematic (Preacher \& Hayes, 2004; Stone \& Sobel, 1990). As a result, the statistical power of the Sobel test may be lessened in these contexts (Preacher \& Hayes, 2004; Shrout \& Bolger, 2002). In light of this concern, some scholars have advocated instead for the use of bootstrapping to test the significance of the indirect effect (e.g., Shrout \& Bolger, 2002). Bootstrapping requires no a priori assumption about the shape of the sampling distribution because this distribution is empirically estimated using a resampling procedure (Efron \& Tibshirani, 1993). As a result, departures from normality are less troublesome when creating a confidence interval for the indirect effect. For this reason, bootstrapping is now widely believed to be inherently superior to the Sobel test when testing the significance of the indirect effect in organizational research. Our position is that this belief constitutes an urban legend. As with all statistical urban legends, there is an underlying kernel of truth to the belief that bootstrapping is superior to the Sobel test. However, as we discuss in this chapter, there are several reasons to be concerned with a broad belief in the superiority of bootstrapping. We begin with a brief overview of mediation testing focusing on the Sobel test and bootstrapping and then explain the underlying kernel of truth that has propelled bootstrapping to the forefront of mediation testing in organizational research. Subsequently, we discuss four areas of concern that cast doubt on the belief of the inherent superiority of bootstrapping. Finally, we conclude with recommendations concerning the future of mediation testing in organizational research.}, - publisher = {Routledge/Taylor \& Francis Group}, -} - -@Book{Little-Rubin-2019, - author = {Roderick J. A. Little and Donald B.
Rubin}, - date = {2019-04}, - title = {Statistical analysis with missing data}, - doi = {10.1002/9781119482260}, - edition = {3}, - isbn = {9781119482260}, - library = {QA276}, - addendum = {https://lccn.loc.gov/2018061330}, - abstract = {An up-to-date, comprehensive treatment of a classic text on missing data in statistics. - The topic of missing data has gained considerable attention in recent decades. This new edition by two acknowledged experts on the subject offers an up-to-date account of practical methodology for handling missing data problems. Blending theory and application, authors Roderick Little and Donald Rubin review historical approaches to the subject and describe simple methods for multivariate analysis with missing values. They then provide a coherent theory for analysis of problems based on likelihoods derived from statistical models for the data and the missing data mechanism, and then they apply the theory to a wide range of important missing data problems. - Statistical Analysis with Missing Data, Third Edition starts by introducing readers to the subject and approaches toward solving it. It looks at the patterns and mechanisms that create the missing data, as well as a taxonomy of missing data. It then goes on to examine missing data in experiments, before discussing complete-case and available-case analysis, including weighting methods. The new edition expands its coverage to include recent work on topics such as nonresponse in sample surveys, causal inference, diagnostic methods, and sensitivity analysis, among a host of other topics. - \begin{itemize} \item An updated ``classic'' written by renowned authorities on the subject \item Features over 150 exercises (including many new ones) \item Covers recent work on important methods like multiple imputation, robust alternatives to weighting, and Bayesian methods \item Revises previous topics based on past student feedback and class experience \item Contains an updated and expanded bibliography \end{itemize} - The authors were awarded The Karl Pearson Prize in 2017 by the International Statistical Institute, for a research contribution that has had profound influence on statistical theory, methodology or applications. Their work ``has been no less than defining and transforming.'' (ISI) - Statistical Analysis with Missing Data, Third Edition is an ideal textbook for upper undergraduate and/or beginning graduate level students of the subject. It is also an excellent source of information for applied statisticians and practitioners in government and industry.}, - publisher = {Wiley}, - keywords = {Mathematical statistics, Mathematical statistics--Problems, exercises, etc., Missing observations (Statistics), Missing observations (Statistics)--Problems, exercises, etc.}, -} - -@Book{Pawitan-2013, - author = {Yudi Pawitan}, - date = {2013-01-17}, - title = {In all likelihood: Statistical modelling and inference using likelihood}, - isbn = {9780199671229}, - pagetotal = {544}, - abstract = {Based on a course in the theory of statistics this text concentrates on what can be achieved using the likelihood/Fisherian method of taking account of uncertainty when studying a statistical problem. It takes the concept of the likelihood as providing the best methods for unifying the demands of statistical modelling and the theory of inference. Every likelihood concept is illustrated by realistic examples, which are not compromised by computational problems.
Examples range from a simple comparison of two accident rates, to complex studies that require generalised linear or semiparametric modelling. - The emphasis is that the likelihood is not simply a device to produce an estimate, but an important tool for modelling. The book generally takes an informal approach, where most important results are established using heuristic arguments and motivated with realistic examples. With the currently available computing power, examples are not contrived to allow a closed analytical solution, and the book can concentrate on the statistical aspects of the data modelling. In addition to classical likelihood theory, the book covers many modern topics such as generalized linear models and mixed models, non parametric smoothing, robustness, the EM algorithm and empirical likelihood.}, - publisher = {Oxford University Press}, -} - -@Book{vanBuuren-2018, - author = {Stef {van Buuren}}, - date = {2018-07}, - title = {Flexible imputation of missing data}, - doi = {10.1201/9780429492259}, - edition = {2}, - isbn = {9780429492259}, - publisher = {Chapman and Hall/{CRC}}, - library = {QA278}, - addendum = {https://lccn.loc.gov/2019719619}, - abstract = {Missing data pose challenges to real-life data analysis. Simple ad-hoc fixes, like deletion or mean imputation, only work under highly restrictive conditions, which are often not met in practice. Multiple imputation replaces each missing value by multiple plausible values. The variability between these replacements reflects our ignorance of the true (but missing) value. Each of the completed data sets is then analyzed by standard methods, and the results are pooled to obtain unbiased estimates with correct confidence intervals. Multiple imputation is a general approach that also inspires novel solutions to old problems by reformulating the task at hand as a missing-data problem. - This is the second edition of a popular book on multiple imputation, focused on explaining the application of methods through detailed worked examples using the MICE package as developed by the author. This new edition incorporates the recent developments in this fast-moving field. - This class-tested book avoids mathematical and technical details as much as possible: formulas are accompanied by verbal statements that explain the formula in accessible terms. The book sharpens the reader’s intuition on how to think about missing data, and provides all the tools needed to execute a well-grounded quantitative analysis in the presence of missing data.}, - keywords = {Multivariate analysis, Multiple imputation (Statistics), Missing observations (Statistics)}, -} - -@InCollection{Zhang-Wang-Tong-2015, - author = {Zhiyong Zhang and Lijuan Wang and Xin Tong}, - booktitle = {Quantitative Psychology Research}, - date = {2015}, - title = {Mediation analysis with missing data through multiple imputation and bootstrap}, - doi = {10.1007/978-3-319-19977-1_24}, - pages = {341--355}, - abstract = {A method using multiple imputation and bootstrap for dealing with missing data in mediation analysis is introduced and implemented in both SAS and R. Through simulation studies, it is shown that the method performs well for both MCAR and MAR data without and with auxiliary variables. It is also shown that the method can work for MNAR data if auxiliary variables related to missingness are included. The application of the method is demonstrated through the analysis of a subset of data from the National Longitudinal Survey of Youth.
Mediation analysis with missing data can be conducted using the provided SAS macros and R package bmem.}, - publisher = {Springer International Publishing}, - keywords = {mediation analysis, missing data, multiple imputation, bootstrap}, -} - -@Report{Jones-Waller-2013b, - author = {Jeff A. Jones and Niels G. Waller}, - date = {2013-05-25}, - institution = {University of Minnesota-Twin Cities}, - title = {The normal-theory and asymptotic distribution-free ({ADF}) covariance matrix of standardized regression coefficients: Theoretical extensions and finite sample behavior}, - type = {techreport}, - url = {http://users.cla.umn.edu/~nwaller/downloads/techreports/TR052913.pdf}, - urldate = {2022-07-22}, - abstract = {Yuan and Chan (2011) recently showed how to compute the covariance matrix of standardized regression coefficients from covariances. In this paper, we describe a new method for computing this covariance matrix from correlations. We then show that Yuan and Chan's original equations can also be used when only correlational data are available. Next, we describe an asymptotic distribution-free (ADF; Browne, 1984) method for computing the covariance matrix of standardized regression coefficients. We show that the ADF method works well with non-normal data in moderate-to-large samples using both simulated and real-data examples. Finally, we provide R code (R Development Core Team, 2012) in an Appendix to make these methods accessible to applied researchers.}, -} - -@Manual{Muthen-Muthen-2017, - author = {Linda K. Muth{\a'e}n and Bengt O. Muth{\a'e}n}, - date = {2017}, - title = {{Mplus} user’s guide. {Eighth} edition}, - location = {Los Angeles, CA}, - publisher = {{Muth\'en} \& {Muth\'en}}, - annotation = {sem, sem-software}, -} - -@Article{Cheung-2021, - author = {Mike W.-L. Cheung}, - date = {2021-06}, - journaltitle = {Alcohol and Alcoholism}, - title = {Synthesizing indirect effects in mediation models with meta-analytic methods}, - doi = {10.1093/alcalc/agab044}, - number = {1}, - pages = {5--15}, - volume = {57}, - abstract = {Aims - A mediator is a variable that explains the underlying mechanism between an independent variable and a dependent variable. The indirect effect indicates the effect from the predictor to the outcome variable via the mediator. In contrast, the direct effect represents the predictor's effect on the outcome variable after controlling for the mediator. - Methods - A single study rarely provides enough evidence to answer research questions in a particular domain. Replications are generally recommended as the gold standard to conduct scientific research. When a sufficient number of studies have been conducted addressing similar research questions, a meta-analysis can be used to synthesize those studies' findings. - Results - The main objective of this paper is to introduce two frameworks to integrating studies using mediation analysis. The first framework involves calculating standardized indirect effects and direct effects and conducting a multivariate meta-analysis on those effect sizes. The second one uses meta-analytic structural equation modeling to synthesize correlation matrices and fit mediation models on the average correlation matrix. We illustrate these procedures on a real dataset using the R statistical platform.
- Conclusion - This paper closes with some further directions for future studies.}, - publisher = {Oxford University Press ({OUP})}, - keywords = {heterogeneity, gold standard, outcome variable, datasets, mediation analysis}, -} - -@Article{Cheung-Pesigan-2023a, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-01}, - journaltitle = {Multivariate Behavioral Research}, - title = {{FINDOUT}: Using either {SPSS} commands or graphical user interface to identify influential cases in structural equation modeling in {AMOS}}, - doi = {10.1080/00273171.2022.2148089}, - pages = {1--5}, - abstract = {The results in a structural equation modeling (SEM) analysis can be influenced by just a few observations, called influential cases. Tools have been developed for users of R to identify them. However, similar tools are not available for AMOS, which is also a popular SEM software package. We introduce the FINDOUT toolset, a group of SPSS extension commands, and an AMOS plugin, to identify influential cases and examine how these cases influence the results. The SPSS commands can be used either as syntax commands or as custom dialogs from pull-down menus, and the AMOS plugin can be run from AMOS pull-down menu. We believe these tools can help researchers to examine the robustness of their findings to influential cases.}, - publisher = {Informa {UK} Limited}, - keywords = {influential cases, outliers, structural equation modeling, AMOS, sensitivity analysis, SPSS}, -} - -@Article{Cheung-Pesigan-2023b, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan}, - date = {2023-05}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {{semlbci}: An {R} package for forming likelihood-based confidence intervals for parameter estimates, correlations, indirect effects, and other derived parameters}, - doi = {10.1080/10705511.2023.2183860}, - pages = {1--15}, - abstract = {There are three common types of confidence interval (CI) in structural equation modeling (SEM): Wald-type CI, bootstrapping CI, and likelihood-based CI (LBCI). LBCI has the following advantages: (1) it has better coverage probabilities and Type I error rate compared to Wald-type CI when the sample size is finite; (2) it correctly tests the null hypothesis of a parameter based on likelihood ratio chi-square difference test; (3) it is less computationally intensive than bootstrapping CI; and (4) it is invariant to transformations. However, LBCI is not available in many popular SEM software packages. We developed an R package, semlbci, for forming LBCI for parameters in models fitted by lavaan, a popular open-source SEM package, such that researchers have more options in forming CIs for parameters in SEM. 
The package supports both unstandardized and standardized estimates, derived parameters such as indirect effect, multisample models, and the robust LBCI proposed by Falk.}, - publisher = {Informa {UK} Limited}, - keywords = {confidence interval, likelihood-based confidence interval, robust method, structural equation modeling}, - annotation = {r, r-packages, sem, sem-software, lb}, -} - -@Article{Cheung-Pesigan-Vong-2022, - author = {Shu Fai Cheung and Ivan Jacob Agaloos Pesigan and Weng Ngai Vong}, - date = {2022-03}, - journaltitle = {Behavior Research Methods}, - title = {{DIY} bootstrapping: Getting the nonparametric bootstrap confidence interval in {SPSS} for any statistics or function of statistics (when this bootstrapping is appropriate)}, - doi = {10.3758/s13428-022-01808-5}, - number = {2}, - pages = {474--490}, - volume = {55}, - abstract = {Researchers can generate bootstrap confidence intervals for some statistics in SPSS using the BOOTSTRAP command. However, this command can only be applied to selected procedures, and only to selected statistics in these procedures. We developed an extension command and prepared some sample syntax files based on existing approaches from the Internet to illustrate how researchers can (a) generate a large number of nonparametric bootstrap samples, (b) do desired analysis on all these samples, and (c) form the bootstrap confidence intervals for selected statistics using the OMS commands. We developed these tools to help researchers apply nonparametric bootstrapping to any statistics for which this method is appropriate, including statistics derived from other statistics, such as standardized effect size measures computed from the t test results. We also discussed how researchers can extend the tools for other statistics and scenarios they encounter.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {bootstrapping, effect sizes, confidence intervals}, -} - -@Article{Li-Oravecz-Zhou-etal-2022, - author = {Yanling Li and Zita Oravecz and Shuai Zhou and Yosef Bodovski and Ian J. Barnett and Guangqing Chi and Yuan Zhou and Naomi P. Friedman and Scott I. Vrieze and Sy-Miin Chow}, - date = {2022-01}, - journaltitle = {Psychometrika}, - title = {{Bayesian} forecasting with a regime-switching zero-inflated multilevel poisson regression model: An application to adolescent alcohol use with spatial covariates}, - doi = {10.1007/s11336-021-09831-9}, - number = {2}, - pages = {376--402}, - volume = {87}, - abstract = {In this paper, we present and evaluate a novel Bayesian regime-switching zero-inflated multilevel Poisson (RS-ZIMLP) regression model for forecasting alcohol use dynamics. The model partitions individuals’ data into two phases, known as regimes, with: (1) a zero-inflation regime that is used to accommodate high instances of zeros (non-drinking) and (2) a multilevel Poisson regression regime in which variations in individuals’ log-transformed average rates of alcohol use are captured by means of an autoregressive process with exogenous predictors and a person-specific intercept. The times at which individuals are in each regime are unknown, but may be estimated from the data. We assume that the regime indicator follows a first-order Markov process as related to exogenous predictors of interest. The forecast performance of the proposed model was evaluated using a Monte Carlo simulation study and further demonstrated using substance use and spatial covariate data from the Colorado Online Twin Study (CoTwins). 
Results showed that the proposed model yielded better forecast performance compared to a baseline model which predicted all cases as non-drinking and a reduced ZIMLP model without the RS structure, as indicated by higher AUC (the area under the receiver operating characteristic (ROC) curve) scores, and lower mean absolute errors (MAEs) and root-mean-square errors (RMSEs). The improvements in forecast performance were even more pronounced when we limited the comparisons to participants who showed at least one instance of transition to drinking. }, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Bayesian zero-inflated Poisson model, forecast, intensive longitudinal data, regime-switching, spatial data, substance use}, - annotation = {bayesian, ild}, -} - -@Article{McNeish-MacKinnon-2022, - author = {Daniel McNeish and David P. MacKinnon}, - date = {2022-12}, - journaltitle = {Psychological Methods}, - title = {Intensive longitudinal mediation in {Mplus}}, - doi = {10.1037/met0000536}, - abstract = {Much of the existing longitudinal mediation literature focuses on panel data where relatively few repeated measures are collected over a relatively broad timespan. However, technological advances in data collection (e.g., smartphones, wearables) have led to a proliferation of short duration, densely collected longitudinal data in behavioral research. These intensive longitudinal data differ in structure and focus relative to traditionally collected panel data. As a result, existing methodological resources do not necessarily extend to nuances present in the recent influx of intensive longitudinal data and designs. In this tutorial, we first cover potential limitations of traditional longitudinal mediation models to accommodate unique characteristics of intensive longitudinal data. Then, we discuss how recently developed dynamic structural equation models (DSEMs) may be well-suited for mediation modeling with intensive longitudinal data and can overcome some of the limitations associated with traditional approaches. We describe four increasingly complex intensive longitudinal mediation models: (a) stationary models where the indirect effect is constant over time and people, (b) person-specific models where the indirect effect varies across people, (c) dynamic models where the indirect effect varies across time, and (d) cross-classified models where the indirect effect varies across both time and people. We apply each model to a running example featuring a mobile health intervention designed to improve health behavior of individuals with binge eating disorder. 
In each example, we provide annotated Mplus code and interpretation of the output to guide empirical researchers through mediation modeling with this increasingly popular type of longitudinal data.}, - publisher = {American Psychological Association ({APA})}, - keywords = {intensive longitudinal data, time-series, mediation, EMA, daily diary}, - annotation = {ild, ild-mediation, ild-software}, -} - -@Article{Nust-Eddelbuettel-Bennett-etal-2020, - author = {Daniel N{\"u}st and Dirk Eddelbuettel and Dom Bennett and Robrecht Cannoodt and Dav Clark and Gergely Dar{\a'o}czi and Mark Edmondson and Colin Fay and Ellis Hughes and Lars Kjeldgaard and Sean Lopp and Ben Marwick and Heather Nolis and Jacqueline Nolis and Hong Ooi and Karthik Ram and Noam Ross and Lori Shepherd and P{\a'e}ter S{\a'o}lymos and Tyson Lee Swetnam and Nitesh Turaga and Charlotte {Van Petegem} and Jason Williams and Craig Willis and Nan Xiao}, - date = {2020}, - journaltitle = {The R Journal}, - title = {The {Rockerverse}: Packages and applications for containerisation with {R}}, - doi = {10.32614/rj-2020-007}, - number = {1}, - pages = {437}, - volume = {12}, - abstract = {The Rocker Project provides widely used Docker images for R across different application scenarios. This article surveys downstream projects that build upon the Rocker Project images and presents the current state of R packages for managing Docker images and controlling containers. These use cases cover diverse topics such as package development, reproducible research, collaborative work, cloud-based data processing, and production deployment of services. The variety of applications demonstrates the power of the Rocker Project specifically and containerisation in general. Across the diverse ways to use containers, we identified common themes: reproducible environments, scalability and efficiency, and portability across clouds. We conclude that the current growth and diversification of use cases is likely to continue its positive impact, but see the need for consolidating the Rockerverse ecosystem of packages, developing common practices for applications, and exploring alternative containerisation software.}, - publisher = {The R Foundation}, - annotation = {container, container-docker, container-rocker}, -} - -@Article{Pesigan-Cheung-2020, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2020-12}, - journaltitle = {Frontiers in Psychology}, - title = {{SEM}-based methods to form confidence intervals for indirect effect: Still applicable given nonnormality, under certain conditions}, - doi = {10.3389/fpsyg.2020.571928}, - volume = {11}, - abstract = {A SEM-based approach using likelihood-based confidence interval (LBCI) has been proposed to form confidence intervals for unstandardized and standardized indirect effect in mediation models. However, when used with the maximum likelihood estimation, this approach requires that the variables are multivariate normally distributed. This can affect the LBCIs of unstandardized and standardized effect differently. In the present study, the robustness of this approach when the predictor is not normally distributed but the error terms are conditionally normal, which does not violate the distributional assumption of ordinary least squares (OLS) estimation, is compared to four other approaches: nonparametric bootstrapping, two variants of LBCI, LBCI assuming the predictor is fixed (LBCI-Fixed-X) and LBCI based on ADF estimation (LBCI-ADF), and Monte Carlo. 
A simulation study was conducted using a simple mediation model and a serial mediation model, manipulating the distribution of the predictor. The Monte Carlo method performed worst among the methods. LBCI and LBCI-Fixed-X had suboptimal performance when the distributions had high kurtosis and the population indirect effects were medium to large. In some conditions, the problem was severe even when the sample size was large. LBCI-ADF and nonparametric bootstrapping had coverage probabilities close to the nominal value in nearly all conditions, although the coverage probabilities were still suboptimal for the serial mediation model when the sample size was small with respect to the model. Implications of these findings in the context of this special case of nonnormal data were discussed.}, - publisher = {Frontiers Media {SA}}, - keywords = {mediation, nonnormal, confidence interval, structural equation modeling, bootstrapping}, -} - -@Article{Pesigan-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Shu Fai Cheung}, - date = {2023-08}, - journaltitle = {Behavior Research Methods}, - title = {{Monte Carlo} confidence intervals for the indirect effect with missing data}, - doi = {10.3758/s13428-023-02114-4}, - abstract = {Missing data is a common occurrence in mediation analysis. As a result, the methods used to construct confidence intervals around the indirect effect should consider missing data. Previous research has demonstrated that, for the indirect effect in data with complete cases, the Monte Carlo method performs as well as nonparametric bootstrap confidence intervals (see MacKinnon et al., Multivariate Behavioral Research, 39(1), 99–128, 2004; Preacher \& Selig, Communication Methods and Measures, 6(2), 77–98, 2012; Tofighi \& MacKinnon, Structural Equation Modeling: A Multidisciplinary Journal, 23(2), 194–205, 2015). In this manuscript, we propose a simple, fast, and accurate two-step approach for generating confidence intervals for the indirect effect, in the presence of missing data, based on the Monte Carlo method. In the first step, an appropriate method, for example, full-information maximum likelihood or multiple imputation, is used to estimate the parameters and their corresponding sampling variance-covariance matrix in a mediation model. In the second step, the sampling distribution of the indirect effect is simulated using estimates from the first step. A confidence interval is constructed from the resulting sampling distribution. A simulation study with various conditions is presented. Implications of the results for applied research are discussed.}, - publisher = {Springer Science and Business Media {LLC}}, - keywords = {Monte Carlo method, nonparametric bootstrap, indirect effect, mediation, missing completely at random, missing at random, full-information maximum likelihood, multiple imputation}, - annotation = {mediation, mediation-montecarlo, mediation-bootstrap, semmcci}, -} - -@Article{Pesigan-Sun-Cheung-2023, - author = {Ivan Jacob Agaloos Pesigan and Rong Wei Sun and Shu Fai Cheung}, - date = {2023-04}, - journaltitle = {Multivariate Behavioral Research}, - title = {{betaDelta} and {betaSandwich}: Confidence intervals for standardized regression coefficients in {R}}, - doi = {10.1080/00273171.2023.2201277}, - pages = {1--4}, - abstract = {The multivariate delta method was used by Yuan and Chan to estimate standard errors and confidence intervals for standardized regression coefficients. 
Jones and Waller extended the earlier work to situations where data are nonnormal by utilizing Browne’s asymptotic distribution-free (ADF) theory. Furthermore, Dudgeon developed standard errors and confidence intervals, employing heteroskedasticity-consistent (HC) estimators, that are robust to nonnormality with better performance in smaller sample sizes compared to Jones and Waller’s ADF technique. Despite these advancements, empirical research has been slow to adopt these methodologies. This can be a result of the dearth of user-friendly software programs to put these techniques to use. We present the betaDelta and the betaSandwich packages in the R statistical software environment in this manuscript. Both the normal-theory approach and the ADF approach put forth by Yuan and Chan and Jones and Waller are implemented by the betaDelta package. The HC approach proposed by Dudgeon is implemented by the betaSandwich package. The use of the packages is demonstrated with an empirical example. We think the packages will enable applied researchers to accurately assess the sampling variability of standardized regression coefficients.}, - publisher = {Informa {UK} Limited}, - keywords = {standardized regression coefficients, confidence intervals, delta method standard errors, heteroskedasticity-consistent standard errors, R package}, - annotation = {r, r-packages}, -} - -@Article{Savalei-Rosseel-2021, - author = {Victoria Savalei and Yves Rosseel}, - date = {2021-10}, - journaltitle = {Structural Equation Modeling: A Multidisciplinary Journal}, - title = {Computational options for standard errors and test statistics with incomplete normal and nonnormal data in {SEM}}, - doi = {10.1080/10705511.2021.1877548}, - number = {2}, - pages = {163--181}, - volume = {29}, - abstract = {This article provides an overview of different computational options for inference following normal theory maximum likelihood (ML) estimation in structural equation modeling (SEM) with incomplete normal and nonnormal data. Complete data are covered as a special case. These computational options include whether the information matrix is observed or expected, whether the observed information matrix is estimated numerically or using an analytic asymptotic approximation, and whether the information matrix and the outer product matrix of the score vector are evaluated at the saturated or at the structured estimates. A variety of different standard errors and robust test statistics become possible by varying these options. We review the asymptotic properties of these computational variations, and we show how to obtain them using lavaan in R. We hope that this article will encourage methodologists to study the impact of the available computational options on the performance of standard errors and test statistics in SEM.}, - publisher = {Informa {UK} Limited}, - keywords = {incomplete data, nonnormal data, robust corrections, software implementation}, -} - -@Article{Tofighi-Kelley-2020, - author = {Davood Tofighi and Ken Kelley}, - date = {2020}, - journaltitle = {Psychological Methods}, - title = {Improved inference in mediation analysis: Introducing the model-based constrained optimization procedure}, - doi = {10.1037/met0000259}, - pages = {496--515}, - volume = {25}, - abstract = {Mediation analysis is an important approach for investigating causal pathways. 
One approach used in mediation analysis is the test of an indirect effect, which seeks to measure how the effect of an independent variable impacts an outcome variable through one or more mediators. However, in many situations the proposed tests of indirect effects, including popular confidence interval-based methods, tend to produce poor Type I error rates when mediation does not occur and, more generally, only allow dichotomous decisions of ``not significant'' or ``significant'' with regards to the statistical conclusion. To remedy these issues, we propose a new method, a likelihood ratio test (LRT), that uses non-linear constraints in what we term the model-based constrained optimization (MBCO) procedure. The MBCO procedure (a) offers a more robust Type I error rate than existing methods; (b) provides a p-value, which serves as a continuous measure of compatibility of data with the hypothesized null model (not just a dichotomous reject or fail-to-reject decision rule); (c) allows simple and complex hypotheses about mediation (i.e., one or more mediators; different mediational pathways), and (d) allows the mediation model to use observed or latent variables. The MBCO procedure is based on a structural equation modeling framework (even if latent variables are not specified) with specialized fitting routines, namely with the use of non-linear constraints. We advocate using the MBCO procedure to test hypotheses about an indirect effect in addition to reporting a confidence interval to capture uncertainty about the indirect effect because this combination transcends existing methods.}, - publisher = {{American Psychological Association ({APA})}}, -} - -@Article{Wang-Zhang-2020, - author = {Lijuan Wang and Qian Zhang}, - date = {2020-06}, - journaltitle = {Psychological Methods}, - title = {Investigating the impact of the time interval selection on autoregressive mediation modeling: Result interpretations, effect reporting, and temporal designs}, - doi = {10.1037/met0000235}, - number = {3}, - pages = {271--291}, - volume = {25}, - abstract = {This study investigates the impact of the time interval (the time passed between 2 consecutive measurements) selection on autoregressive mediation modeling (AMM). For a widely used autoregressive mediation model, via analytical derivations, we explained why and how the conventionally reported time-specific coefficient estimates (e.g., $\hat{a} \hat{b}$ and $\hat{c}^{\prime}$ ) and inference results in AMM provide limited information and can arrive in even misleading conclusions about direct and indirect effects over time. Furthermore, under the stationarity assumption, we proposed an approach to calculate the overall direct and indirect effect estimates over time and the time lag lengths at which they reach maxima, using AMM results. The derivation results revealed that the overall direct and indirect effect curves are asymptotically invariant to the time interval selection, under stationarity. With finite samples and thus sampling errors and potential computing problems, however, our simulation results revealed that the overall indirect effect curves were better recovered when the time interval is selected to be closer to half of the time lag length at which the overall indirect effect reaches its maximum. An R function and an R Shiny app were developed to obtain the overall direct and indirect effect curves over time and facilitate the time interval selection using AMM results. 
Our findings provide another look at the connections between AMM and continuous time mediation modeling and the connections are discussed.}, - publisher = {American Psychological Association ({APA})}, - keywords = {longitudinal mediation, autoregressive mediation modeling, time interval selection, time-specific indirect effect, overall indirect effect}, - annotation = {ild, ild-mediation}, -} - -@Book{Hayes-2022, - author = {Andrew F. Hayes}, - date = {2022}, - title = {Introduction to mediation, moderation, and conditional process analysis: A regression-based approach}, - series = {Methodology in the social sciences}, - edition = {3}, - isbn = {9781462549030}, - pages = {732}, - library = {HA31.3 .H39 2022}, - addendum = {https://lccn.loc.gov/2021031108}, - abstract = {Lauded for its easy-to-understand, conversational discussion of the fundamentals of mediation, moderation, and conditional process analysis, this book has been fully revised with 50\% new content, including sections on working with multicategorical antecedent variables, the use of PROCESS version 3 for SPSS and SAS for model estimation, and annotated PROCESS v3 outputs. Using the principles of ordinary least squares regression, Andrew F. Hayes carefully explains procedures for testing hypotheses about the conditions under and the mechanisms by which causal effects operate, as well as the moderation of such mechanisms. Hayes shows how to estimate and interpret direct, indirect, and conditional effects; probe and visualize interactions; test questions about moderated mediation; and report different types of analyses. Data for all the examples are available on the companion website (www.afhayes.com) along with links to download PROCESS.}, - publisher = {Guilford Publications}, - keywords = {Social sciences--Statistical methods, Mediation (Statistics), Regression analysis}, -} - -@Manual{Arbuckle-2020, - author = {James L. Arbuckle}, - date = {2020}, - title = {Amos 27.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Manual{Arbuckle-2021, - author = {James L. Arbuckle}, - date = {2021}, - title = {Amos 28.0 user's guide}, - location = {Chicago}, - publisher = {IBM SPSS}, - annotation = {sem, sem-software}, -} - -@Report{Asparouhov-Muthen-2022, - author = {Tihomir Asparouhov and Bengt O. Muth{\a'e}n}, - date = {2022}, - title = {Multiple imputation with {Mplus}}, - type = {techreport}, - url = {http://www.statmodel.com/download/Imputations7.pdf}, - institution = {http://www.statmodel.com}, -} - -@Manual{Eddelbuettel-Francois-Allaire-etal-2023, - title = {{Rcpp}: Seamless {R} and {C++} Integration}, - author = {Dirk Eddelbuettel and Romain Francois and JJ Allaire and Kevin Ushey and Qiang Kou and Nathan Russell and Inaki Ucar and Douglas Bates and John Chambers}, - year = {2023}, - note = {R package version 1.0.11}, - url = {https://CRAN.R-project.org/package=Rcpp}, - annotation = {r, r-package}, -} - -@Manual{Jorgensen-Pornprasertmanit-Schoemann-etal-2022, - title = {{semTools}: Useful tools for structural equation modeling}, - author = {Terrence D. Jorgensen and Sunthud Pornprasertmanit and Alexander M. Schoemann and Yves Rosseel}, - year = {2022}, - note = {R package version 0.5-6}, - url = {https://CRAN.R-project.org/package=semTools}, -} - -@Misc{Kurtzer-cclerget-Bauer-etal-2021, - author = {Gregory M. 
Kurtzer and {cclerget} and Michael Bauer and Ian Kaneshiro and David Trudgian and David Godlove}, - date = {2021}, - title = {{hpcng/singularity: Singularity 3.7.3}}, - doi = {10.5281/ZENODO.1310023}, - copyright = {Open Access}, - publisher = {Zenodo}, - annotation = {container, container-singularity}, -} - -@Manual{RCoreTeam-2021, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2021}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2022, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2022}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{RCoreTeam-2023, - title = {{R}: A language and environment for statistical computing}, - author = {{R Core Team}}, - organization = {R Foundation for Statistical Computing}, - date = {2023}, - location = {Vienna, Austria}, - url = {https://www.R-project.org/}, - annotation = {r, r-manual}, -} - -@Manual{Waller-2022, - author = {Niels G. Waller}, - title = {{fungible}: Psychometric functions from the {Waller Lab}}, - year = {2022}, - note = {R package version 2.2.1}, - url = {https://CRAN.R-project.org/package=fungible}, - publisher = {The R Foundation}, - annotation = {r, r-package}, -} - -@PhdThesis{Pesigan-2022, - author = {Ivan Jacob Agaloos Pesigan}, - year = {2022}, - school = {University of Macau}, - title = {Confidence intervals for standardized coefficients: Applied to regression coefficients in primary studies and indirect effects in meta-analytic structural equation modeling}, - type = {phdthesis}, -}