Skip to content

Commit

Permalink
remove weighting options and rename to consistent PLS (#204)
Browse files Browse the repository at this point in the history
* undo reset

* undo delete stuff in merge conflict

* add construct scores to data set

* remove some options

* first try at the pls help file
  • Loading branch information
juliuspfadt authored Oct 8, 2024
1 parent e89d059 commit 29c517f
Show file tree
Hide file tree
Showing 7 changed files with 527 additions and 466 deletions.
7 changes: 3 additions & 4 deletions R/PLSSEMWrapper.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ PLSSEM <- function(
benchmark = "none",
bootstrapSamples = 200,
ciLevel = 0.95,
compositeCorrelationDisattenuated = TRUE,
consistentPartialLeastSquares = TRUE,
convergenceCriterion = "absoluteDifference",
correctionFactor = "squaredEuclidean",
correlationMatrix = "pearson",
Expand All @@ -50,8 +50,7 @@ PLSSEM <- function(
robustMethod = "bootstrap",
seed = 1,
setSeed = FALSE,
structuralModelIgnored = FALSE,
weightingApproach = "PLS-PM") {
structuralModelIgnored = FALSE) {

defaultArgCalls <- formals(jaspSem::PLSSEM)
defaultArgs <- lapply(defaultArgCalls, eval)
Expand All @@ -62,7 +61,7 @@ PLSSEM <- function(
options[["data"]] <- NULL
options[["version"]] <- NULL

optionsWithFormula <- c("convergenceCriterion", "correctionFactor", "group", "handlingOfFlippedSigns", "innerWeightingScheme", "models", "weightingApproach")
optionsWithFormula <- c("convergenceCriterion", "correctionFactor", "group", "handlingOfFlippedSigns", "innerWeightingScheme", "models")
for (name in optionsWithFormula) {
if ((name %in% optionsWithFormula) && inherits(options[[name]], "formula")) options[[name]] = jaspBase::jaspFormula(options[[name]], data) }

Expand Down
132 changes: 95 additions & 37 deletions R/plssem.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ PLSSEMInternal <- function(jaspResults, dataset, options, ...) {
.semMardiasCoefficient(modelContainer, dataset, options, ready)
.plsSemReliabilities(modelContainer, dataset, options, ready)
.plsSemCor(modelContainer, options, ready)

.plsAddConstructScores(jaspResults, modelContainer, options, ready)
}

.plsSemPrepOpts <- function(options) {
Expand Down Expand Up @@ -149,15 +151,17 @@ checkCSemModel <- function(model, availableVars) {


.plsSemModelContainer <- function(jaspResults) {

if (!is.null(jaspResults[["modelContainer"]])) {
modelContainer <- jaspResults[["modelContainer"]]
} else {
modelContainer <- createJaspContainer()
modelContainer$dependOn(c("weightingApproach", "correlationMatrix", "convergenceCriterion",
"estimateStructural", "group", "correctionFactor", "compositeCorrelationDisattenuated",
"structuralModelIgnored", "innerWeightingScheme", "errorCalculationMethod", "robustMethod", "bootstrapSamples", "ciLevel",
"setSeed", "seed", "handlingOfInadmissibles", "Data", "handlingOfFlippedSigns", "endogenousIndicatorPrediction",
"kFolds", "repetitions", "benchmark", "predictedScore"))
modelContainer$dependOn(c("syntax", "convergenceCriterion",
"estimateStructural", "group", "consistentPartialLeastSquares",
"structuralModelIgnored", "innerWeightingScheme", "errorCalculationMethod",
"robustMethod", "bootstrapSamples", "ciLevel",
"setSeed", "seed", "handlingOfInadmissibles", "endogenousIndicatorPrediction",
"kFolds", "repetitions", "benchmark", "predictedScore", "models"))
jaspResults[["modelContainer"]] <- modelContainer
}

Expand Down Expand Up @@ -233,16 +237,12 @@ checkCSemModel <- function(model, availableVars) {
}
# resample
fit <- try(cSEM::resamplecSEMResults(.object = fit,
.R = options[["bootstrapSamples"]],
.user_funs = tickFunction,
.resample_method = options[["robustMethod"]],
.handle_inadmissibles = options[["handlingOfInadmissibles"]],
.sign_change_option = switch(options[["handlingOfFlippedSigns"]],
"individualReestimation" = "individual_reestimate",
"constructReestimation" = "construct_reestimate",
options[["handlingOfFlippedSigns"]]
),
.seed = if (options[["setSeed"]]) options[["seed"]]))
.R = options[["bootstrapSamples"]],
.user_funs = tickFunction,
.resample_method = options[["robustMethod"]],
.handle_inadmissibles = options[["handlingOfInadmissibles"]],
.sign_change_option = "none",
.seed = if (options[["setSeed"]]) options[["seed"]]))


if (isTryError(fit)) {
Expand Down Expand Up @@ -278,8 +278,8 @@ checkCSemModel <- function(model, availableVars) {
cSemOpts <- list()

# model features
cSemOpts[[".approach_weights"]] <- options[["weightingApproach"]]
cSemOpts[[".approach_cor_robust"]] <- if (options[["correlationMatrix"]] == "pearson") "none" else options[["correlationMatrix"]]
cSemOpts[[".approach_weights"]] <- "PLS-PM"
cSemOpts[[".approach_cor_robust"]] <- "none"
cSemOpts[[".approach_nl"]] <- options[["approachNonLinear"]]
cSemOpts[[".conv_criterion"]] <- switch(options[["convergenceCriterion"]],
"absoluteDifference" = "diff_absolute",
Expand All @@ -290,16 +290,9 @@ checkCSemModel <- function(model, availableVars) {
cSemOpts[[".PLS_ignore_structural_model"]] <- options[["structuralModelIgnored"]]
cSemOpts[[".PLS_weight_scheme_inner"]] <- options[["innerWeightingScheme"]]

if (options[["compositeCorrelationDisattenuated"]]) {
if (options[["consistentPartialLeastSquares"]]) {
cSemOpts[".disattenuate"] <- TRUE
cSemOpts[".PLS_approach_cf"] <- switch(options[["correctionFactor"]],
"squaredEuclidean" = "dist_squared_euclid",
"weightedEuclidean" = "dist_euclid_weighted",
"fisherTransformed" = "fisher_transformed",
"arithmeticMean" = "mean_arithmetic",
"geometricMean" = "mean_geometric",
"harmonicMean" = "mean_harmonic",
"geometricHarmonicMean" = "geo_of_harmonic")
cSemOpts[".PLS_approach_cf"] <- "dist_squared_euclid"

} else {
cSemOpts[".disattenuate"] <- FALSE
Expand Down Expand Up @@ -1059,19 +1052,19 @@ checkCSemModel <- function(model, availableVars) {
predictcont <- createJaspContainer(name, initCollapsed = TRUE)
}

#Error messages
# Error messages

if (options[["benchmark"]] != "none" && options[["benchmark"]] != "all") {
benchmarks <- options[["benchmark"]]
}
else if (options[["benchmark"]] == "all") {
benchmarks <- c("lm", "PLS-PM", "GSCA", "PCA", "MAXVAR")
benchmarks <- benchmarks[benchmarks != options[["weightingApproach"]]]
benchmarks <- benchmarks[benchmarks != "PLS-PM"]
} else {
benchmarks <- NULL
}

if (options[["benchmark"]] != "none" && options[["benchmark"]] != "all" && benchmarks == options[["weightingApproach"]]) {
if (options[["benchmark"]] != "none" && options[["benchmark"]] != "all" && benchmarks == "PLS-PM") {
errormsg <- gettextf("The target model uses the same weighting approach as the benchmark model, please choose another benchmark.")
modelContainer$setError(errormsg)
modelContainer$dependOn("benchmark")
Expand Down Expand Up @@ -1961,6 +1954,72 @@ checkCSemModel <- function(model, availableVars) {
return()
}


# Creates one jaspColumn per construct score of every fitted model and fills it
# with the scores returned by cSEM::getConstructScores().
#
# Column names are built as "<model name>_<construct name>", with spaces in the
# model name replaced by underscores. Names of columns created in a previous run
# are read from the "createdColumnNames" state; those that are no longer produced
# are deleted.
#
# Arguments:
#   jaspResults     results object; holds "addedScoresContainer" and the
#                   "createdColumnNames" state
#   modelContainer  container providing the "models" and "results" state objects
#   options         analysis options; only "addConstructScores" is read here
#   ready           logical, whether the analysis can run
#
# Returns NULL; called for its side effects only.
.plsAddConstructScores <- function(jaspResults, modelContainer, options, ready) {

  # bail out when not ready, when the columns were already added, when the model
  # errored, or when the user did not ask for construct scores
  if (!ready ||
      !is.null(jaspResults[["addedScoresContainer"]]) ||
      modelContainer$getError() ||
      !options[["addConstructScores"]]) {
    return()
  }

  container <- createJaspContainer()
  container$dependOn(optionsFromObject = modelContainer, options = "addConstructScores")
  jaspResults[["addedScoresContainer"]] <- container

  models  <- modelContainer[["models"]][["object"]]
  results <- modelContainer[["results"]][["object"]]

  # vapply guarantees a character vector, also for an empty model list
  modelNames <- vapply(models, function(x) x[["name"]], character(1))
  modelNames <- gsub(" ", "_", modelNames)

  # collect the created column names per model instead of growing one vector
  namesPerModel <- vector("list", length(results))

  # loop over the models
  for (i in seq_along(results)) {
    scores     <- cSEM::getConstructScores(results[[i]])$Construct_scores
    scoreNames <- colnames(scores)
    modelCols  <- character(ncol(scores))

    # then loop over the scores
    for (j in seq_len(ncol(scores))) {

      colNameR <- paste0(modelNames[i], "_", scoreNames[j])

      # refuse to overwrite a column that this analysis did not create itself
      if (jaspBase:::columnExists(colNameR) && !jaspBase:::columnIsMine(colNameR)) {
        .quitAnalysis(gettextf("Column '%s' name already exists in the dataset", colNameR))
      }

      container[[colNameR]] <- jaspBase::createJaspColumn(colNameR)
      container[[colNameR]]$setScale(scores[, j])

      modelCols[j] <- colNameR
    }

    namesPerModel[[i]] <- modelCols
  }

  allNamesR <- unlist(namesPerModel)

  # NOTE(review): the container was already attached above; this second
  # assignment is kept from the original and is presumably redundant - confirm
  jaspResults[["addedScoresContainer"]] <- container

  # delete columns from a previous run whose names are no longer produced
  oldNames   <- jaspResults[["createdColumnNames"]][["object"]]
  staleNames <- oldNames[!(oldNames %in% allNamesR)]
  for (staleName in staleNames) {
    jaspBase:::columnDelete(staleName)
  }

  # save the created col names for the next run
  jaspResults[["createdColumnNames"]] <- createJaspState(allNamesR)

  return()
}


.plsSEMVIFhelper <- function(fit){
# Make VIFs into a matrix
# Restructure the VIFs into a table.
Expand All @@ -1969,10 +2028,10 @@ checkCSemModel <- function(model, availableVars) {
idx <- which(VIFspath$VIF!=0,arr.ind = T)

if(nrow(idx)!=0){
VIFDf <- data.frame(Relation=paste(rownames(VIFspath$VIF)[idx[,'row']],'~',colnames(VIFspath$VIF)[idx[,'col']]),
vif=VIFspath$VIF[cbind(rownames(VIFspath$VIF)[idx[,'row']],colnames(VIFspath$VIF)[idx[,'col']])])
VIFDf <- data.frame(Relation=paste(rownames(VIFspath$VIF)[idx[,'row']],'~',colnames(VIFspath$VIF)[idx[,'col']]),
vif=VIFspath$VIF[cbind(rownames(VIFspath$VIF)[idx[,'row']],colnames(VIFspath$VIF)[idx[,'col']])])

VIFvector <-setNames(VIFDf$vif, VIFDf$Relation)
VIFvector <-setNames(VIFDf$vif, VIFDf$Relation)
} else{
VIFvector <- NULL
}
Expand All @@ -1988,17 +2047,16 @@ checkCSemModel <- function(model, availableVars) {


if(!is.null(VIFsweights)&sum(VIFsweights)!=0){
idx <- which(VIFsweights!=0,arr.ind = T)
idx <- which(VIFsweights!=0,arr.ind = T)

VIFBDf <- data.frame(Relation=paste(rownames(VIFsweights)[idx[,'row']],'<~',colnames(VIFsweights)[idx[,'col']]),
vif=VIFsweights[cbind(rownames(VIFsweights)[idx[,'row']],colnames(VIFsweights)[idx[,'col']])])
VIFBDf <- data.frame(Relation=paste(rownames(VIFsweights)[idx[,'row']],'<~',colnames(VIFsweights)[idx[,'col']]),
vif=VIFsweights[cbind(rownames(VIFsweights)[idx[,'row']],colnames(VIFsweights)[idx[,'col']])])

VIFBvector <-setNames(VIFBDf$vif, VIFBDf$Relation)
VIFBvector <-setNames(VIFBDf$vif, VIFBDf$Relation)

} else{
VIFBvector <- NULL
}
return(VIFBvector)

}

99 changes: 96 additions & 3 deletions inst/help/PLSSEM.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,101 @@
Partial Least Squares structural equation modeling
============
# Partial Least Squares Structural Equation Modeling (PLS-SEM) in JASP

Perform partial least squares structural equation modeling (PLS-SEM) using `cSEM` (Rademaker & Schuberth, 2020). Go to https://cran.r-project.org/web/packages/cSEM/cSEM.pdf for package information and examples. See also Henseler (2021) for more tutorials using cSEM.
This document explains how to perform Partial Least Squares Structural Equation Modeling (PLS-SEM) in JASP using the various options provided in the user interface.

## 1. Model Setup
---
In the **Model** section, you specify the measurement and structural model by entering the model syntax and, optionally, select a grouping variable.

- **Grouping Variable**: You can select the grouping variable for multi-group analysis. The grouping variable is optional and can be left empty if not required.

## 2. Estimation Options
---
In the **Estimation** section, the following options are available:

- **Consistent Partial Least Squares**: Uses consistent PLS (PLSc), which corrects the construct correlations for attenuation so that the estimates for reflectively measured constructs are consistent. This option is enabled by default.

- **Inner Weighting Scheme**: Choose from the following options to calculate inner weights:
- Path
- Centroid
- Factorial

- **Bias-Corrected Bootstrap**: Activate this option for bias-corrected bootstrapping, which refines the confidence intervals for the estimates.

- **Bootstrap Resampling**: You can adjust the number of bootstrap samples for more precise interval estimations. The default value is set to 200.

- **Missing Data Handling**: Options for managing missing data, including pairwise or listwise deletion, are available.

## 3. Output Options
---
The **Output** section allows you to customize the types of output you want to generate:

- **Path Coefficients**: Display the estimated path coefficients.
- **Weights and Loadings**: Shows both the indicator weights and loadings, offering insights into how well the indicators measure their corresponding latent variables.
- **Goodness-of-Fit Measures**: Presents different goodness-of-fit measures like SRMR or NFI.
- **Reliability Measures**: Enables output of reliability measures such as Cronbach's alpha or composite reliability for the constructs.

Additional correlation measures include:
- **Observed and Implied Indicator Correlations**
- **Observed and Implied Construct Correlations**

You can also add **construct scores** to the dataset for further analysis.

## 4. Prediction
---
The **Prediction** section includes options for predicting endogenous indicator scores using cross-validation:

- **Cross-Validation k-Folds**: Choose the number of k-folds for cross-validation, with a default value of 10.
- **Repetitions**: Specify the number of repetitions, with a default value of 10.

You can also select a benchmark to compare predictions against:
- **None**
- **Linear Model (LM)**
- **PLS-PM**
- **GSCA**
- **PCA**
- **MAXVAR**
- **All**

## 5. Output and Interpretation
---

### 5.1 Path Coefficients
The **path coefficients** represent the strength and direction of the relationships between the constructs. These coefficients are similar to regression weights and help in understanding the impact of one latent variable on another. You can also view the **t-values** and **p-values** to assess the significance of these paths.

### 5.2 Indicator Loadings and Weights
This section shows the **loadings** of each indicator on its associated construct, which indicates how well each observed variable measures the latent construct. Loadings close to 1 indicate a strong relationship between the indicator and its construct. **Weights** are presented in the case of formative constructs, showing the relative importance of each indicator.

### 5.3 Model Fit Indices
JASP provides several goodness-of-fit measures to evaluate how well the model fits the data:
- **SRMR (Standardized Root Mean Square Residual)**: A measure of model fit, where lower values (generally below 0.08) indicate a better fit.
- **NFI (Normed Fit Index)**: Ranges from 0 to 1, with higher values representing a better fit.

### 5.4 Reliability Measures
Reliability measures assess the internal consistency of the latent constructs:
- **Cronbach’s Alpha**: A commonly used reliability coefficient; values above 0.7 generally indicate acceptable reliability.
- **Composite Reliability (CR)**: A measure of internal consistency similar to Cronbach’s Alpha, but one that allows the indicators to have different factor loadings.
- **Average Variance Extracted (AVE)**: Represents the amount of variance captured by a construct in relation to the variance due to measurement error. AVE values above 0.5 are generally considered acceptable.

### 5.5 R-Squared (R²)
The R-squared value represents the proportion of variance in the endogenous constructs explained by the model. Higher values indicate better explanatory power. An R-squared value close to 0.7 is considered substantial, while values around 0.3 are moderate.

### 5.6 Cross-Validated Prediction
If the cross-validation option is selected, the results will include predicted scores for the endogenous indicators. The k-fold cross-validation helps in assessing the predictive power of the model. You can compare the model’s predictions with benchmarks like linear regression or GSCA. PLS-PM cannot be used as the benchmark, because the target model itself is estimated with PLS-PM and selecting it results in an error.

### 5.7 Construct Scores
You can include the estimated construct scores in the dataset for further analysis. These scores represent the latent variables in the model and can be used for additional analyses outside of SEM.

### 5.8 Bootstrapping Results
If bootstrapping is used, the output includes **bootstrap confidence intervals** for the path coefficients, loadings, and weights. These intervals help in understanding the stability of the parameter estimates.

### 5.9 Prediction Benchmarks
If benchmarks are selected, you can compare the PLS-SEM model with:
- **Linear Model** (LM)
- **Principal Component Analysis** (PCA)
- **Generalized Structured Component Analysis** (GSCA)
- **MAXVAR** (Maximum Variance method)

These benchmarks help in evaluating how well your PLS-SEM model predicts the endogenous variables compared to simpler methods.

References
-------
Expand Down
Loading

0 comments on commit 29c517f

Please sign in to comment.