remove weighting options and rename to consistent PLS (#204)

* undo reset * undo delete stuff in merge conflict * add construct scores to data set * remove some options * first try at the pls help file
jasp-stats · Oct 8, 2024 · 29c517f · 29c517f
1 parent e89d059
commit 29c517f
Show file tree

Hide file tree

Showing 7 changed files with 527 additions and 466 deletions.
diff --git a/R/PLSSEMWrapper.R b/R/PLSSEMWrapper.R
@@ -24,7 +24,7 @@ PLSSEM <- function(
           benchmark = "none",
           bootstrapSamples = 200,
           ciLevel = 0.95,
-          compositeCorrelationDisattenuated = TRUE,
+          consistentPartialLeastSquares = TRUE,
           convergenceCriterion = "absoluteDifference",
           correctionFactor = "squaredEuclidean",
           correlationMatrix = "pearson",
@@ -50,8 +50,7 @@ PLSSEM <- function(
           robustMethod = "bootstrap",
           seed = 1,
           setSeed = FALSE,
-          structuralModelIgnored = FALSE,
-          weightingApproach = "PLS-PM") {
+          structuralModelIgnored = FALSE) {
 
    defaultArgCalls <- formals(jaspSem::PLSSEM)
    defaultArgs <- lapply(defaultArgCalls, eval)
@@ -62,7 +61,7 @@ PLSSEM <- function(
    options[["data"]] <- NULL
    options[["version"]] <- NULL
 
-   optionsWithFormula <- c("convergenceCriterion", "correctionFactor", "group", "handlingOfFlippedSigns", "innerWeightingScheme", "models", "weightingApproach")
+   optionsWithFormula <- c("convergenceCriterion", "correctionFactor", "group", "handlingOfFlippedSigns", "innerWeightingScheme", "models")
    for (name in optionsWithFormula) {
       if ((name %in% optionsWithFormula) && inherits(options[[name]], "formula")) options[[name]] = jaspBase::jaspFormula(options[[name]], data)   }
 

diff --git a/R/plssem.R b/R/plssem.R
@@ -41,6 +41,8 @@ PLSSEMInternal <- function(jaspResults, dataset, options, ...) {
   .semMardiasCoefficient(modelContainer, dataset, options, ready)
   .plsSemReliabilities(modelContainer, dataset, options, ready)
   .plsSemCor(modelContainer, options, ready)
+
+  .plsAddConstructScores(jaspResults, modelContainer, options, ready)
 }
 
 .plsSemPrepOpts <- function(options) {
@@ -149,15 +151,17 @@ checkCSemModel <- function(model, availableVars) {
 
 
 .plsSemModelContainer <- function(jaspResults) {
+
   if (!is.null(jaspResults[["modelContainer"]])) {
     modelContainer <- jaspResults[["modelContainer"]]
   } else {
     modelContainer <- createJaspContainer()
-    modelContainer$dependOn(c("weightingApproach", "correlationMatrix", "convergenceCriterion",
-                              "estimateStructural", "group", "correctionFactor", "compositeCorrelationDisattenuated",
-                              "structuralModelIgnored", "innerWeightingScheme", "errorCalculationMethod", "robustMethod", "bootstrapSamples", "ciLevel",
-                              "setSeed", "seed", "handlingOfInadmissibles", "Data", "handlingOfFlippedSigns", "endogenousIndicatorPrediction",
-                              "kFolds", "repetitions", "benchmark", "predictedScore"))
+    modelContainer$dependOn(c("syntax", "convergenceCriterion",
+                              "estimateStructural", "group", "consistentPartialLeastSquares",
+                              "structuralModelIgnored", "innerWeightingScheme", "errorCalculationMethod",
+                              "robustMethod", "bootstrapSamples", "ciLevel",
+                              "setSeed", "seed", "handlingOfInadmissibles", "endogenousIndicatorPrediction",
+                              "kFolds", "repetitions", "benchmark", "predictedScore", "models"))
     jaspResults[["modelContainer"]] <- modelContainer
   }
 
@@ -233,16 +237,12 @@ checkCSemModel <- function(model, availableVars) {
       }
       # resample
       fit <- try(cSEM::resamplecSEMResults(.object = fit,
-                                                 .R = options[["bootstrapSamples"]],
-                                                 .user_funs = tickFunction,
-                                                 .resample_method = options[["robustMethod"]],
-                                                 .handle_inadmissibles = options[["handlingOfInadmissibles"]],
-                                                 .sign_change_option = switch(options[["handlingOfFlippedSigns"]],
-                                                                              "individualReestimation" = "individual_reestimate",
-                                                                              "constructReestimation" = "construct_reestimate",
-                                                                              options[["handlingOfFlippedSigns"]]
-                                                                              ),
-                                                 .seed = if (options[["setSeed"]]) options[["seed"]]))
+                                           .R = options[["bootstrapSamples"]],
+                                           .user_funs = tickFunction,
+                                           .resample_method = options[["robustMethod"]],
+                                           .handle_inadmissibles = options[["handlingOfInadmissibles"]],
+                                           .sign_change_option = "none",
+                                           .seed = if (options[["setSeed"]]) options[["seed"]]))
 
 
       if (isTryError(fit)) {
@@ -278,8 +278,8 @@ checkCSemModel <- function(model, availableVars) {
   cSemOpts <- list()
 
   # model features
-  cSemOpts[[".approach_weights"]]            <- options[["weightingApproach"]]
-  cSemOpts[[".approach_cor_robust"]]         <- if (options[["correlationMatrix"]] == "pearson") "none" else options[["correlationMatrix"]]
+  cSemOpts[[".approach_weights"]]            <- "PLS-PM"
+  cSemOpts[[".approach_cor_robust"]]         <- "none"
   cSemOpts[[".approach_nl"]]                 <- options[["approachNonLinear"]]
   cSemOpts[[".conv_criterion"]]              <- switch(options[["convergenceCriterion"]],
                                                        "absoluteDifference" = "diff_absolute",
@@ -290,16 +290,9 @@ checkCSemModel <- function(model, availableVars) {
   cSemOpts[[".PLS_ignore_structural_model"]] <- options[["structuralModelIgnored"]]
   cSemOpts[[".PLS_weight_scheme_inner"]]     <- options[["innerWeightingScheme"]]
 
-  if (options[["compositeCorrelationDisattenuated"]]) {
+  if (options[["consistentPartialLeastSquares"]]) {
     cSemOpts[".disattenuate"] <- TRUE
-    cSemOpts[".PLS_approach_cf"] <- switch(options[["correctionFactor"]],
-                                           "squaredEuclidean" = "dist_squared_euclid",
-                                           "weightedEuclidean" = "dist_euclid_weighted",
-                                           "fisherTransformed" = "fisher_transformed",
-                                           "arithmeticMean" = "mean_arithmetic",
-                                           "geometricMean" = "mean_geometric",
-                                           "harmonicMean" = "mean_harmonic",
-                                           "geometricHarmonicMean" = "geo_of_harmonic")
+    cSemOpts[".PLS_approach_cf"] <- "dist_squared_euclid"
 
   } else {
     cSemOpts[".disattenuate"] <- FALSE
@@ -1059,19 +1052,19 @@ checkCSemModel <- function(model, availableVars) {
     predictcont <- createJaspContainer(name, initCollapsed = TRUE)
   }
 
-  #Error messages
+  # Error messages
 
   if (options[["benchmark"]] != "none" && options[["benchmark"]] != "all") {
     benchmarks <- options[["benchmark"]]
   }
   else if (options[["benchmark"]] == "all") {
     benchmarks <- c("lm", "PLS-PM", "GSCA", "PCA", "MAXVAR")
-    benchmarks <- benchmarks[benchmarks != options[["weightingApproach"]]]
+    benchmarks <- benchmarks[benchmarks != "PLS-PM"]
   } else {
     benchmarks <- NULL
   }
 
-  if (options[["benchmark"]] != "none" && options[["benchmark"]] != "all" && benchmarks == options[["weightingApproach"]]) {
+  if (options[["benchmark"]] != "none" && options[["benchmark"]] != "all" && benchmarks == "PLS-PM") {
     errormsg <- gettextf("The target model uses the same weighting approach as the benchmark model, please choose another benchmark.")
     modelContainer$setError(errormsg)
     modelContainer$dependOn("benchmark")
@@ -1961,6 +1954,72 @@ checkCSemModel <- function(model, availableVars) {
   return()
 }
 
+
+# Adds the construct scores of every estimated model to the dataset, one column
+# per construct named "<model name>_<construct name>", and deletes the columns
+# created by an earlier run that are not produced anymore.
+# Returns NULL; does nothing unless `ready`, "addConstructScores" is enabled, the
+# model container is error free and the score container does not exist yet.
+.plsAddConstructScores <- function(jaspResults, modelContainer, options, ready) {
+
+  if (!ready ||
+      !is.null(jaspResults[["addedScoresContainer"]]) ||
+      modelContainer$getError() ||
+      !options[["addConstructScores"]]) {
+    return()
+  }
+
+  container <- createJaspContainer()
+  container$dependOn(optionsFromObject = modelContainer, options = "addConstructScores")
+  jaspResults[["addedScoresContainer"]] <- container
+
+  models  <- modelContainer[["models"]][["object"]]
+  results <- modelContainer[["results"]][["object"]]
+
+  # vapply() keeps this a character vector, also when there are no models
+  modelNames <- vapply(models, function(x) x[["name"]], character(1))
+  modelNames <- gsub(" ", "_", modelNames)
+
+  # names of the columns created for each model
+  namesPerModel <- vector("list", length(results))
+  for (i in seq_along(results)) {
+    scores     <- cSEM::getConstructScores(results[[i]])$Construct_scores
+    scoreNames <- colnames(scores)
+    colNamesR  <- character(ncol(scores))
+
+    for (ii in seq_len(ncol(scores))) {
+      colNameR <- paste0(modelNames[i], "_", scoreNames[ii])
+
+      # stop if the name is taken by a column that is not ours (columnIsMine)
+      if (jaspBase:::columnExists(colNameR) && !jaspBase:::columnIsMine(colNameR)) {
+        .quitAnalysis(gettextf("Column '%s' name already exists in the dataset", colNameR))
+      }
+
+      container[[colNameR]] <- jaspBase::createJaspColumn(colNameR)
+      container[[colNameR]]$setScale(scores[, ii])
+      colNamesR[ii] <- colNameR
+    }
+
+    namesPerModel[[i]] <- colNamesR
+  }
+  allNamesR <- unlist(namesPerModel, use.names = FALSE)
+
+  jaspResults[["addedScoresContainer"]] <- container
+
+  # delete columns from a previous run that are not needed anymore;
+  # setdiff() is empty (zero iterations) when there is nothing stale
+  oldNames <- jaspResults[["createdColumnNames"]][["object"]]
+  for (staleName in setdiff(oldNames, allNamesR)) {
+    jaspBase:::columnDelete(staleName)
+  }
+
+  # remember the created column names for the next run
+  jaspResults[["createdColumnNames"]] <- createJaspState(allNamesR)
+
+  return()
+}
+
+
 .plsSEMVIFhelper <- function(fit){
   # Make VIFs into a matrix
   # Restructure the VIFs into a table.
@@ -1969,10 +2028,10 @@ checkCSemModel <- function(model, availableVars) {
   idx <- which(VIFspath$VIF!=0,arr.ind = T)
 
   if(nrow(idx)!=0){
-  VIFDf <- data.frame(Relation=paste(rownames(VIFspath$VIF)[idx[,'row']],'~',colnames(VIFspath$VIF)[idx[,'col']]),
-                          vif=VIFspath$VIF[cbind(rownames(VIFspath$VIF)[idx[,'row']],colnames(VIFspath$VIF)[idx[,'col']])])
+    VIFDf <- data.frame(Relation=paste(rownames(VIFspath$VIF)[idx[,'row']],'~',colnames(VIFspath$VIF)[idx[,'col']]),
+                        vif=VIFspath$VIF[cbind(rownames(VIFspath$VIF)[idx[,'row']],colnames(VIFspath$VIF)[idx[,'col']])])
 
-  VIFvector <-setNames(VIFDf$vif, VIFDf$Relation)
+    VIFvector <-setNames(VIFDf$vif, VIFDf$Relation)
   } else{
     VIFvector <- NULL
   }
@@ -1988,17 +2047,16 @@ checkCSemModel <- function(model, availableVars) {
 
 
   if(!is.null(VIFsweights)&sum(VIFsweights)!=0){
-  idx <- which(VIFsweights!=0,arr.ind = T)
+    idx <- which(VIFsweights!=0,arr.ind = T)
 
-  VIFBDf <- data.frame(Relation=paste(rownames(VIFsweights)[idx[,'row']],'<~',colnames(VIFsweights)[idx[,'col']]),
-                      vif=VIFsweights[cbind(rownames(VIFsweights)[idx[,'row']],colnames(VIFsweights)[idx[,'col']])])
+    VIFBDf <- data.frame(Relation=paste(rownames(VIFsweights)[idx[,'row']],'<~',colnames(VIFsweights)[idx[,'col']]),
+                         vif=VIFsweights[cbind(rownames(VIFsweights)[idx[,'row']],colnames(VIFsweights)[idx[,'col']])])
 
-  VIFBvector <-setNames(VIFBDf$vif, VIFBDf$Relation)
+    VIFBvector <-setNames(VIFBDf$vif, VIFBDf$Relation)
 
   } else{
     VIFBvector <- NULL
   }
   return(VIFBvector)
 
 }
-
diff --git a/inst/help/PLSSEM.md b/inst/help/PLSSEM.md
@@ -1,8 +1,101 @@
-Partial Least Squares structural equation modeling
-============
+# Partial Least Squares Structural Equation Modeling (PLS-SEM) in JASP
 
-Perform partial least squares structural equation modeling (PLS-SEM) using `cSEM` (Rademaker & Schuberth, 2020). Go to https://cran.r-project.org/web/packages/cSEM/cSEM.pdf for package information and examples. See also Henseler (2021) for more tutorials using cSEM.
+This document explains how to perform Partial Least Squares Structural Equation Modeling (PLS-SEM) in JASP using the various options provided in the user interface.
 
+## 1. Model Setup
+---
+In the **Model** section, you can specify the structural model by selecting the appropriate grouping variable and setting the syntax for the model.
+
+- **Grouping Variable**: You can select the grouping variable for multi-group analysis. The grouping variable is optional and can be left empty if not required.
+
+## 2. Estimation Options
+---
+In the **Estimation** section, the following options are available:
+
+- **Consistent Partial Least Squares**: Uses consistent PLS (PLSc), which disattenuates the construct correlations (corrects them for measurement error) so that the estimates for reflectively measured constructs are consistent.
+
+- **Inner Weighting Scheme**: Choose from the following options to calculate inner weights:
+  - Path
+  - Centroid
+  - Factorial
+
+- **Bias-Corrected Bootstrap**: Activate this option for bias-corrected bootstrapping, which refines the confidence intervals for the estimates.
+
+- **Bootstrap Resampling**: You can adjust the number of bootstrap samples for more precise interval estimations. The default value is set to 200.
+
+- **Missing Data Handling**: Options for managing missing data, including pairwise or listwise deletion, are available.
+
+## 3. Output Options
+---
+The **Output** section allows you to customize the types of output you want to generate:
+
+- **Path Coefficients**: Display the estimated path coefficients.
+- **Weights and Loadings**: Shows both the indicator weights and loadings, offering insights into how well the indicators measure their corresponding latent variables.
+- **Goodness-of-Fit Measures**: Presents different goodness-of-fit measures like SRMR or NFI.
+- **Reliability Measures**: Enables output of reliability measures such as Cronbach's alpha or composite reliability for the constructs.
+
+Additional correlation measures include:
+- **Observed and Implied Indicator Correlations**
+- **Observed and Implied Construct Correlations**
+
+You can also add **construct scores** to the dataset for further analysis.
+
+## 4. Prediction
+---
+The **Prediction** section includes options for predicting endogenous indicator scores using cross-validation:
+
+- **Cross-Validation k-Folds**: Choose the number of k-folds for cross-validation, with a default value of 10.
+- **Repetitions**: Specify the number of repetitions, with a default value of 10.
+
+You can also select a benchmark to compare predictions against:
+- **None**
+- **Linear Model (LM)**
+- **PLS-PM**
+- **GSCA**
+- **PCA**
+- **MAXVAR**
+- **All**
+
+## 5. Output and Interpretation
+---
+
+### 5.1 Path Coefficients
+The **path coefficients** represent the strength and direction of the relationships between the constructs. These coefficients are similar to regression weights and help in understanding the impact of one latent variable on another. You can also view the **t-values** and **p-values** to assess the significance of these paths.
+
+### 5.2 Indicator Loadings and Weights
+This section shows the **loadings** of each indicator on its associated construct, which indicates how well each observed variable measures the latent construct. Loadings close to 1 indicate a strong relationship between the indicator and its construct. **Weights** are presented in the case of formative constructs, showing the relative importance of each indicator.
+
+### 5.3 Model Fit Indices
+JASP provides several goodness-of-fit measures to evaluate how well the model fits the data:
+- **SRMR (Standardized Root Mean Square Residual)**: A measure of model fit, where lower values (generally below 0.08) indicate a better fit.
+- **NFI (Normed Fit Index)**: Ranges from 0 to 1, with higher values representing a better fit.
+
+### 5.4 Reliability Measures
+Reliability measures assess the internal consistency of the latent constructs:
+- **Cronbach’s Alpha**: A commonly used reliability coefficient; values above 0.7 generally indicate acceptable reliability.
+- **Composite Reliability (CR)**: A measure of internal consistency similar to Cronbach’s Alpha but considers different factor loadings.
+- **Average Variance Extracted (AVE)**: Represents the amount of variance captured by a construct in relation to the variance due to measurement error. AVE values above 0.5 are generally considered acceptable.
+
+### 5.5 R-Squared (R²)
+The R-squared value represents the proportion of variance in the endogenous constructs explained by the model. Higher values indicate better explanatory power. An R-squared value close to 0.7 is considered substantial, while values around 0.3 are moderate.
+
+### 5.6 Cross-Validated Prediction
+If the cross-validation option is selected, the results will include predicted scores for the endogenous indicators. The k-fold cross-validation helps in assessing the predictive power of the model. You can compare the model’s predictions with benchmarks like a linear model or GSCA; because the target model itself is estimated with PLS-PM, PLS-PM cannot be used as its benchmark.
+
+### 5.7 Construct Scores
+You can include the estimated construct scores in the dataset for further analysis. These scores represent the latent variables in the model and can be used for additional analyses outside of SEM.
+
+### 5.8 Bootstrapping Results
+If bootstrapping is used, the output includes **bootstrap confidence intervals** for the path coefficients, loadings, and weights. These intervals help in understanding the stability of the parameter estimates.
+
+### 5.9 Prediction Benchmarks
+If benchmarks are selected, you can compare the PLS-SEM model with:
+- **Linear Model** (LM)
+- **Principal Component Analysis** (PCA)
+- **Generalized Structured Component Analysis** (GSCA)
+- **MAXVAR** (Maximum Variance method)
+
+These benchmarks help in evaluating how well your PLS-SEM model predicts the endogenous variables compared to simpler methods.
 
 References
 -------