From 67ffc3a141dfbd780247a6446819858c12132bf8 Mon Sep 17 00:00:00 2001
From: Pedro Ojeda-May <pedroojeda2011@gmail.com>
Date: Wed, 21 Aug 2024 07:07:22 +0200
Subject: [PATCH] R: folders of exercises

---
 exercises/R/DOPARALLEL/FOREACH/doParallel.R   | 33 ++++++++
 .../R/DOPARALLEL/FOREACH/job_doParallel.sh    | 14 ++++
 exercises/R/DOPARALLEL/ML/doParallel_ML.R     | 82 +++++++++++++++++++
 .../R/DOPARALLEL/ML/job_doParallel_ML.sh      | 14 ++++
 exercises/R/JOB-ARRAYS/job.sh                 | 14 ++++
 exercises/R/JOB-ARRAYS/script_arrays.R        |  6 ++
 exercises/R/ML/Rscript.R                      | 46 +++++++++++
 exercises/R/ML/job.sh                         | 14 ++++
 exercises/R/RMPI/Rmpi.R                       | 11 +++
 exercises/R/RMPI/job_Rmpi.sh                  | 13 +++
 exercises/R/SERIAL/job.sh                     | 17 ++++
 exercises/R/SERIAL/serial.R                   |  4 +
 12 files changed, 268 insertions(+)
 create mode 100644 exercises/R/DOPARALLEL/FOREACH/doParallel.R
 create mode 100644 exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
 create mode 100644 exercises/R/DOPARALLEL/ML/doParallel_ML.R
 create mode 100644 exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
 create mode 100644 exercises/R/JOB-ARRAYS/job.sh
 create mode 100644 exercises/R/JOB-ARRAYS/script_arrays.R
 create mode 100644 exercises/R/ML/Rscript.R
 create mode 100644 exercises/R/ML/job.sh
 create mode 100644 exercises/R/RMPI/Rmpi.R
 create mode 100644 exercises/R/RMPI/job_Rmpi.sh
 create mode 100644 exercises/R/SERIAL/job.sh
 create mode 100644 exercises/R/SERIAL/serial.R

diff --git a/exercises/R/DOPARALLEL/FOREACH/doParallel.R b/exercises/R/DOPARALLEL/FOREACH/doParallel.R
new file mode 100644
index 00000000..ab2f2780
--- /dev/null
+++ b/exercises/R/DOPARALLEL/FOREACH/doParallel.R
@@ -0,0 +1,33 @@
+#Example taken from: https://cran.r-project.org/web/packages/doParallel/vignettes/gettingstartedParallel.pdf
+library(doParallel)
+
+x <- iris[which(iris[,5] != "setosa"), c(1,5)]
+trials <- 10000
+
+#Sequential version
+stime <- system.time({ 
+    r <- foreach(icount(trials), .combine=cbind) %do% {
+        ind <- sample(100,100, replace=TRUE)
+        result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
+        coefficients(result1)
+    }
+})[3]
+
+stime
+
+
+#Parallel version
+cl <- makeCluster(4)
+registerDoParallel(cl)
+
+ptime <- system.time({ 
+    r <- foreach(icount(trials), .combine=cbind) %dopar% {
+        ind <- sample(100,100, replace=TRUE)
+        result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
+        coefficients(result1)
+    }
+})[3]
+
+ptime
+
+stopCluster(cl)
diff --git a/exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh b/exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
new file mode 100644
index 00000000..07e9216e
--- /dev/null
+++ b/exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 4
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0  OpenMPI/4.0.5
+ml R/4.0.4
+
+R --no-save --no-restore -f doParallel.R
diff --git a/exercises/R/DOPARALLEL/ML/doParallel_ML.R b/exercises/R/DOPARALLEL/ML/doParallel_ML.R
new file mode 100644
index 00000000..b74a960d
--- /dev/null
+++ b/exercises/R/DOPARALLEL/ML/doParallel_ML.R
@@ -0,0 +1,82 @@
+#Example taken from: http://michael.hahsler.net/SMU/LearnROnYourOwn/code/doMC.html
+library(doParallel)
+registerDoParallel(cores=4)
+getDoParWorkers()
+
+library(caret)
+library(MASS)
+library(klaR)
+library(nnet)
+library(e1071)
+library(rpart)
+
+data(iris)
+x <- iris[sample(1:nrow(iris)),]
+
+x <- cbind(x, useless = rnorm(nrow(x)))
+x[,1] <- x[,1] + rnorm(nrow(x))
+x[,2] <- x[,2] + rnorm(nrow(x))
+x[,3] <- x[,3] + rnorm(nrow(x))
+
+head(x)
+
+# Name of the highest-posterior class (a column name) for every predicted row.
+posteriorToClass <- function(predicted) {
+    colnames(predicted$posterior)[apply(predicted$posterior, 1, which.max)]
+}
+
+# Fraction of predictions that differ from the true labels. Labels are compared
+# as strings because diag(table()) miscounts when a class is never predicted.
+missclassRate <- function(predicted, true) {
+    stopifnot(length(predicted) == length(true))
+    hits <- sum(as.character(predicted) == as.character(true), na.rm = TRUE)
+    (length(true) - hits) / length(true)
+}
+
+evaluation <- function() {
+    ## One run: hold out a random 10% of the rows of the global x for testing
+    testSize <- floor(nrow(x) * 10/100)
+    test <- sample(1:nrow(x), testSize)
+
+    train_data <- x[-test,]
+    test_data <- x[test, -5]
+    test_class <- x[test, 5]
+
+    ## fit six classifiers on the training rows, with Species as the response
+    model_knn3 <- knn3(Species~., data=train_data)
+    model_lda <- lda(Species~., data=train_data)
+    model_nnet <- nnet(Species~., data=train_data, size=10, trace=FALSE)
+    model_nb <- NaiveBayes(Species~., data=train_data)
+    model_svm <- svm(Species~., data=train_data)
+    model_rpart <- rpart(Species~., data=train_data)
+
+    ## predict the class of every held-out row with each model
+    predicted_knn3 <- predict(model_knn3 , test_data, type="class")
+    predicted_lda <- posteriorToClass(predict(model_lda , test_data))
+    predicted_nnet <- predict(model_nnet, test_data, type="class")
+    predicted_nb <- posteriorToClass(predict(model_nb, test_data))
+    predicted_svm <- predict(model_svm, test_data)
+    predicted_rpart <- predict(model_rpart, test_data, type="class")
+
+    predicted <- list(knn3=predicted_knn3, lda=predicted_lda,
+        nnet=predicted_nnet, nb=predicted_nb, svm=predicted_svm,
+        rpart=predicted_rpart)
+
+    ## misclassification rate of each model, named after the list entries above
+    sapply(predicted, FUN=
+        function(x) missclassRate(true= test_class, predicted=x))
+}
+
+runs <- 10000
+
+stime <- system.time({
+        sr <- foreach(1:runs, .combine = rbind) %do% evaluation()
+    })
+
+
+ptime <- system.time({
+        pr <- foreach(1:runs, .combine = rbind) %dopar% evaluation()
+    })
+
+timing <- rbind(sequential = stime, parallel = ptime)
+timing
diff --git a/exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh b/exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
new file mode 100644
index 00000000..14695154
--- /dev/null
+++ b/exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 4
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0  OpenMPI/4.0.5
+ml R/4.0.4
+
+R --no-save --no-restore -f doParallel_ML.R
diff --git a/exercises/R/JOB-ARRAYS/job.sh b/exercises/R/JOB-ARRAYS/job.sh
new file mode 100644
index 00000000..498e3208
--- /dev/null
+++ b/exercises/R/JOB-ARRAYS/job.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 12 min.
+#SBATCH -t 00:12:00
+#SBATCH --array=1-28
+#Writing output and error files
+#SBATCH --output=Array_test.%A_%a.out
+#SBATCH --error=Array_test.%A_%a.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0  OpenMPI/4.0.5
+ml R/4.0.4
+
+Rscript --quiet --no-save --no-restore script_arrays.R
diff --git a/exercises/R/JOB-ARRAYS/script_arrays.R b/exercises/R/JOB-ARRAYS/script_arrays.R
new file mode 100644
index 00000000..633ed060
--- /dev/null
+++ b/exercises/R/JOB-ARRAYS/script_arrays.R
@@ -0,0 +1,6 @@
+# Report which SLURM array job and which array task this R process belongs to.
+array_job <- Sys.getenv("SLURM_ARRAY_JOB_ID")
+array_task <- Sys.getenv("SLURM_ARRAY_TASK_ID")
+cat(sprintf("This is job ID %s \n", array_job))
+cat(sprintf("This is task ID %s \n", array_task))
+Sys.sleep(10)
diff --git a/exercises/R/ML/Rscript.R b/exercises/R/ML/Rscript.R
new file mode 100644
index 00000000..ed0fe302
--- /dev/null
+++ b/exercises/R/ML/Rscript.R
@@ -0,0 +1,46 @@
+#Example taken from https://github.com/lgreski/datasciencectacontent/blob/master/markdown/pml-randomForestPerformance.md
+library(mlbench)
+data(Sonar)
+library(caret)
+set.seed(95014)
+
+# create training & testing data sets
+inTraining <- createDataPartition(Sonar$Class, p = .75, list=FALSE)
+training <- Sonar[inTraining,]
+testing <- Sonar[-inTraining,]
+
+# use the x / y interface of train(), because the formula interface performs poorly
+x <- training[,-61]
+y <- training[,61]
+
+#Serial mode
+fitControl <- trainControl(method = "cv",
+                           number = 25,
+                           allowParallel = FALSE)
+
+stime <- system.time(fit <- train(x,y, method="rf",data=Sonar,trControl = fitControl))
+
+
+
+#Parallel mode
+library(parallel)
+library(doParallel)
+cluster <- makeCluster(1) 
+registerDoParallel(cluster)
+
+fitControl <- trainControl(method = "cv",
+                           number = 25,
+                           allowParallel = TRUE)
+
+ptime <- system.time(fit <- train(x,y, method="rf",data=Sonar,trControl = fitControl))
+
+stopCluster(cluster)
+registerDoSEQ()
+
+fit
+fit$resample
+confusionMatrix.train(fit)
+
+#Timings
+timing <- rbind(sequential = stime, parallel = ptime)
+timing
diff --git a/exercises/R/ML/job.sh b/exercises/R/ML/job.sh
new file mode 100644
index 00000000..bb01723c
--- /dev/null
+++ b/exercises/R/ML/job.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 1
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0  OpenMPI/4.0.5
+ml R/4.0.4
+
+R --no-save --no-restore -f Rscript.R
diff --git a/exercises/R/RMPI/Rmpi.R b/exercises/R/RMPI/Rmpi.R
new file mode 100644
index 00000000..f80740fd
--- /dev/null
+++ b/exercises/R/RMPI/Rmpi.R
@@ -0,0 +1,11 @@
+library("Rmpi")
+print(mpi.universe.size())
+
+mpi.spawn.Rslaves(nslaves=5)
+
+x <- c(10,20,30,40,50)
+mpi.apply(x,runif)
+
+# Shut down the spawned worker processes, then finalize MPI
+mpi.close.Rslaves()
+mpi.finalize()
diff --git a/exercises/R/RMPI/job_Rmpi.sh b/exercises/R/RMPI/job_Rmpi.sh
new file mode 100644
index 00000000..9d109665
--- /dev/null
+++ b/exercises/R/RMPI/job_Rmpi.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 6
+
+export OMPI_MCA_mpi_warn_on_fork=0
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0  OpenMPI/4.0.5
+ml R/4.0.4
+
+Rscript --no-save --no-restore Rmpi.R
diff --git a/exercises/R/SERIAL/job.sh b/exercises/R/SERIAL/job.sh
new file mode 100644
index 00000000..192c32e0
--- /dev/null
+++ b/exercises/R/SERIAL/job.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 3 min.
+#SBATCH -t 00:03:00
+#SBATCH -n 1
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0  OpenMPI/4.0.5
+ml R/4.0.4
+
+# Use R CMD BATCH if the script does not need command-line arguments
+R CMD BATCH --no-save --no-restore serial.R
+# Rscript is recommended when command line arguments are used
+#Rscript --no-save --no-restore serial.R 3.14
diff --git a/exercises/R/SERIAL/serial.R b/exercises/R/SERIAL/serial.R
new file mode 100644
index 00000000..e649ca75
--- /dev/null
+++ b/exercises/R/SERIAL/serial.R
@@ -0,0 +1,4 @@
+print("Hello World")
+# First trailing command-line argument; NA when none is supplied (R CMD BATCH)
+argv <- commandArgs(trailingOnly = TRUE)
+cat("value of argument=", argv[1], "\n")