From 67ffc3a141dfbd780247a6446819858c12132bf8 Mon Sep 17 00:00:00 2001
From: Pedro Ojeda-May
Date: Wed, 21 Aug 2024 07:07:22 +0200
Subject: [PATCH] R: folders of exercises

---
 exercises/R/DOPARALLEL/FOREACH/doParallel.R   | 33 ++++++++
 .../R/DOPARALLEL/FOREACH/job_doParallel.sh    | 14 ++++
 exercises/R/DOPARALLEL/ML/doParallel_ML.R     | 82 +++++++++++++++++++
 .../R/DOPARALLEL/ML/job_doParallel_ML.sh      | 14 ++++
 exercises/R/JOB-ARRAYS/job.sh                 | 14 ++++
 exercises/R/JOB-ARRAYS/script_arrays.R        |  6 ++
 exercises/R/ML/Rscript.R                      | 46 +++++++++++
 exercises/R/ML/job.sh                         | 14 ++++
 exercises/R/RMPI/Rmpi.R                       | 11 +++
 exercises/R/RMPI/job_Rmpi.sh                  | 13 +++
 exercises/R/SERIAL/job.sh                     | 17 ++++
 exercises/R/SERIAL/serial.R                   |  4 +
 12 files changed, 268 insertions(+)
 create mode 100644 exercises/R/DOPARALLEL/FOREACH/doParallel.R
 create mode 100644 exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
 create mode 100644 exercises/R/DOPARALLEL/ML/doParallel_ML.R
 create mode 100644 exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
 create mode 100644 exercises/R/JOB-ARRAYS/job.sh
 create mode 100644 exercises/R/JOB-ARRAYS/script_arrays.R
 create mode 100644 exercises/R/ML/Rscript.R
 create mode 100644 exercises/R/ML/job.sh
 create mode 100644 exercises/R/RMPI/Rmpi.R
 create mode 100644 exercises/R/RMPI/job_Rmpi.sh
 create mode 100644 exercises/R/SERIAL/job.sh
 create mode 100644 exercises/R/SERIAL/serial.R

diff --git a/exercises/R/DOPARALLEL/FOREACH/doParallel.R b/exercises/R/DOPARALLEL/FOREACH/doParallel.R
new file mode 100644
index 00000000..ab2f2780
--- /dev/null
+++ b/exercises/R/DOPARALLEL/FOREACH/doParallel.R
@@ -0,0 +1,33 @@
+#Example taken from: https://cran.r-project.org/web/packages/doParallel/vignettes/gettingstartedParallel.pdf
+library(doParallel)
+
+x <- iris[which(iris[,5] != "setosa"), c(1,5)]
+trials <- 10000
+
+#Sequential version: refit the logistic regression on 'trials' bootstrap samples
+stime <- system.time({
+  r <- foreach(icount(trials), .combine=cbind) %do% {
+    ind <- sample(100,100, replace=TRUE)
+    result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
+    coefficients(result1)
+  }
+})[3]  # third entry of system.time() is the elapsed time
+
+stime
+
+
+#Parallel version: the same loop distributed over a 4-worker cluster
+cl <- makeCluster(4)  # keep equal to "#SBATCH -n 4" in job_doParallel.sh
+registerDoParallel(cl)
+
+ptime <- system.time({
+  r <- foreach(icount(trials), .combine=cbind) %dopar% {
+    ind <- sample(100,100, replace=TRUE)
+    result1 <- glm(x[ind,2]~x[ind,1], family=binomial(logit))
+    coefficients(result1)
+  }
+})[3]  # third entry of system.time() is the elapsed time
+
+ptime
+
+stopCluster(cl)
diff --git a/exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh b/exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
new file mode 100644
index 00000000..07e9216e
--- /dev/null
+++ b/exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 4
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0 OpenMPI/4.0.5
+ml R/4.0.4
+
+R --no-save --no-restore -f doParallel.R
diff --git a/exercises/R/DOPARALLEL/ML/doParallel_ML.R b/exercises/R/DOPARALLEL/ML/doParallel_ML.R
new file mode 100644
index 00000000..b74a960d
--- /dev/null
+++ b/exercises/R/DOPARALLEL/ML/doParallel_ML.R
@@ -0,0 +1,82 @@
+#Example taken from: http://michael.hahsler.net/SMU/LearnROnYourOwn/code/doMC.html
+library(doParallel)
+registerDoParallel(cores=4)  # keep equal to "#SBATCH -n 4" in job_doParallel_ML.sh
+getDoParWorkers()
+
+library(caret)
+library(MASS)
+library(klaR)
+library(nnet)
+library(e1071)
+library(rpart)
+
+data(iris)
+x <- iris[sample(seq_len(nrow(iris))),]
+
+x <- cbind(x, useless = rnorm(nrow(x)))  # extra column of pure noise
+x[,1] <- x[,1] + rnorm(nrow(x))  # add noise to the first three features
+x[,2] <- x[,2] + rnorm(nrow(x))
+x[,3] <- x[,3] + rnorm(nrow(x))
+
+head(x)
+
+posteriorToClass <- function(predicted) {
+  ## label of the most probable class for each row of the posterior matrix
+  colnames(predicted$posterior)[apply(predicted$posterior, 1, which.max)]
+}
+
+missclassRate <- function(predicted, true) {
+  ## fraction of wrong labels, compared element-wise: diag(table(true,
+  ## predicted)) is misaligned if a class is absent from the predictions
+  n <- length(true)
+  tp <- sum(as.character(predicted) == as.character(true), na.rm = TRUE)
+  (n - tp)/n
+}
+
+evaluation <- function() {
+  ## one run: fit six classifiers on 90% of x, test on the held-out 10%
+  testSize <- floor(nrow(x) * 10/100)
+  test <- sample(seq_len(nrow(x)), testSize)
+
+  train_data <- x[-test,]
+  test_data <- x[test, -5]
+  test_class <- x[test, 5]
+
+  ## create model
+  model_knn3 <- knn3(Species~., data=train_data)
+  model_lda <- lda(Species~., data=train_data)
+  model_nnet <- nnet(Species~., data=train_data, size=10, trace=FALSE)
+  model_nb <- NaiveBayes(Species~., data=train_data)
+  model_svm <- svm(Species~., data=train_data)
+  model_rpart <- rpart(Species~., data=train_data)
+
+  ## prediction
+  predicted_knn3 <- predict(model_knn3 , test_data, type="class")
+  predicted_lda <- posteriorToClass(predict(model_lda , test_data))
+  predicted_nnet <- predict(model_nnet, test_data, type="class")
+  predicted_nb <- posteriorToClass(predict(model_nb, test_data))
+  predicted_svm <- predict(model_svm, test_data)
+  predicted_rpart <- predict(model_rpart, test_data, type="class")
+
+  predicted <- list(knn3=predicted_knn3, lda=predicted_lda,
+    nnet=predicted_nnet, nb=predicted_nb, svm=predicted_svm,
+    rpart=predicted_rpart)
+
+  ## misclassification rate of every model, as a named numeric vector
+  vapply(predicted, function(p) missclassRate(true = test_class, predicted = p),
+    FUN.VALUE = numeric(1))
+}
+
+runs <- 10000
+
+stime <- system.time({
+  sr <- foreach(1:runs, .combine = rbind) %do% evaluation()
+  })
+
+
+ptime <- system.time({
+  pr <- foreach(1:runs, .combine = rbind) %dopar% evaluation()
+  })
+
+timing <- rbind(sequential = stime, parallel = ptime)
+timing
diff --git a/exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh b/exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
new file mode 100644
index 00000000..14695154
--- /dev/null
+++ b/exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 4
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0 OpenMPI/4.0.5
+ml R/4.0.4
+
+R --no-save --no-restore -f doParallel_ML.R
diff --git a/exercises/R/JOB-ARRAYS/job.sh b/exercises/R/JOB-ARRAYS/job.sh
new file mode 100644
index 00000000..498e3208
--- /dev/null
+++ b/exercises/R/JOB-ARRAYS/job.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 12 min.
+#SBATCH -t 00:12:00
+#SBATCH --array=1-28
+#Writing output and error files
+#SBATCH --output=Array_test.%A_%a.out
+#SBATCH --error=Array_test.%A_%a.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0 OpenMPI/4.0.5
+ml R/4.0.4
+
+Rscript --quiet --no-save --no-restore script_arrays.R
diff --git a/exercises/R/JOB-ARRAYS/script_arrays.R b/exercises/R/JOB-ARRAYS/script_arrays.R
new file mode 100644
index 00000000..633ed060
--- /dev/null
+++ b/exercises/R/JOB-ARRAYS/script_arrays.R
@@ -0,0 +1,6 @@
+job_id <- Sys.getenv("SLURM_ARRAY_JOB_ID")
+cat(sprintf("This is job ID %s \n", job_id))
+task_id <- Sys.getenv("SLURM_ARRAY_TASK_ID")
+cat(sprintf("This is task ID %s \n", task_id))
+
+Sys.sleep(10)
diff --git a/exercises/R/ML/Rscript.R b/exercises/R/ML/Rscript.R
new file mode 100644
index 00000000..ed0fe302
--- /dev/null
+++ b/exercises/R/ML/Rscript.R
@@ -0,0 +1,46 @@
+#Example taken from https://github.com/lgreski/datasciencectacontent/blob/master/markdown/pml-randomForestPerformance.md
+library(mlbench)
+data(Sonar)
+library(caret)
+set.seed(95014)
+
+# create training & testing data sets
+inTraining <- createDataPartition(Sonar$Class, p = .75, list=FALSE)
+training <- Sonar[inTraining,]
+testing <- Sonar[-inTraining,]
+
+# set up x / y inputs for train(); the formula (model) interface performs poorly
+x <- training[,-61]
+y <- training[,61]
+
+#Serial mode
+fitControl <- trainControl(method = "cv",
+                           number = 25,
+                           allowParallel = FALSE)
+
+stime <- system.time(fit <- train(x,y, method="rf",data=Sonar,trControl = fitControl))
+
+
+
+#Parallel mode
+library(parallel)
+library(doParallel)
+cluster <- makeCluster(1)  # one worker, matching "#SBATCH -n 1" in job.sh
+registerDoParallel(cluster)
+
+fitControl <- trainControl(method = "cv",
+                           number = 25,
+                           allowParallel = TRUE)
+
+ptime <- system.time(fit <- train(x,y, method="rf",data=Sonar,trControl = fitControl))
+
+stopCluster(cluster)
+registerDoSEQ()
+
+fit
+fit$resample
+confusionMatrix.train(fit)
+
+#Timings
+timing <- rbind(sequential = stime, parallel = ptime)
+timing
diff --git a/exercises/R/ML/job.sh b/exercises/R/ML/job.sh
new file mode 100644
index 00000000..bb01723c
--- /dev/null
+++ b/exercises/R/ML/job.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 1
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0 OpenMPI/4.0.5
+ml R/4.0.4
+
+R --no-save --no-restore -f Rscript.R
diff --git a/exercises/R/RMPI/Rmpi.R b/exercises/R/RMPI/Rmpi.R
new file mode 100644
index 00000000..f80740fd
--- /dev/null
+++ b/exercises/R/RMPI/Rmpi.R
@@ -0,0 +1,11 @@
+library("Rmpi")
+print(mpi.universe.size())
+
+mpi.spawn.Rslaves(nslaves=5)  # 5 workers + this master = "#SBATCH -n 6"
+
+x <- c(10,20,30,40,50)
+mpi.apply(x,runif)  # runif(n) for every n in x
+
+# Shut down the spawned workers, then finalize MPI (mpi.quit() would also quit R)
+mpi.close.Rslaves()
+mpi.finalize()
diff --git a/exercises/R/RMPI/job_Rmpi.sh b/exercises/R/RMPI/job_Rmpi.sh
new file mode 100644
index 00000000..9d109665
--- /dev/null
+++ b/exercises/R/RMPI/job_Rmpi.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 10 min.
+#SBATCH -t 00:10:00
+#SBATCH -n 6
+
+export OMPI_MCA_mpi_warn_on_fork=0
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0 OpenMPI/4.0.5
+ml R/4.0.4
+
+Rscript --no-save --no-restore Rmpi.R
diff --git a/exercises/R/SERIAL/job.sh b/exercises/R/SERIAL/job.sh
new file mode 100644
index 00000000..192c32e0
--- /dev/null
+++ b/exercises/R/SERIAL/job.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#SBATCH -A Project_ID
+#Asking for 3 min.
+#SBATCH -t 00:03:00
+#SBATCH -n 1
+#Writing output and error files
+#SBATCH --output=output%J.out
+#SBATCH --error=error%J.error
+
+ml purge > /dev/null 2>&1
+ml GCC/10.2.0 OpenMPI/4.0.5
+ml R/4.0.4
+
+# Use the following command if you don't need command line arguments
+R CMD BATCH --no-save --no-restore serial.R
+# Rscript is recommended when command line arguments are used
+#Rscript --no-save --no-restore serial.R 3.14
diff --git a/exercises/R/SERIAL/serial.R b/exercises/R/SERIAL/serial.R
new file mode 100644
index 00000000..e649ca75
--- /dev/null
+++ b/exercises/R/SERIAL/serial.R
@@ -0,0 +1,4 @@
+print("Hello World")
+
+argv <- commandArgs(TRUE)
+cat("value of argument=", argv[1], "\n")  # NA when no argument is passed