Skip to content

Commit

Permalink
R: folders of exercises
Browse files Browse the repository at this point in the history
  • Loading branch information
pojeda committed Aug 21, 2024
1 parent 6e8b7e9 commit 67ffc3a
Show file tree
Hide file tree
Showing 12 changed files with 268 additions and 0 deletions.
33 changes: 33 additions & 0 deletions exercises/R/DOPARALLEL/FOREACH/doParallel.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Bootstrap timing benchmark: the same foreach loop is run once sequentially
# (%do%) and once on a 4-worker cluster (%dopar%).
# Example taken from: https://cran.r-project.org/web/packages/doParallel/vignettes/gettingstartedParallel.pdf
library(doParallel)

# Keep the non-setosa rows of iris, with sepal length and species only
x <- iris[iris$Species != "setosa", c("Sepal.Length", "Species")]
trials <- 10000

# Sequential version: each trial resamples 100 row positions with replacement,
# fits a logistic regression and returns its coefficients; the per-trial
# coefficient vectors are bound together column-wise.
stime <- system.time(
  r <- foreach(icount(trials), .combine = cbind) %do% {
    ind <- sample(100, 100, replace = TRUE)
    fit <- glm(x[ind, 2] ~ x[ind, 1], family = binomial(logit))
    coef(fit)
  }
)["elapsed"]

stime


# Parallel version: identical loop body, dispatched to the registered cluster
cl <- makeCluster(4)
registerDoParallel(cl)

ptime <- system.time(
  r <- foreach(icount(trials), .combine = cbind) %dopar% {
    ind <- sample(100, 100, replace = TRUE)
    fit <- glm(x[ind, 2] ~ x[ind, 1], family = binomial(logit))
    coef(fit)
  }
)["elapsed"]

ptime

# Release the worker processes
stopCluster(cl)
14 changes: 14 additions & 0 deletions exercises/R/DOPARALLEL/FOREACH/job_doParallel.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# SLURM batch script that runs doParallel.R.
#SBATCH --account=Project_ID
# Wall-time limit: 10 minutes
#SBATCH --time=00:10:00
# Four tasks, matching the 4 workers created in doParallel.R
#SBATCH --ntasks=4
# Files receiving the job's standard output and standard error
#SBATCH --output=output%J.out
#SBATCH --error=error%J.error

# Start from a clean module environment, then load the toolchain and R
module purge > /dev/null 2>&1
module load GCC/10.2.0 OpenMPI/4.0.5
module load R/4.0.4

R --no-save --no-restore --file=doParallel.R
82 changes: 82 additions & 0 deletions exercises/R/DOPARALLEL/ML/doParallel_ML.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#Example taken from: http://michael.hahsler.net/SMU/LearnROnYourOwn/code/doMC.html
# Load the foreach/doParallel machinery and register a parallel backend
# with 4 workers; %dopar% loops further down run on this backend.
library(doParallel)
registerDoParallel(cores=4)
# Report how many workers the registered backend provides
getDoParWorkers()

# Modelling packages for the classifiers fitted in evaluation():
# caret (knn3), MASS (lda), klaR (NaiveBayes), nnet (nnet), e1071 (svm),
# rpart (rpart). The attach order is kept as-is because it decides which
# package wins when two of them export the same name.
library(caret)
library(MASS)
library(klaR)
library(nnet)
library(e1071)
library(rpart)

# Build the working data set `x`: iris with its rows in random order,
# an extra pure-noise column, and Gaussian noise added to the first three
# measurement columns.
data(iris)
x <- iris[sample(nrow(iris)), ]

x <- cbind(x, useless = rnorm(nrow(x)))
# Columns are perturbed one after another (1, 2, 3), each with its own draw
x[1:3] <- lapply(x[1:3], function(v) v + rnorm(length(v)))

head(x)

# Turn a prediction object carrying a `posterior` matrix (one row per
# observation, one named column per class) into a character vector holding,
# for every row, the name of the column with the largest posterior value.
posteriorToClass <- function(predicted) {
  post <- predicted$posterior
  best_col <- apply(post, MARGIN = 1, FUN = which.max)
  colnames(post)[best_col]
}

# Misclassification rate: the fraction of observations whose predicted label
# differs from the true label.
#
# Arguments:
#   predicted  vector (factor or character) of predicted class labels
#   true       vector (factor or character) of true class labels, same length
#
# Returns a single number in [0, 1] (NaN when both vectors are empty).
#
# Labels are compared element-wise as strings. Taking the diagonal of
# table(true, predicted) is only valid when both arguments carry exactly the
# same label set in the same order; a character `predicted` that never
# contains one of the classes yields a non-square, misaligned table and a
# wrong rate.
missclassRate <- function(predicted, true) {
  stopifnot(length(predicted) == length(true))
  n <- length(true)

  # Pairs involving NA are not counted as correct, so they count as errors
  correct <- sum(as.character(predicted) == as.character(true), na.rm = TRUE)
  (n - correct) / n
}

# Run one random hold-out experiment on the global data frame `x`.
#
# A random 10% of the rows of `x` is held out for testing. Six classifiers
# (knn3, lda, nnet, NaiveBayes, svm, rpart) are fitted to the remaining rows
# with `Species` as the response, and each one predicts the held-out rows.
#
# Returns a named numeric vector (knn3, lda, nnet, nb, svm, rpart) holding
# the misclassification rate of each classifier on the held-out rows.
evaluation <- function() {
  ## 10% for testing
  testSize <- floor(nrow(x) * 10/100)
  test <- sample(seq_len(nrow(x)), testSize)

  train_data <- x[-test, ]
  ## column 5 is the class label; it is removed from the test predictors
  test_data <- x[test, -5]
  test_class <- x[test, 5]

  ## create model
  model_knn3 <- knn3(Species ~ ., data = train_data)
  model_lda <- lda(Species ~ ., data = train_data)
  model_nnet <- nnet(Species ~ ., data = train_data, size = 10, trace = FALSE)
  model_nb <- NaiveBayes(Species ~ ., data = train_data)
  model_svm <- svm(Species ~ ., data = train_data)
  model_rpart <- rpart(Species ~ ., data = train_data)

  ## prediction (lda and NaiveBayes return posteriors, converted to labels)
  predicted <- list(
    knn3 = predict(model_knn3, test_data, type = "class"),
    lda = posteriorToClass(predict(model_lda, test_data)),
    nnet = predict(model_nnet, test_data, type = "class"),
    nb = posteriorToClass(predict(model_nb, test_data)),
    svm = predict(model_svm, test_data),
    rpart = predict(model_rpart, test_data, type = "class")
  )

  ## calculate misclassification rate; vapply guarantees one number per model
  vapply(
    predicted,
    FUN = function(p) missclassRate(true = test_class, predicted = p),
    FUN.VALUE = numeric(1)
  )
}

# Number of independent hold-out experiments per timing run
runs <- 10000

# Sequential baseline: results of all runs are stacked row-wise
stime <- system.time(
  sr <- foreach(seq_len(runs), .combine = rbind) %do% evaluation()
)

# Same workload on the registered parallel backend
ptime <- system.time(
  pr <- foreach(seq_len(runs), .combine = rbind) %dopar% evaluation()
)

# Side-by-side timing table
timing <- rbind(sequential = stime, parallel = ptime)
timing
14 changes: 14 additions & 0 deletions exercises/R/DOPARALLEL/ML/job_doParallel_ML.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# SLURM batch script that runs doParallel_ML.R.
#SBATCH --account=Project_ID
# Wall-time limit: 10 minutes
#SBATCH --time=00:10:00
# Four tasks, matching registerDoParallel(cores=4) in doParallel_ML.R
#SBATCH --ntasks=4
# Files receiving the job's standard output and standard error
#SBATCH --output=output%J.out
#SBATCH --error=error%J.error

# Start from a clean module environment, then load the toolchain and R
module purge > /dev/null 2>&1
module load GCC/10.2.0 OpenMPI/4.0.5
module load R/4.0.4

R --no-save --no-restore --file=doParallel_ML.R
14 changes: 14 additions & 0 deletions exercises/R/JOB-ARRAYS/job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# SLURM job-array script: runs script_arrays.R once per array task.
#SBATCH --account=Project_ID
# Wall-time limit: 12 minutes
#SBATCH --time=00:12:00
# Array task indices 1 through 28
#SBATCH --array=1-28
# One output and one error file per array task
#SBATCH --output=Array_test.%A_%a.out
#SBATCH --error=Array_test.%A_%a.error

# Start from a clean module environment, then load the toolchain and R
module purge > /dev/null 2>&1
module load GCC/10.2.0 OpenMPI/4.0.5
module load R/4.0.4

Rscript --quiet --no-save --no-restore script_arrays.R
6 changes: 6 additions & 0 deletions exercises/R/JOB-ARRAYS/script_arrays.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Report which job array and which array task this R process belongs to.
# Both identifiers are read from the environment; Sys.getenv() yields an
# empty string for a variable that is not set.
job_id <- Sys.getenv("SLURM_ARRAY_JOB_ID")
task_id <- Sys.getenv("SLURM_ARRAY_TASK_ID")

cat(sprintf("This is job ID %s \n", job_id))
cat(sprintf("This is task ID %s \n", task_id))

# Pause for 10 seconds before the script ends
Sys.sleep(10)
46 changes: 46 additions & 0 deletions exercises/R/ML/Rscript.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Random forest on the Sonar data, trained twice with 25-fold cross-validation:
# first without a parallel backend, then with a doParallel cluster registered.
# Example taken from https://github.com/lgreski/datasciencectacontent/blob/master/markdown/pml-randomForestPerformance.md
library(mlbench)
data(Sonar)
library(caret)
set.seed(95014)

# Partition Sonar into training (p = 0.75) and testing rows
inTraining <- createDataPartition(Sonar$Class, p = 0.75, list = FALSE)
training <- Sonar[inTraining, ]
testing <- Sonar[-inTraining, ]

# x / y interface is used because the formula interface performs poorly:
# column 61 is the response, all other columns are predictors
x <- training[, -61]
y <- training[, 61]

# Cross-validation settings shared by both runs; only allowParallel differs
cv_control <- function(parallel) {
  trainControl(method = "cv", number = 25, allowParallel = parallel)
}

# Serial mode
fitControl <- cv_control(FALSE)

stime <- system.time(
  fit <- train(x, y, method = "rf", data = Sonar, trControl = fitControl)
)



# Parallel mode
library(parallel)
library(doParallel)
cluster <- makeCluster(1)
registerDoParallel(cluster)

fitControl <- cv_control(TRUE)

ptime <- system.time(
  fit <- train(x, y, method = "rf", data = Sonar, trControl = fitControl)
)

# Release the workers and fall back to the sequential foreach backend
stopCluster(cluster)
registerDoSEQ()

# Model summary, per-fold results and cross-validated confusion matrix
fit
fit$resample
confusionMatrix.train(fit)

# Timings
timing <- rbind(sequential = stime, parallel = ptime)
timing
14 changes: 14 additions & 0 deletions exercises/R/ML/job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
# SLURM batch script that runs Rscript.R.
#SBATCH --account=Project_ID
# Wall-time limit: 10 minutes
#SBATCH --time=00:10:00
# One task, matching makeCluster(1) in Rscript.R
#SBATCH --ntasks=1
# Files receiving the job's standard output and standard error
#SBATCH --output=output%J.out
#SBATCH --error=error%J.error

# Start from a clean module environment, then load the toolchain and R
module purge > /dev/null 2>&1
module load GCC/10.2.0 OpenMPI/4.0.5
module load R/4.0.4

R --no-save --no-restore --file=Rscript.R
11 changes: 11 additions & 0 deletions exercises/R/RMPI/Rmpi.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Minimal Rmpi example: spawn worker R processes, apply a function across
# them, then shut everything down.
library(Rmpi)
print(mpi.universe.size())

# Start 5 worker R processes
mpi.spawn.Rslaves(nslaves = 5)

# One value per worker; runif is applied to the elements of x on the workers
x <- c(10, 20, 30, 40, 50)
mpi.apply(x, runif)

# Shut down the worker processes, then finalize MPI
mpi.close.Rslaves()
mpi.finalize()
13 changes: 13 additions & 0 deletions exercises/R/RMPI/job_Rmpi.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
# SLURM batch script that runs Rmpi.R.
#SBATCH --account=Project_ID
# Wall-time limit: 10 minutes
#SBATCH --time=00:10:00
# Six tasks: Rmpi.R spawns 5 workers in addition to the main R process
#SBATCH --ntasks=6

# Set the Open MPI "warn on fork" parameter to 0
export OMPI_MCA_mpi_warn_on_fork=0

# Start from a clean module environment, then load the toolchain and R
module purge > /dev/null 2>&1
module load GCC/10.2.0 OpenMPI/4.0.5
module load R/4.0.4

Rscript --no-save --no-restore Rmpi.R
17 changes: 17 additions & 0 deletions exercises/R/SERIAL/job.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash
# SLURM batch script that runs serial.R on a single task.
#SBATCH --account=Project_ID
# Wall-time limit: 3 minutes
#SBATCH --time=00:03:00
#SBATCH --ntasks=1
# Files receiving the job's standard output and standard error
#SBATCH --output=output%J.out
#SBATCH --error=error%J.error

# Start from a clean module environment, then load the toolchain and R
module purge > /dev/null 2>&1
module load GCC/10.2.0 OpenMPI/4.0.5
module load R/4.0.4

# Batch mode: use this form when the script takes no command line arguments
R CMD BATCH --no-save --no-restore serial.R
# Rscript is the recommended form when command line arguments are passed
#Rscript --no-save --no-restore serial.R 3.14
4 changes: 4 additions & 0 deletions exercises/R/SERIAL/serial.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Minimal serial example: print a greeting, then echo the first command line
# argument passed after the script name (e.g. `Rscript serial.R 3.14`).
print("Hello World")

# Only the trailing (user-supplied) arguments; this is empty when the script
# is started without arguments, as with a plain `R CMD BATCH serial.R`
argv <- commandArgs(trailingOnly = TRUE)

if (length(argv) > 0) {
  # End the line so the output does not run into whatever is printed next
  cat(sprintf("value of argument= %s\n", argv[1]))
} else {
  # Without this branch argv[1] would be NA and "NA" would be printed
  cat("value of argument= <none supplied>\n")
}

0 comments on commit 67ffc3a

Please sign in to comment.