Skip to content

Commit

Permalink
Add myeloid fine scope classifier (#120)
Browse files Browse the repository at this point in the history
* Add myeloid fine scope classifier
  • Loading branch information
bbimber authored Jun 7, 2024
1 parent a42a90a commit b06e5f0
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 3 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(AssignCellType)
export(CalculateUCellScores)
export(Classify_ImmuneCells)
export(Classify_Myeloid)
export(Classify_TNK)
export(ExpandGeneList)
export(ExtractGeneWeights)
Expand Down
45 changes: 44 additions & 1 deletion R/CellTypist.R
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,49 @@ Classify_TNK <- function(seuratObj, assayName = Seurat::DefaultAssay(seuratObj),
}


#' @title Classify Myeloid
#'
#' @description Runs celltypist using the RIRA fine-scope Myeloid model, with parameters optimized for that model. After scoring, a '<columnPrefix>coarseclass' column is derived from '<columnPrefix>cellclass' by collapsing the monocyte subsets into 'Monocytes' and the DC subsets into 'DC'; all other labels are carried over unchanged.
#' @param seuratObj The seurat object
#' @param assayName The name of the assay to use. Others will be dropped
#' @param columnPrefix A prefix that will be added to the beginning of the resulting columns, added to seuratObj@meta.data
#' @param maxAllowableClasses Celltypist can assign a cell to many classes, creating extremely long labels. Any cell with more than this number of labels will be set to NA
#' @param minFractionToInclude If non-null, any labels with fewer than this fraction of cells will be set to NA.
#' @param minCellsToRun If the input seurat object has fewer than this many cells, NAs will be added for all expected columns and celltypist will not be run.
#' @param maxBatchSize If more than this many cells are in the object, it will be split into batches of this size and run in serial.
#' @param retainProbabilityMatrix If true, the celltypist probability_matrix with per-class probabilities will be stored in meta.data
#' @return The seurat object returned by RunCellTypist, with the additional '<columnPrefix>coarseclass' factor column added.
#'
#' @export
Classify_Myeloid <- function(seuratObj,
                             assayName = Seurat::DefaultAssay(seuratObj),
                             columnPrefix = 'RIRA_Myeloid_v3.',
                             maxAllowableClasses = 6,
                             minFractionToInclude = 0.01,
                             minCellsToRun = 200,
                             maxBatchSize = 600000,
                             retainProbabilityMatrix = FALSE) {
  seuratObj <- RunCellTypist(
    seuratObj = seuratObj,
    modelName = "RIRA_FineScope_Myeloid_v3",

    # Fixed settings, tuned for this specific model:
    pThreshold = 0.5,
    minProp = 0,
    useMajorityVoting = FALSE,
    mode = "prob_match",

    # Caller-controlled settings, passed straight through:
    assayName = assayName,
    columnPrefix = columnPrefix,
    maxAllowableClasses = maxAllowableClasses,
    minFractionToInclude = minFractionToInclude,
    minCellsToRun = minCellsToRun,
    maxBatchSize = maxBatchSize,
    retainProbabilityMatrix = retainProbabilityMatrix
  )

  # The fine-scope label column must be present before a coarse label can be derived from it
  fineField <- paste0(columnPrefix, 'cellclass')
  if (! fineField %in% names(seuratObj@meta.data)) {
    stop(paste0('Missing field: ', fineField))
  }

  # Membership is always tested against the untouched fine labels, so the two
  # collapse steps below cannot interfere with one another
  fineLabels <- seuratObj@meta.data[[fineField]]
  isMonocyte <- fineLabels %in% c('CD14+ Monocytes', 'CD16+ Monocytes', 'Inflammatory Monocytes')
  isDC <- fineLabels %in% c('DC', 'Mature DC')

  coarseLabels <- as.character(fineLabels)
  coarseLabels[isMonocyte] <- 'Monocytes'
  coarseLabels[isDC] <- 'DC'

  coarseField <- paste0(columnPrefix, 'coarseclass')
  seuratObj[[coarseField]] <- as.factor(coarseLabels)

  seuratObj
}


#' @title Classify Bulk Immune cells
#'
#' @description Runs celltypist using the RIRA bulk immune model to score cells using CellTypist with optimized parameters.
Expand Down Expand Up @@ -553,7 +596,7 @@ FilterDisallowedClasses <- function(seuratObj, sourceField = 'RIRA_Immune_v2.maj
}
}

allCells <- data.frame(cellbarcode = colnames(seuratObj), sortOrder = 1:ncol(seuratObj))
allCells <- data.frame(cellbarcode = colnames(seuratObj), sortOrder = seq_len(ncol(seuratObj)))
if (nrow(toDrop) > 0) {
allCells <- merge(allCells, toDrop, by = 'cellbarcode', all.x = T)
allCells <- dplyr::arrange(allCells, sortOrder)
Expand Down
Binary file added inst/models/RIRA_FineScope_Myeloid_v3.pkl
Binary file not shown.
37 changes: 37 additions & 0 deletions man/Classify_Myeloid.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions tests/testthat/test-celltypist.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,16 +78,24 @@ test_that("celltypist runs for RIRA models", {
seuratObj <- getBaseSeuratData()
seuratObj <- Seurat::NormalizeData(seuratObj, verbose = FALSE)
seuratObj <- Classify_TNK(seuratObj, retainProbabilityMatrix = TRUE)

print(table(seuratObj$RIRA_TNK_v2.cellclass))

expect_equal(4, length(unique(seuratObj$RIRA_TNK_v2.cellclass)), info = 'using RIRA T_NK', tolerance = 1)
expect_equal(221, unname(table(seuratObj$RIRA_TNK_v2.cellclass)['CD4+ T Cells']), tolerance = 1)
expect_equal(1028, unname(table(seuratObj$RIRA_TNK_v2.cellclass)['CD8+ T Cells']), tolerance = 1)
expect_equal(66, unname(table(seuratObj$RIRA_TNK_v2.cellclass)['NK Cells']), tolerance = 1)
expect_equal(1366, unname(table(seuratObj$RIRA_TNK_v2.cellclass)['Unassigned']), tolerance = 1)

expect_equal(6.64e-08, min(seuratObj$RIRA_TNK_v2.prob.NK.Cells), tolerance = 0.00001)

seuratObj <- Classify_Myeloid(seuratObj, retainProbabilityMatrix = TRUE)
print('RIRA_Myeloid_v3:')
print(table(seuratObj$RIRA_Myeloid_v3.cellclass))
print(table(seuratObj$RIRA_Myeloid_v3.coarseclass))

expect_equal(5, length(unique(seuratObj$RIRA_Myeloid_v3.cellclass)), info = 'using RIRA Myeloid')
expect_equal(32, unname(table(seuratObj$RIRA_Myeloid_v3.cellclass)['DC']), tolerance = 1)
expect_equal(32, unname(table(seuratObj$RIRA_Myeloid_v3.coarseclass)['DC']), tolerance = 1)
})

test_that("FilterDisallowedClasses works as expected", {
Expand Down

0 comments on commit b06e5f0

Please sign in to comment.