From 0b0d7fa30f6261eb01a7065744549ee9521c2f45 Mon Sep 17 00:00:00 2001 From: massimoaria Date: Tue, 12 Mar 2024 13:10:53 +0100 Subject: [PATCH] #433 Solved --- R/Hindex.R | 6 +++--- R/bib2df.R | 12 +++++++----- R/convert2df.R | 20 +++++++++++++++++--- R/histNetwork.R | 4 +++- inst/biblioshiny/utils.R | 8 ++++---- 5 files changed, 34 insertions(+), 16 deletions(-) diff --git a/R/Hindex.R b/R/Hindex.R index a9673115..6fbe6eac 100644 --- a/R/Hindex.R +++ b/R/Hindex.R @@ -59,13 +59,13 @@ Hindex <- function(M, field="author", elements=NULL, sep = ";",years=Inf){ switch(field, author={ - AU=M$AU - AU=trimES(gsub(","," ",AU)) + AU <- M$AU + AU <- trimES(gsub(","," ",AU)) listAU <- strsplit(AU, split=sep) l <- lengths(listAU) index= rep(row.names(M), l) df <- M[index,] - df$AUs <- unlist(listAU) + df$AUs <- trimws(unlist(listAU)) }, source={ df <- M diff --git a/R/bib2df.R b/R/bib2df.R index 91fbe0d0..df34ec50 100644 --- a/R/bib2df.R +++ b/R/bib2df.R @@ -83,7 +83,7 @@ bib2df<-function(D, dbsource = "isi"){ names(df) <- gsub("=\\{","",Tag) ### replace "---" with ";" - tagsComma <- c("AU","DE","ID","C1" ,"CR") + tagsComma <- c("AU","DE","ID","C1","CR") nolab <- setdiff(tagsComma,names(df)) if (length(nolab)>0){ cat("\nWarning:\nIn your file, some mandatory metadata are missing. Bibliometrix functions may not work properly!\n @@ -98,6 +98,8 @@ Please, take a look at the vignettes: gsub("---",";",x) })) + df1$AU <- gsub(" and;| and ",";",df1$AU) + ### replace "---" with " " otherTags <- setdiff(names(df),tagsComma) df2 <- data.frame(lapply(df[otherTags],function(x){ @@ -116,8 +118,7 @@ Please, take a look at the vignettes: df <- postprocessing(df, dbsource) - df <- df[names(df)!="Paper"] - df <- df[names(df)!="paper"] + df <- df[!names(df) %in% c("Paper", "paper")] return(df) } @@ -127,9 +128,10 @@ postprocessing <-function(DATA,dbsource){ # Authors' names cleaning (surname and initials) #remove ; and 2 or more spaces - DATA$AU=gsub("\\s+", " ", DATA$AU) + DATA$AU <- gsub("\\s+", " ", DATA$AU) + DATA$AF <- gsub("\\.|,","",DATA$AU) - listAU <- strsplit(DATA$AU, " and ") + listAU <- strsplit(DATA$AU, ";") AU <- lapply(listAU,function(l){ diff --git a/R/convert2df.R b/R/convert2df.R index bd7d8cba..f5b5f233 100644 --- a/R/convert2df.R +++ b/R/convert2df.R @@ -92,11 +92,17 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T switch(format, bibtex = { D <- importFiles(file) - M <- bib2df(D, dbsource = "isi") + M <- bib2df(D, dbsource = "isi") + # %>% + # rename(AU_IN = .data$AU, + # AU = .data$AF) }, plaintext = { D <- importFiles(file) - M <- isi2df(D) + M <- isi2df(D) + # %>% + # rename(AU_IN = .data$AU, + # AU = .data$AF) }) }, ## db SCOPUS @@ -105,9 +111,17 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T bibtex = { D <- importFiles(file) M <- bib2df(D, dbsource = "scopus") + M <- M + # %>% + # rename(AU_IN = .data$AU, + # AU = .data$AF) }, csv = { - M <- csvScopus2df(file) + M <- csvScopus2df(file) + # %>% + # rename(AU_IN = .data$AU, + # AU = .data$AF) + }) }, ## db GENERIC BIBTEX diff --git a/R/histNetwork.R b/R/histNetwork.R index 56bdc6e0..2a23bcb4 100644 --- a/R/histNetwork.R +++ b/R/histNetwork.R @@ -218,7 +218,7 @@ scopus <- function(M, min.citations, sep, network, verbose){ M_merge <- M %>% select(.data$AU,.data$PY,.data$Page.start, .data$Page.end, .data$PP, .data$SR) %>% - mutate(AU = gsub(";.*$", "", .data$AU), + mutate(AU = trimws(gsub("\\.", "", gsub("\\. ", "", gsub("^(.*?),.*$", "\\1", .data$SR)))), Page.start = as.numeric(.data$Page.start), Page.end = as.numeric(.data$Page.end), PP = ifelse(!is.na(.data$Page.start), paste0(.data$Page.start,"-",.data$Page.end), NA), @@ -232,6 +232,8 @@ scopus <- function(M, min.citations, sep, network, verbose){ group_by(.data$PY,.data$AU) %>% mutate(toRemove = ifelse(!is.na(.data$PP.y) & .data$PP.x!=.data$PP.y, TRUE,FALSE)) %>% # to remove FALSE POSITIVE ungroup() %>% + dplyr::filter(.data$toRemove != TRUE) %>% + mutate(toRemove = ifelse(!is.na(.data$PP.x) & is.na(.data$PP.y),TRUE,FALSE)) %>% dplyr::filter(.data$toRemove != TRUE) LCS <- CR %>% diff --git a/inst/biblioshiny/utils.R b/inst/biblioshiny/utils.R index 29566f3a..d729e292 100644 --- a/inst/biblioshiny/utils.R +++ b/inst/biblioshiny/utils.R @@ -367,13 +367,13 @@ Hindex_plot <- function(values, type, input){ switch(type, author={ - AU <- trim(gsub(",","",names(tableTag(values$M,"AU")))) - values$H <- Hindex(values$M, field = "author", elements = AU, sep = ";", years=Inf)$H %>% + #AU <- trim(gsub(",","",names(tableTag(values$M,"AU")))) + values$H <- Hindex(values$M, field = "author", elements = NULL, sep = ";", years=Inf)$H %>% arrange(desc(.data$h_index)) }, source={ - SO <- names(sort(table(values$M$SO),decreasing = TRUE)) - values$H <- Hindex(values$M, field = "source", elements = SO, sep = ";", years=Inf)$H %>% + #SO <- names(sort(table(values$M$SO),decreasing = TRUE)) + values$H <- Hindex(values$M, field = "source", elements = NULL, sep = ";", years=Inf)$H %>% arrange(desc(.data$h_index)) } )