From 0b0d7fa30f6261eb01a7065744549ee9521c2f45 Mon Sep 17 00:00:00 2001
From: massimoaria <aria@unina.it>
Date: Tue, 12 Mar 2024 13:10:53 +0100
Subject: [PATCH] #433 Solved: use ";" as BibTeX author separator and trim author names in Hindex

---
 R/Hindex.R               |  6 +++---
 R/bib2df.R               | 12 +++++++-----
 R/convert2df.R           | 20 +++++++++++++++++---
 R/histNetwork.R          |  4 +++-
 inst/biblioshiny/utils.R |  8 ++++----
 5 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/R/Hindex.R b/R/Hindex.R
index a9673115..6fbe6eac 100644
--- a/R/Hindex.R
+++ b/R/Hindex.R
@@ -59,13 +59,13 @@ Hindex <- function(M, field="author", elements=NULL, sep = ";",years=Inf){
   
   switch(field,
          author={
-           AU=M$AU
-           AU=trimES(gsub(","," ",AU))
+           AU <- M$AU
+           AU <- trimES(gsub(","," ",AU))
            listAU <- strsplit(AU, split=sep)
            l <- lengths(listAU)
            index= rep(row.names(M), l)
            df <- M[index,]
-           df$AUs <- unlist(listAU)
+           df$AUs <- trimws(unlist(listAU))
          },
          source={
            df <- M
diff --git a/R/bib2df.R b/R/bib2df.R
index 91fbe0d0..df34ec50 100644
--- a/R/bib2df.R
+++ b/R/bib2df.R
@@ -83,7 +83,7 @@ bib2df<-function(D, dbsource = "isi"){
   names(df) <- gsub("=\\{","",Tag)
   
   ### replace "---" with ";"
-  tagsComma <- c("AU","DE","ID","C1" ,"CR")
+  tagsComma <- c("AU","DE","ID","C1","CR")
   nolab <- setdiff(tagsComma,names(df))
   if (length(nolab)>0){
     cat("\nWarning:\nIn your file, some mandatory metadata are missing. Bibliometrix functions may not work properly!\n
@@ -98,6 +98,8 @@ Please, take a look at the vignettes:
     gsub("---",";",x)
   }))
   
+  df1$AU <- gsub(" and;| and ",";",df1$AU)
+  
   ### replace "---" with " "
   otherTags <- setdiff(names(df),tagsComma)
   df2 <- data.frame(lapply(df[otherTags],function(x){
@@ -116,8 +118,7 @@ Please, take a look at the vignettes:
   
   df <- postprocessing(df, dbsource)
   
-  df <- df[names(df)!="Paper"]
-  df <- df[names(df)!="paper"]
+  df <- df[!names(df) %in% c("Paper", "paper")]
   
   return(df)
 }
@@ -127,9 +128,10 @@ postprocessing <-function(DATA,dbsource){
   
   # Authors' names cleaning (surname and initials)
   #remove ; and 2 or more spaces
-  DATA$AU=gsub("\\s+", " ", DATA$AU)
+  DATA$AU <- gsub("\\s+", " ", DATA$AU)
+  DATA$AF <- gsub("\\.|,","",DATA$AU)
   
-  listAU <- strsplit(DATA$AU, " and ")
+  listAU <- strsplit(DATA$AU, ";")
   
   AU <- lapply(listAU,function(l){
     
diff --git a/R/convert2df.R b/R/convert2df.R
index bd7d8cba..f5b5f233 100644
--- a/R/convert2df.R
+++ b/R/convert2df.R
@@ -92,11 +92,17 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T
       switch(format,
              bibtex = {
                D <- importFiles(file)
-               M <- bib2df(D, dbsource = "isi")
+               M <- bib2df(D, dbsource = "isi") 
+               # %>% 
+               #   rename(AU_IN = .data$AU,
+               #          AU = .data$AF)
              },
              plaintext = {
                D <- importFiles(file)
-               M <- isi2df(D)
+               M <- isi2df(D) 
+               # %>% 
+               #   rename(AU_IN = .data$AU,
+               #          AU = .data$AF)
              })
     },
     ## db SCOPUS
@@ -105,9 +111,17 @@ convert2df<-function(file,dbsource="wos",format="plaintext", remove.duplicates=T
              bibtex = {
                D <- importFiles(file)
                M <- bib2df(D, dbsource = "scopus")
+               M <- M 
+               # %>% 
+               #   rename(AU_IN = .data$AU,
+               #          AU = .data$AF)
              },
              csv = {
-               M <- csvScopus2df(file)
+               M <- csvScopus2df(file) 
+               # %>% 
+               #   rename(AU_IN = .data$AU,
+               #          AU = .data$AF)
+               
              })
     },
     ## db GENERIC BIBTEX
diff --git a/R/histNetwork.R b/R/histNetwork.R
index 56bdc6e0..2a23bcb4 100644
--- a/R/histNetwork.R
+++ b/R/histNetwork.R
@@ -218,7 +218,7 @@ scopus <- function(M, min.citations, sep, network, verbose){
   
   M_merge <- M %>% 
     select(.data$AU,.data$PY,.data$Page.start, .data$Page.end, .data$PP, .data$SR) %>% 
-    mutate(AU = gsub(";.*$", "", .data$AU),
+    mutate(AU = trimws(gsub("\\.", "", gsub("\\. ", "", gsub("^(.*?),.*$", "\\1", .data$SR)))),
            Page.start = as.numeric(.data$Page.start),
            Page.end = as.numeric(.data$Page.end),
            PP = ifelse(!is.na(.data$Page.start), paste0(.data$Page.start,"-",.data$Page.end), NA),
@@ -232,6 +232,8 @@ scopus <- function(M, min.citations, sep, network, verbose){
     group_by(.data$PY,.data$AU) %>% 
     mutate(toRemove = ifelse(!is.na(.data$PP.y) & .data$PP.x!=.data$PP.y, TRUE,FALSE)) %>% # to remove FALSE POSITIVE
     ungroup() %>% 
+    dplyr::filter(.data$toRemove != TRUE) %>% 
+    mutate(toRemove = ifelse(!is.na(.data$PP.x) & is.na(.data$PP.y),TRUE,FALSE)) %>% 
     dplyr::filter(.data$toRemove != TRUE)
   
   LCS <- CR %>% 
diff --git a/inst/biblioshiny/utils.R b/inst/biblioshiny/utils.R
index 29566f3a..d729e292 100644
--- a/inst/biblioshiny/utils.R
+++ b/inst/biblioshiny/utils.R
@@ -367,13 +367,13 @@ Hindex_plot <- function(values, type, input){
     
     switch(type,
            author={
-             AU <- trim(gsub(",","",names(tableTag(values$M,"AU"))))
-             values$H <- Hindex(values$M, field = "author", elements = AU, sep = ";", years=Inf)$H %>% 
+             #AU <- trim(gsub(",","",names(tableTag(values$M,"AU"))))
+             values$H <- Hindex(values$M, field = "author", elements = NULL, sep = ";", years=Inf)$H %>% 
                arrange(desc(.data$h_index))
            },
            source={
-             SO <- names(sort(table(values$M$SO),decreasing = TRUE))
-             values$H <- Hindex(values$M, field = "source", elements = SO, sep = ";", years=Inf)$H %>% 
+             #SO <- names(sort(table(values$M$SO),decreasing = TRUE))
+             values$H <- Hindex(values$M, field = "source", elements = NULL, sep = ";", years=Inf)$H %>% 
                arrange(desc(.data$h_index))
            }
     )