Skip to content

Commit

Permalink
Merge pull request #469 from massimoaria/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
massimoaria authored Jun 6, 2024
2 parents 4adcf46 + a98bf68 commit a45ad5a
Show file tree
Hide file tree
Showing 3 changed files with 366 additions and 8 deletions.
41 changes: 37 additions & 4 deletions R/csvOA2df.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,42 @@ csvOA2df <- function(file){

DATA$AF <- DATA$AU
DATA$ID <- DATA$DE
DATA$AB=""
if (!"AB" %in% names(DATA)) DATA$AB=""
DATA$CR <- gsub("https://openalex.org/","",DATA$CR)
DATA$AU_ID <- gsub("https://openalex.org/","",DATA$AU_ID)
DATA$id_oa <- gsub("https://openalex.org/","",DATA$id_oa)
DATA$JI <- DATA$J9 <- gsub("https://openalex.org/","",DATA$SO_ID)
DATA$corresponding_author_ids <- gsub("https://openalex.org/","",DATA$corresponding_author_ids)
DATA$C1 <- gsub("https://", "", DATA$C1)
DATA$DB <- "OPENALEX"

# affiliation string
AFF <- DATA %>%
select(id_oa, starts_with("authorships_raw_affiliation_strings_"))

colId <- c(-1,parse_number(colnames(AFF)[-1]))

DATA <- AFF[order(colId)] %>%
unite(., C1, starts_with("authorships_raw_affiliation_strings_"), sep=";") %>%
mutate(C1 = gsub("NA","",C1),
C1 = TrimMult(C1,char=";")) %>%
bind_cols(DATA %>%
select(-"id_oa", -starts_with("authorships_raw_affiliation_strings_")))

DATA$C1 <- gsub("https://", "", DATA$C1)

# country string
CO <- DATA %>%
select(id_oa, starts_with("authorships_countries_"))

colId <- c(-1,parse_number(colnames(CO)[-1]))

DATA <- CO[order(colId)] %>%
unite(., AU_CO, starts_with("authorships_countries_"), sep=";") %>%
mutate(AU_CO = gsub("NA","",AU_CO),
AU_CO = TrimMult(AU_CO,char=";")) %>%
bind_cols(DATA %>%
select(-"id_oa", -starts_with("authorships_countries_")))

## corresponding author
DATA <- DATA %>%
mutate(AU1_ID = gsub(";.*", "", corresponding_author_ids))
Expand Down Expand Up @@ -119,7 +146,8 @@ relabelling_OA <- function(DATA){
label[label %in% "biblio_issue"] <- "IS"
label[label %in% "biblio_volume"] <- "VL"
label[label %in% "referenced_works" ] <- "CR"
label[label %in% "keywords_keyword"] <- "DE"
label[label %in% "keywords_display_name"] <- "DE"
label[label %in% "abstract"] <- "AB"
label[label %in% "concepts_display_name"] <- "CONCEPTS"
label[label %in% "topics_display_name"] <- "TOPICS"
label[label %in% "sustainable_development_goals_display_name"] <- "SDG"
Expand All @@ -128,8 +156,13 @@ relabelling_OA <- function(DATA){
label[label %in% "referenced_works_count"] <- "NR"
label[label %in% "language"] <- "LA"
label[label %in% "authorships_author_position"] <- "AU_POSITION"
label[label %in% "authorships_raw_affiliation_string"] <- "C1"
#label[label %in% "authorships_raw_affiliation_string"] <- "C1"
label[label %in% "doi"] <- "DI"
names(DATA) <- label
return(DATA)
}

# Collapse repeated separators and strip them from both ends of each string.
#
# x:    character vector to clean.
# char: separator to normalise (default: a single space). It is pasted into
#       the regular expression as-is, so regex metacharacters are not escaped.
#
# Returns a character vector in which leading and trailing occurrences of
# `char` are removed and every occurrence that directly follows another
# occurrence is deleted (so runs collapse to a single separator).
TrimMult <- function(x, char = " ") {
  # Three alternatives: leading run | separator preceded by a separator |
  # trailing run. The look-behind needs the PCRE engine, hence perl = TRUE
  # (spelled out: `T` is an ordinary variable that can be reassigned).
  pattern <- paste0("^", char, "*|(?<=", char, ")", char, "|", char, "*$")
  gsub(pattern, "", x, perl = TRUE)
}
Loading

0 comments on commit a45ad5a

Please sign in to comment.