Skip to content

Commit 2f9bdb9

Browse files
committed
Refactor more github scraping
1 parent ae1d874 commit 2f9bdb9

File tree

1 file changed

+19
-13
lines changed

1 file changed

+19
-13
lines changed

R/buildtools.R

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,8 @@ get_maintainer_info <- function(path = '.'){
469469
info$mastodon <- socials$mastodon
470470
info$bluesky <- socials$bluesky
471471
info$linkedin <- socials$linkedin
472+
info$orcid <- socials$orcid #might be overridden below
473+
info$twitter <- socials$x #todo: drop this
472474
}
473475
uuid <- Sys.getenv('MAINTAINERUUID', "")
474476
if(nchar(uuid)){
@@ -479,27 +481,31 @@ get_maintainer_info <- function(path = '.'){
479481
authors <- utils:::.read_authors_at_R_field(aar)
480482
maintainer <- Filter(function(x){"cre" %in% x$role}, authors)
481483
if(!length(maintainer)) return(info)
482-
orcid <- as.list(maintainer[[1]]$comment)$ORCID
484+
info$orcid <- parse_orcid_id(as.list(maintainer[[1]]$comment)$ORCID)
485+
return(info)
486+
}
487+
488+
parse_orcid_id <- function(str){
483489
pattern <- '0000-[0-9]{4}-[0-9]{4}-[0-9]{3}[0-9X]'
484-
m <- regexpr(pattern, orcid)
485-
result <- regmatches(orcid, m)
490+
m <- regexpr(pattern, str)
491+
result <- regmatches(str, m)
486492
if(length(result)){
487-
info$orcid <- result
493+
result
488494
}
489-
return(info)
490495
}
491496

492497
scrape_github_socials <- function(login){
493-
networks <- c('Bluesky', 'Mastodon', 'LinkedIn')
494498
tryCatch({
495499
doc <- xml2::read_html(paste0("http://github.com/", login))
496-
out <- lapply(networks, function(network){
497-
link <- xml2::xml_find_all(doc, sprintf('//li[svg/title = "%s"]/a', network))
498-
if(length(link)){
499-
xml2::xml_attr(link, 'href')
500-
}
501-
})
502-
structure(out, names = tolower(networks))
500+
vcards <- xml2::xml_find_all(doc, "//li[contains(@class, 'vcard-detail')]")
501+
titles <- xml2::xml_text(xml2::xml_find_first(vcards, 'svg/title'))
502+
links <- xml2::xml_text(xml2::xml_find_first(vcards, 'a'))
503+
is_orcid <- grepl("orcid", links, fixed = TRUE)
504+
links[is_orcid] <- parse_orcid_id(links[is_orcid])
505+
titles[is_orcid] <- 'orcid'
506+
links <- as.list(links[!is.na(titles)])
507+
names(links) <- tolower(titles[!is.na(titles)])
508+
links
503509
}, error = message)
504510
}
505511

0 commit comments

Comments
 (0)