| @@ -4,33 +4,35 @@ library("xml2") | |||||
| library(tibble) | library(tibble) | ||||
| library(dplyr) | library(dplyr) | ||||
| library(magrittr) | library(magrittr) | ||||
| library(pbapply) | |||||
| # for usage see the example at the end | # for usage see the example at the end | ||||
# read_all(): lists the files in DOWNLOAD_DIR, runs read_one() on each name,
# then for each of the elements "redner", "reden" and "talks" pulls that
# element out of every per-file result, stacks them with bind_rows() and
# drops duplicate rows with distinct(). Returns
# list(redner = ..., reden = ..., talks = ...). Takes no arguments; reads the
# DOWNLOAD_DIR variable, which is not defined in this view.
# NOTE(review): this span is a side-by-side diff rendering, so two variants of
# some statements appear back to back (lapply/pblapply for the per-file loop,
# sapply/lapply for the `[[` extractions). Presumably the pblapply()/lapply()
# rows and the cat() status line are the post-change version -- confirm
# against the repository before relying on either.
| read_all <- function() { | read_all <- function() { | ||||
| cat("Reading all protocols from", DOWNLOAD_DIR, "\n") | |||||
| available_protocols <- list.files(DOWNLOAD_DIR) | available_protocols <- list.files(DOWNLOAD_DIR) | ||||
| res <- lapply(available_protocols, read_one) | |||||
| res <- pblapply(available_protocols, read_one) | |||||
| sapply(res, `[[`, "redner") %>% | |||||
| lapply(res, `[[`, "redner") %>% | |||||
| bind_rows() %>% | bind_rows() %>% | ||||
| distinct() -> | distinct() -> | ||||
| redner | redner | ||||
| sapply(res, `[[`, "reden") %>% | |||||
| lapply(res, `[[`, "reden") %>% | |||||
| bind_rows() %>% | bind_rows() %>% | ||||
| distinct() -> | distinct() -> | ||||
| reden | reden | ||||
| sapply(res, `[[`, "talks") %>% | |||||
| lapply(res, `[[`, "talks") %>% | |||||
| bind_rows() %>% | bind_rows() %>% | ||||
| distinct() -> | distinct() -> | ||||
| talks | talks | ||||
| list(redner = redner, reden = reden, talks = talks) | list(redner = redner, reden = reden, talks = talks) | ||||
| } | } | ||||
| # this reads all currently parseable data from one xml | # this reads all currently parseable data from one xml | ||||
| read_one <- function(name) { | read_one <- function(name) { | ||||
| print(paste("reading", name)) | |||||
| x <- tryCatch(read_xml(paste0(DOWNLOAD_DIR, name)), | x <- tryCatch(read_xml(paste0(DOWNLOAD_DIR, name)), | ||||
| error = function(c) NULL) | error = function(c) NULL) | ||||
| if (is.null(x)) return(NULL) | if (is.null(x)) return(NULL) | ||||
| @@ -47,6 +49,7 @@ read_one <- function(name) { | |||||
| xml_find_all("rede") %>% | xml_find_all("rede") %>% | ||||
| parse_redenliste() -> | parse_redenliste() -> | ||||
| res | res | ||||
| list(redner = redner, reden = res$reden, talks = res$talks) | list(redner = redner, reden = res$reden, talks = res$talks) | ||||
| } | } | ||||
| @@ -137,10 +140,10 @@ parse_rednerliste <- function(rednerliste_xml) { | |||||
| # EXAMPLE USE | # EXAMPLE USE | ||||
| # make sure data is downloaded via fetch.R | # make sure data is downloaded via fetch.R | ||||
| res <- read_one("19126-data.xml") | |||||
| res$redner | |||||
| res$reden | |||||
| res$talks | |||||
| # res <- read_one("19126-data.xml") | |||||
| # | |||||
| # res$redner | |||||
| # res$reden | |||||
| # res$talks | |||||
| # ------------------------------- | # ------------------------------- | ||||