|
|
|
@@ -4,33 +4,35 @@ library("xml2") |
|
|
|
library(tibble) |
|
|
|
library(dplyr) |
|
|
|
library(magrittr) |
|
|
|
library(pbapply) |
|
|
|
|
|
|
|
# for usage see the example at the end |
|
|
|
|
|
|
|
# Read every downloaded protocol and merge the per-file results.
#
# Lists the files in DOWNLOAD_DIR, parses each of them with read_one() and
# combines the "redner", "reden" and "talks" entries of all files into one
# de-duplicated table each.
#
# Returns a named list with the elements `redner`, `reden` and `talks`.
read_all <- function() {
  cat("Reading all protocols from", DOWNLOAD_DIR, "\n")

  available_protocols <- list.files(DOWNLOAD_DIR)

  # Parse each file exactly once; pblapply() behaves like lapply() but shows
  # a progress bar while the files are being read.
  res <- pblapply(available_protocols, read_one)

  # Collect one component from every per-file result and stack the pieces.
  # lapply() is used instead of sapply() so the intermediate value is always
  # a plain list, which is what bind_rows() expects; sapply() may simplify it
  # into a matrix/vector depending on the input. Files that read_one() could
  # not parse yield NULL here, which bind_rows() is expected to skip.
  combine <- function(field) {
    lapply(res, `[[`, field) %>%
      bind_rows() %>%
      distinct()
  }

  list(
    redner = combine("redner"),
    reden = combine("reden"),
    talks = combine("talks")
  )
}
|
|
|
|
|
|
|
# this reads all currently parseable data from one xml |
|
|
|
read_one <- function(name) { |
|
|
|
print(paste("reading", name)) |
|
|
|
x <- tryCatch(read_xml(paste0(DOWNLOAD_DIR, name)), |
|
|
|
error = function(c) NULL) |
|
|
|
if (is.null(x)) return(NULL) |
|
|
|
@@ -47,6 +49,7 @@ read_one <- function(name) { |
|
|
|
xml_find_all("rede") %>% |
|
|
|
parse_redenliste() -> |
|
|
|
res |
|
|
|
|
|
|
|
list(redner = redner, reden = res$reden, talks = res$talks) |
|
|
|
} |
|
|
|
|
|
|
|
@@ -137,10 +140,10 @@ parse_rednerliste <- function(rednerliste_xml) { |
|
|
|
# EXAMPLE USE |
|
|
|
|
|
|
|
# make sure the data has been downloaded via fetch.R |
|
|
|
res <- read_one("19126-data.xml") |
|
|
|
|
|
|
|
res$redner |
|
|
|
res$reden |
|
|
|
res$talks |
|
|
|
# res <- read_one("19126-data.xml") |
|
|
|
# |
|
|
|
# res$redner |
|
|
|
# res$reden |
|
|
|
# res$talks |
|
|
|
|
|
|
|
# ------------------------------- |