| @@ -2,13 +2,38 @@ source("config.R") | |||||
| source("../utils/helpers.R") | source("../utils/helpers.R") | ||||
| library("xml2") | library("xml2") | ||||
| library(tibble) | library(tibble) | ||||
| library(dplyr) | |||||
| library(magrittr) | library(magrittr) | ||||
| # for usage see the example at the end | # for usage see the example at the end | ||||
# Reads every file listed in DOWNLOAD_DIR with read_one() and merges the
# per-file results.
#
# Returns a named list with the elements `redner`, `reden` and `talks`; each
# one is the row-bound, de-duplicated combination of the same-named element
# from every file that could be read.
read_all <- function() {
  available_protocols <- list.files(DOWNLOAD_DIR)
  res <- lapply(available_protocols, read_one)
  # read_one() returns NULL for files it cannot read; skip those
  res <- Filter(Negate(is.null), res)

  # lapply (not sapply) keeps the extracted pieces as a plain list: sapply
  # may simplify equally shaped results into a matrix, which bind_rows()
  # cannot combine row-wise
  combine <- function(field) {
    res %>%
      lapply(`[[`, field) %>%
      bind_rows() %>%
      distinct()
  }

  list(
    redner = combine("redner"),
    reden = combine("reden"),
    talks = combine("talks")
  )
}
| # this reads all currently parseable data from one xml | # this reads all currently parseable data from one xml | ||||
| read_one <- function(name) { | read_one <- function(name) { | ||||
| x <- read_xml(paste0(DOWNLOAD_DIR, name)) | |||||
| print(paste("reading", name)) | |||||
| x <- tryCatch(read_xml(paste0(DOWNLOAD_DIR, name)), | |||||
| error = function(c) NULL) | |||||
| if (is.null(x)) return(NULL) | |||||
| cs <- xml_children(x) | cs <- xml_children(x) | ||||
| verlauf <- xml_find_first(x, "sitzungsverlauf") | verlauf <- xml_find_first(x, "sitzungsverlauf") | ||||
| @@ -60,7 +85,8 @@ parse_rede <- function(rede_xml) { | |||||
| reden <- list() | reden <- list() | ||||
| for (node in cs) { | for (node in cs) { | ||||
| if (xml_name(node) == "p") { | if (xml_name(node) == "p") { | ||||
| if (xml_attr(node, "klasse") == "redner") { | |||||
| klasse <- xml_attr(node, "klasse") | |||||
| if (!is.na(klasse) && klasse == "redner") { | |||||
| if (!is.na(cur_redner)) { | if (!is.na(cur_redner)) { | ||||
| rede <- c(rede_id = rede_id, | rede <- c(rede_id = rede_id, | ||||
| redner = cur_redner, | redner = cur_redner, | ||||
| @@ -111,7 +137,7 @@ parse_rednerliste <- function(rednerliste_xml) { | |||||
| # EXAMPLE USE | # EXAMPLE USE | ||||
| # make sure data is downloaded via fetch.R | # make sure data is downloaded via fetch.R | ||||
| res <- read_one("19038-data.xml") | |||||
| res <- read_one("19126-data.xml") | |||||
| res$redner | res$redner | ||||
| res$reden | res$reden | ||||