|
|
@@ -28,9 +28,14 @@ read_all <- function(path="records/") { |
|
|
distinct() -> |
|
|
distinct() -> |
|
|
talks |
|
|
talks |
|
|
|
|
|
|
|
|
|
|
|
lapply(res, `[[`, "comments") %>% |
|
|
|
|
|
bind_rows() %>% |
|
|
|
|
|
distinct() -> |
|
|
|
|
|
comments |
|
|
|
|
|
|
|
|
if (length(available_protocols) == 0) |
|
|
if (length(available_protocols) == 0) |
|
|
warning("The given directory is empty or does not exist.") |
|
|
warning("The given directory is empty or does not exist.") |
|
|
list(redner = redner, reden = reden, talks = talks) |
|
|
|
|
|
|
|
|
list(redner = redner, reden = reden, talks = talks, comments = comments) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# this reads all currently parseable data from one xml |
|
|
# this reads all currently parseable data from one xml |
|
|
@@ -99,7 +104,8 @@ parse_rede <- function(rede_xml) { |
|
|
content = cur_content) |
|
|
content = cur_content) |
|
|
reden <- c(reden, list(rede)) |
|
|
reden <- c(reden, list(rede)) |
|
|
cur_content <- "" |
|
|
cur_content <- "" |
|
|
} else { |
|
|
|
|
|
|
|
|
} |
|
|
|
|
|
if (is.na(principal_redner) && xml_name(node) != "name") { |
|
|
principal_redner <- xml_child(node) %>% xml_attr("id") |
|
|
principal_redner <- xml_child(node) %>% xml_attr("id") |
|
|
} |
|
|
} |
|
|
if (xml_name(node) == "name") { |
|
|
if (xml_name(node) == "name") { |
|
|
@@ -144,9 +150,10 @@ parse_comment <- function(comment, rede_id, on_redner) { |
|
|
# - actually separate content properly |
|
|
# - actually separate content properly |
|
|
# - differentiate between [AfD] and AfD in by |
|
|
# - differentiate between [AfD] and AfD in by |
|
|
if(str_detect(comment, "Beifall")) { |
|
|
if(str_detect(comment, "Beifall")) { |
|
|
c(base, type = "applause", by = by, content = comment) |
|
|
|
|
|
|
|
|
c(base, type = "applause", fraktion = by, kommentator = NA_character_, content = comment) |
|
|
} else { |
|
|
} else { |
|
|
c(base, type = "comment", by = by, content = comment) |
|
|
|
|
|
|
|
|
ps <- str_match(comment, "(.*) \\[(.*?)\\]: (.*)")[1,] |
|
|
|
|
|
c(base, type = "comment", fraktion = ps[3], kommentator = ps[2], content = ps[4]) |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@@ -163,7 +170,8 @@ parse_redenliste <- function(redenliste_xml) { |
|
|
comments = tibble(rede_id = comments["rede_id",], |
|
|
comments = tibble(rede_id = comments["rede_id",], |
|
|
on_redner = comments["on_redner",], |
|
|
on_redner = comments["on_redner",], |
|
|
type = comments["type",], |
|
|
type = comments["type",], |
|
|
by = comments["by",], |
|
|
|
|
|
|
|
|
fraktion = comments["fraktion",], |
|
|
|
|
|
kommentator = comments["kommentator",], |
|
|
content = comments["content", ])) |
|
|
content = comments["content", ])) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
@@ -183,7 +191,7 @@ parse_rednerliste <- function(rednerliste_xml) { |
|
|
# EXAMPLE USE |
|
|
# EXAMPLE USE |
|
|
|
|
|
|
|
|
# make sure data is downloaded via fetch.R |
|
|
# make sure data is downloaded via fetch.R |
|
|
res <- read_one("records/19126-data.xml") |
|
|
|
|
|
|
|
|
# res <- read_one("records/19126-data.xml") |
|
|
# |
|
|
# |
|
|
# res$redner |
|
|
# res$redner |
|
|
# res$reden |
|
|
# res$reden |
|
|
|