|
|
|
@@ -39,7 +39,7 @@ read_all <- function(path="inst/records/") { |
|
|
|
select(-type) -> |
|
|
|
comments |
|
|
|
filter(commentsandapplause, type == "applause") %>% |
|
|
|
select(-type, -kommentator, -content) %>% |
|
|
|
select(-type, -commenter, -content) %>% |
|
|
|
mutate("CDU_CSU" = str_detect(fraction, "CDU/CSU"), |
|
|
|
"SPD" = str_detect(fraction, "SPD"), |
|
|
|
"FDP" = str_detect(fraction, "FDP"), |
|
|
|
@@ -86,17 +86,17 @@ xml_get <- function(node, name) { |
|
|
|
# Extract the fields of one speaker node into a named character vector.
#
# speaker_xml: a single XML node with an "id" attribute. The name fields are
#   read from the node returned by xml_child(speaker_xml).
#
# Returns a named vector with the entries
#   id, prename, lastname, fraction, title, role_short, role_long.
# role_short and role_long are NA_character_ when no "rolle" element is found.
#
# The quoted element names ("vorname", "nachname", "fraktion", "titel",
# "rolle", ...) are the tag names looked up in the XML and must stay as they
# are; only the R-side names are English.
# NOTE(review): xml_get() is defined elsewhere in this file; this code relies
# on it returning one string per field - confirm.
parse_speaker <- function(speaker_xml) {
  speaker_id <- xml_attr(speaker_xml, "id")
  nm <- xml_child(speaker_xml)

  prename <- xml_get(nm, "vorname")
  lastname <- xml_get(nm, "nachname")
  fraction <- xml_get(nm, "fraktion")
  title <- xml_get(nm, "titel")

  # The role element is optional; fall back to NA so the result always has
  # the same set of names.
  role <- xml_find_all(nm, "rolle")
  if (length(role) > 0) {
    role_long <- xml_get(role, "rolle_lang")
    role_short <- xml_get(role, "rolle_kurz")
  } else {
    role_short <- role_long <- NA_character_
  }

  c(id = speaker_id, prename = prename, lastname = lastname,
    fraction = fraction, title = title,
    role_short = role_short, role_long = role_long)
}
|
|
|
|
|
|
|
# parse one speech |
|
|
|
@@ -165,10 +165,10 @@ parse_comment <- function(comment, speech_id, on_speaker) { |
|
|
|
sapply(partial(flip(head), 1) %.% agrep, x=fractionnames, max=0.2, value=T) %>% |
|
|
|
str_c(collapse=",") -> |
|
|
|
by |
|
|
|
c(base, type = "applause", fraction = by, kommentator = NA_character_, content = comment) |
|
|
|
c(base, type = "applause", fraction = by, commenter = NA_character_, content = comment) |
|
|
|
} else { |
|
|
|
ps <- str_match(comment, "(.*) \\[(.*?)\\]: (.*)")[1,] |
|
|
|
c(base, type = "comment", fraction = ps[3], kommentator = ps[2], content = ps[4]) |
|
|
|
c(base, type = "comment", fraction = ps[3], commenter = ps[2], content = ps[4]) |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
@@ -187,7 +187,7 @@ parse_speechlist <- function(speechlist_xml, date) { |
|
|
|
on_speaker = comments["on_speaker",], |
|
|
|
type = comments["type",], |
|
|
|
fraction = comments["fraction",], |
|
|
|
kommentator = comments["kommentator",], |
|
|
|
commenter = comments["commenter",], |
|
|
|
content = comments["content", ])) |
|
|
|
} |
|
|
|
|
|
|
|
@@ -195,12 +195,12 @@ parse_speechlist <- function(speechlist_xml, date) { |
|
|
|
# Build a table of speakers, one row per speaker.
#
# speakerliste_xml: a collection of speaker nodes; parse_speaker() is applied
#   to each element.
#
# Returns a tibble with the columns
#   id, prename, lastname, fraction, title, role_short, role_long.
#
# The row names used below must match the names of the vector returned by
# parse_speaker().
# NOTE(review): indexing d by row name relies on sapply() simplifying the
# per-speaker vectors into a matrix (fields in rows, speakers in columns).
# That only happens when every speaker yields a vector of the same length -
# confirm for empty or irregular input.
parse_speakerlist <- function(speakerliste_xml) {
  d <- sapply(speakerliste_xml, parse_speaker)
  tibble(
    id = d["id", ],
    prename = d["prename", ],
    lastname = d["lastname", ],
    fraction = d["fraction", ],
    title = d["title", ],
    role_short = d["role_short", ],
    role_long = d["role_long", ]
  )
}
|
|
|
|
|
|
|
#' Write the parsed and repaired results into separate csv files |
|
|
|
|