|
|
@@ -1,4 +1,5 @@ |
|
|
fractions <- c("AFD" = "AfD", |
|
|
fractions <- c("AFD" = "AfD", |
|
|
|
|
|
"AFD&FRAKTIONSLOS" = "AfD&Fraktionslos", |
|
|
"BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN", |
|
|
"BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN", |
|
|
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN", |
|
|
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN", |
|
|
"FRAKTIONSLOS" = "Fraktionslos", |
|
|
"FRAKTIONSLOS" = "Fraktionslos", |
|
|
@@ -51,11 +52,20 @@ repair_talks <- function(talks) { |
|
|
filter(talks, str_length(content) > 0) |
|
|
filter(talks, str_length(content) > 0) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# tries to find the correct speaker id given a name |
|
|
|
|
|
# this is sufficient since every prename lastname combination in the bundestag is |
|
|
|
|
|
# unique (luckily :D) |
|
|
|
|
|
# returns a lookup table |
|
|
|
|
|
lookup_speaker <- function(comments, speaker) { |
|
|
|
|
|
|
|
|
#' Lookup name in speakers table |
|
|
|
|
|
#' |
|
|
|
|
|
#' Tries to find the correct speaker id given a name. |
|
|
|
|
|
#' This is sufficient since every prename lastname combination in the bundestag is |
|
|
|
|
|
#' unique (luckily :D) |
|
|
|
|
|
#' |
|
|
|
|
|
#' @param tb tibble |
|
|
|
|
|
#' @param redner tibble |
|
|
|
|
|
#' @param name_variable name |
|
|
|
|
|
#' |
|
|
|
|
|
#' Tries to match the name_variable column with speaker names |
|
|
|
|
|
#' |
|
|
|
|
|
#' returns a lookup table |
|
|
|
|
|
lookup_speaker <- function(tb, speaker, name_variable) { |
|
|
tobereplaced <- "[-–—‑- ]" |
|
|
tobereplaced <- "[-–—‑- ]" |
|
|
speaker %>% |
|
|
speaker %>% |
|
|
unite(name, vorname, nachname, sep=".*") %>% |
|
|
unite(name, vorname, nachname, sep=".*") %>% |
|
|
@@ -68,29 +78,38 @@ lookup_speaker <- function(comments, speaker) { |
|
|
if (length(matches) == 0) return(NA_character_) |
|
|
if (length(matches) == 0) return(NA_character_) |
|
|
rs[head(matches, 1), ]$id |
|
|
rs[head(matches, 1), ]$id |
|
|
} |
|
|
} |
|
|
comments %>% |
|
|
|
|
|
distinct(kommentator) %>% |
|
|
|
|
|
mutate(speaker = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, ""))) |
|
|
|
|
|
|
|
|
tb %>% |
|
|
|
|
|
distinct({{name_variable}}) %>% |
|
|
|
|
|
mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
repair_comments <- function(comments, speaker) { |
|
|
|
|
|
# try to find a speaker id for each actual comment |
|
|
|
|
|
|
|
|
repair_comments <- function(comments, redner) { |
|
|
|
|
|
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", |
|
|
|
|
|
"Use repair(, repair_commments = FALSE) to skip this.\n")) |
|
|
|
|
|
# try to find a redner id for each actual comment |
|
|
comments %>% |
|
|
comments %>% |
|
|
filter(!is.na(kommentator)) %>% |
|
|
filter(!is.na(kommentator)) %>% |
|
|
lookup_speaker(speaker) %>% |
|
|
|
|
|
|
|
|
lookup_redner(redner, kommentator) %>% |
|
|
left_join(comments, ., by="kommentator") %>% |
|
|
left_join(comments, ., by="kommentator") %>% |
|
|
select(-kommentator) |
|
|
select(-kommentator) |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
#' Repair parsed tables |
|
|
#' Repair parsed tables |
|
|
#' |
|
|
#' |
|
|
|
|
|
#' TODO: Explain repair_comments argument |
|
|
|
|
|
#' (if TRUE, we try to lookup redner names in redner table) |
|
|
|
|
|
#' |
|
|
|
|
|
#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE |
|
|
|
|
|
#' Since repaired tables should be a fixpoint of repair. |
|
|
#' @export |
|
|
#' @export |
|
|
repair <- function(parse_output) { |
|
|
|
|
|
list(speaker = repair_speaker(parse_output$speaker), |
|
|
|
|
|
speeches = repair_speeches(parse_output$speeches), |
|
|
|
|
|
|
|
|
repair <- function(parse_output, repair_comments = FALSE) { |
|
|
|
|
|
|
|
|
|
|
|
list(redner = repair_speaker(parse_output$speaker), |
|
|
|
|
|
reden = repair_speeches(parse_output$speeches), |
|
|
talks = repair_talks(parse_output$talks), |
|
|
talks = repair_talks(parse_output$talks), |
|
|
#comments = repair_comments(parse_output$comments) |
|
|
|
|
|
comments = parse_output$comments, |
|
|
|
|
|
|
|
|
comments = if(repair_comments) repair_comments(parse_output$comments, |
|
|
|
|
|
parse_output$speaker) |
|
|
|
|
|
else parse_output$comments, |
|
|
applause = parse_output$applause |
|
|
applause = parse_output$applause |
|
|
) |
|
|
) |
|
|
} |
|
|
} |