|
|
|
@@ -1,7 +1,7 @@ |
|
|
|
fractions <- c("AFD" = "AfD", |
|
|
|
"AFD&FRAKTIONSLOS" = "AfD&Fraktionslos", |
|
|
|
"BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN", |
|
|
|
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN", |
|
|
|
"BÜNDNIS90/" = "BÜNDNIS 90/DIE GRÜNEN", |
|
|
|
"BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90/DIE GRÜNEN", |
|
|
|
"FRAKTIONSLOS" = "Fraktionslos", |
|
|
|
"DIELINKE" = "DIE LINKE", |
|
|
|
"SPD" = "SPD", |
|
|
|
@@ -81,35 +81,39 @@ lookup_speaker <- function(tb, speaker, name_variable) { |
|
|
|
mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) |
|
|
|
} |
|
|
|
|
|
|
|
# Drop empty comment rows and, on request, attach speaker matches.
#
# comments:       table with the columns `commenter`, `content` and `fraction`
#                 (the three columns this function reads).
# speaker:        speaker table; only forwarded to `lookup_speaker()`.
# lookup_speaker: if TRUE, every row with a non-missing `commenter` is passed
#                 to the `lookup_speaker()` helper and the result is joined
#                 back onto the filtered table by `commenter`.
#
# Returns the filtered table, or the joined table when `lookup_speaker` is
# TRUE.
repair_comments <- function(comments, speaker, lookup_speaker = FALSE) {
  # A row is kept as soon as one of the three columns carries a value.
  tb <- comments %>%
    filter(!is.na(commenter) | !is.na(content) | !is.na(fraction))

  if (lookup_speaker) {
    cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
               "Use repair(, lookup_speaker = FALSE) to skip this.\n"))

    # The logical argument shares its name with the helper function. R skips
    # non-function bindings when it resolves the head of a call, so the call
    # below still reaches the helper and not the flag.
    matched <- tb %>%
      filter(!is.na(commenter)) %>%
      lookup_speaker(speaker, commenter)

    # NOTE(review): `matched` presumably still carries the columns of `tb`;
    # if so this join yields suffixed duplicates -- confirm against the full
    # definition of lookup_speaker().
    left_join(tb, matched, by = "commenter")
  } else {
    tb
  }
}
|
|
|
|
|
|
|
#' Repair parsed tables
#'
#' If lookup_speaker is TRUE, members of the parliament mentioned in comments
#' are looked up in speaker table.
#'
#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
#' Since repaired tables should be a fixpoint of repair.
#'
#' @param parse_output Parser result whose elements `speaker`, `speeches`,
#'   `talks`, `comments` and `applause` are read; it is checked with
#'   `is_valid_res()` first.
#' @param lookup_speaker bool, forwarded to `repair_comments()`. Anything
#'   that is not of type logical is rejected.
#' @return A list with the elements `speaker`, `speeches`, `talks`,
#'   `comments` and `applause`. `applause` is passed through unchanged.
#' @export
repair <- function(parse_output, lookup_speaker = FALSE) {
  is_valid_res(parse_output)
  stopifnot(
    "lookup_speaker must be of type logical" = is.logical(lookup_speaker)
  )

  list(
    speaker = repair_speaker(parse_output$speaker),
    speeches = repair_speeches(parse_output$speeches),
    talks = repair_talks(parse_output$talks),
    # Comments are always repaired; the flag only controls the speaker lookup.
    comments = repair_comments(
      parse_output$comments,
      parse_output$speaker,
      lookup_speaker
    ),
    applause = parse_output$applause
  )
}