|
|
|
@@ -52,11 +52,20 @@ repair_talks <- function(talks) { |
|
|
|
filter(talks, str_length(content) > 0) |
|
|
|
} |
|
|
|
|
|
|
|
# tries to find the correct redner id given a name |
|
|
|
# this is sufficient since every prename lastname combination in the bundestag is |
|
|
|
# unique (luckily :D) |
|
|
|
# returns a lookup table |
|
|
|
lookup_redner <- function(comments, redner) { |
|
|
|
#' Lookup name in speakers table |
|
|
|
#' |
|
|
|
#' Tries to find the correct speaker id given a name. |
|
|
|
#' This is sufficient since every prename lastname combination in the bundestag is |
|
|
|
#' unique (luckily :D) |
|
|
|
#' |
|
|
|
#' @param tb tibble |
|
|
|
#' @param redner tibble |
|
|
|
#' @param name_variable name |
|
|
|
#' |
|
|
|
#' Tries to match the name_variable column with speaker names |
|
|
|
#' |
|
|
|
#' returns a lookup table |
|
|
|
lookup_redner <- function(tb, redner, name_variable) { |
|
|
|
tobereplaced <- "[-–—‑- ]" |
|
|
|
redner %>% |
|
|
|
unite(name, vorname, nachname, sep=".*") %>% |
|
|
|
@@ -69,16 +78,18 @@ lookup_redner <- function(comments, redner) { |
|
|
|
if (length(matches) == 0) return(NA_character_) |
|
|
|
rs[head(matches, 1), ]$id |
|
|
|
} |
|
|
|
comments %>% |
|
|
|
distinct(kommentator) %>% |
|
|
|
mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, ""))) |
|
|
|
tb %>% |
|
|
|
distinct({{name_variable}}) %>% |
|
|
|
mutate(redner = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) |
|
|
|
} |
|
|
|
|
|
|
|
repair_comments <- function(comments, redner) { |
|
|
|
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", |
|
|
|
"Use repair(, repair_commments = FALSE) to skip this.\n")) |
|
|
|
# try to find a redner id for each actual comment |
|
|
|
comments %>% |
|
|
|
filter(!is.na(kommentator)) %>% |
|
|
|
lookup_redner(redner) %>% |
|
|
|
lookup_redner(redner, kommentator) %>% |
|
|
|
left_join(comments, ., by="kommentator") %>% |
|
|
|
select(-kommentator) |
|
|
|
} |
|
|
|
|