| @@ -52,11 +52,20 @@ repair_talks <- function(talks) { | |||||
| filter(talks, str_length(content) > 0) | filter(talks, str_length(content) > 0) | ||||
| } | } | ||||
| # tries to find the correct redner id given a name | |||||
| # this is sufficient since every prename lastname combination in the bundestag is | |||||
| # unique (luckily :D) | |||||
| # returns a lookup table | |||||
| lookup_redner <- function(comments, redner) { | |||||
| #' Lookup name in speakers table | |||||
| #' | |||||
| #' Tries to find the correct speaker id given a name. | |||||
| #' This is sufficient since every first name / last name combination in the Bundestag is | |||||
| #' unique (luckily :D) | |||||
| #' | |||||
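| #' @param tb tibble containing the column given by name_variable | |||||
| #' @param redner tibble of speakers with the columns vorname and nachname | |||||
| #' @param name_variable unquoted name of the column in tb that holds the names to look up | |||||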
| #' | |||||
| #' Tries to match the name_variable column with speaker names | |||||
| #' | |||||
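| #' @return a lookup table with one row per distinct name and a redner column holding the matched speaker id (NA if no match was found) | |||||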
| lookup_redner <- function(tb, redner, name_variable) { | |||||
| tobereplaced <- "[-–—‑- ]" | tobereplaced <- "[-–—‑- ]" | ||||
| redner %>% | redner %>% | ||||
| unite(name, vorname, nachname, sep=".*") %>% | unite(name, vorname, nachname, sep=".*") %>% | ||||
| @@ -69,16 +78,18 @@ lookup_redner <- function(comments, redner) { | |||||
| if (length(matches) == 0) return(NA_character_) | if (length(matches) == 0) return(NA_character_) | ||||
| rs[head(matches, 1), ]$id | rs[head(matches, 1), ]$id | ||||
| } | } | ||||
| comments %>% | |||||
| distinct(kommentator) %>% | |||||
| mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, ""))) | |||||
| tb %>% | |||||
| distinct({{name_variable}}) %>% | |||||
| mutate(redner = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) | |||||
| } | } | ||||
| repair_comments <- function(comments, redner) { | repair_comments <- function(comments, redner) { | ||||
| cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", | |||||
| "Use repair(, repair_commments = FALSE) to skip this.\n")) | |||||
| # try to find a redner id for each actual comment | # try to find a redner id for each actual comment | ||||
| comments %>% | comments %>% | ||||
| filter(!is.na(kommentator)) %>% | filter(!is.na(kommentator)) %>% | ||||
| lookup_redner(redner) %>% | |||||
| lookup_redner(redner, kommentator) %>% | |||||
| left_join(comments, ., by="kommentator") %>% | left_join(comments, ., by="kommentator") %>% | ||||
| select(-kommentator) | select(-kommentator) | ||||
| } | } | ||||