From 5dc308c16e1fcfeab0c65681345191c8d620d7a3 Mon Sep 17 00:00:00 2001 From: flavis Date: Tue, 3 Aug 2021 13:40:02 +0200 Subject: [PATCH] generalize lookup_redner helper to allow lookup of names in arbitrary tables, add info text --- R/repair.R | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/R/repair.R b/R/repair.R index ba62357..e1bfdb9 100644 --- a/R/repair.R +++ b/R/repair.R @@ -52,11 +52,20 @@ repair_talks <- function(talks) { filter(talks, str_length(content) > 0) } -# tries to find the correct redner id given a name -# this is sufficient since every prename lastname combination in the bundestag is -# unique (luckily :D) -# returns a lookup table -lookup_redner <- function(comments, redner) { +#' Lookup name in speakers table +#' +#' Tries to find the correct speaker id given a name. +#' This is sufficient since every prename lastname combination in the bundestag is +#' unique (luckily :D) +#' +#' @param tb tibble +#' @param redner tibble +#' @param name_variable name +#' +#' Tries to match the name_variable column with speaker names +#' +#' returns a lookup table +lookup_redner <- function(tb, redner, name_variable) { tobereplaced <- "[-–—‑­­-­­­ ]" redner %>% unite(name, vorname, nachname, sep=".*") %>% @@ -69,16 +78,18 @@ lookup_redner <- function(comments, redner) { if (length(matches) == 0) return(NA_character_) rs[head(matches, 1), ]$id } - comments %>% - distinct(kommentator) %>% - mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, ""))) + tb %>% + distinct({{name_variable}}) %>% + mutate(redner = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) } repair_comments <- function(comments, redner) { + cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", + "Use repair(, repair_commments = FALSE) to skip this.\n")) # try to find a redner id for each actual comment comments %>% filter(!is.na(kommentator)) %>% - lookup_redner(redner) %>% + lookup_redner(redner, kommentator) %>% left_join(comments, ., by="kommentator") %>% select(-kommentator) }