Przeglądaj źródła

generalize lookup_redner helper to allow lookup of names in arbitrary tables, add info text

genderequality-alternative
flavis 4 lat temu
rodzic
commit
5dc308c16e
1 zmienionych plików z 20 dodań i 9 usunięć
  1. +20
    -9
      R/repair.R

+ 20
- 9
R/repair.R Wyświetl plik

@@ -52,11 +52,20 @@ repair_talks <- function(talks) {
filter(talks, str_length(content) > 0)
}

# tries to find the correct redner id given a name
# this is sufficient since every prename lastname combination in the bundestag is
# unique (luckily :D)
# returns a lookup table
lookup_redner <- function(comments, redner) {
#' Lookup name in speakers table
#'
#' Tries to find the correct speaker id given a name.
#' This is sufficient since every prename lastname combination in the bundestag is
#' unique (luckily :D)
#'
#' @param tb tibble
#' @param redner tibble
#' @param name_variable name
#'
#' Tries to match the name_variable column with speaker names
#'
#' returns a lookup table
lookup_redner <- function(tb, redner, name_variable) {
tobereplaced <- "[-–—‑­­-­­­ ]"
redner %>%
unite(name, vorname, nachname, sep=".*") %>%
@@ -69,16 +78,18 @@ lookup_redner <- function(comments, redner) {
if (length(matches) == 0) return(NA_character_)
rs[head(matches, 1), ]$id
}
comments %>%
distinct(kommentator) %>%
mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, "")))
tb %>%
distinct({{name_variable}}) %>%
mutate(redner = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
}

repair_comments <- function(comments, redner) {
cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
"Use repair(, repair_commments = FALSE) to skip this.\n"))
# try to find a redner id for each actual comment
comments %>%
filter(!is.na(kommentator)) %>%
lookup_redner(redner) %>%
lookup_redner(redner, kommentator) %>%
left_join(comments, ., by="kommentator") %>%
select(-kommentator)
}


Ładowanie…
Anuluj
Zapisz