# Lookup table: normalised fraktion key (upper case, all whitespace removed)
# -> canonical spelling. Two keys map to "BÜNDNIS 90 / DIE GRÜNEN" so that the
# truncated form "BÜNDNIS90/" is repaired as well.
fraktionen <- c(
  "AFD" = "AfD",
  "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
  "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
  "FRAKTIONSLOS" = "Fraktionslos",
  "DIELINKE" = "DIE LINKE",
  "SPD" = "SPD",
  "CDU/CSU" = "CDU/CSU",
  "FDP" = "FDP"
)

# Maps raw fraktion strings to their canonical spelling.
#
# All whitespace is removed and the result is upper-cased before it is used
# as a key into `fraktionen`. Works on a whole character vector at once and
# returns a character vector of the same length (carrying the names produced
# by indexing `fraktionen`). Keys without an entry, including NA, yield NA.
#
# NOTE(review): `%$%` is not defined in the visible part of this file. It is
# used here as function application (`f %$% x` ~ `f(x)`), which is NOT what
# magrittr's exposition pipe of the same name does -- confirm its definition.
repair_fraktion <- function(fraktion) {
  cleaned <- str_to_upper %$% str_replace_all(fraktion, "\\s", "")
  fraktionen[cleaned]
}

# Takes a vector of titel and keeps the longest one.
#
# Returns NA_character_ when every entry is NA (this includes an empty
# vector). Otherwise NA entries are ignored by `which.max()`, and ties are
# resolved in favour of the first longest entry.
longest_titel <- function(titel) {
  if (all(is.na(titel))) {
    NA_character_
  } else {
    titel[which.max %$% str_length(titel)]
  }
}

# Takes a character vector, removes duplicates and collapses it into a single
# string with "&" as separator. An empty result ("") is turned into NA.
#
# NOTE(review): `clear_na()` is defined elsewhere; presumably it drops NA
# entries -- confirm.
collect_unique <- function(xs) {
  xs %>%
    clear_na() %>%
    unique() %>%
    str_c(collapse = "&") %>%
    na_if("")
}

# Expects a tibble of redner and repairs it.
#
# Reads the columns id, vorname, nachname, fraktion, titel, rolle_kurz and
# rolle_lang. The fraktion column is normalised via `repair_fraktion()`, then
# the rows are collapsed to one row per (id, vorname, nachname):
#   * fraktion, rolle_kurz, rolle_lang: distinct values joined with "&"
#     (rolle_* are whitespace-squished first),
#   * titel: the longest titel of the group.
# Returns an ungrouped tibble.
repair_redner <- function(redner) {
  redner %>%
    # `repair_fraktion()` is already vectorised, so it is applied to the whole
    # column directly. Wrapping it in `Vectorize()` goes through `mapply()`,
    # which returns a list for zero-row input and attaches stray names.
    mutate(fraktion = unname(repair_fraktion(fraktion))) %>%
    group_by(id, vorname, nachname) %>%
    summarize(
      fraktion = collect_unique(fraktion),
      titel = longest_titel(titel),
      rolle_kurz = collect_unique(str_squish(rolle_kurz)),
      rolle_lang = collect_unique(str_squish(rolle_lang))
    ) %>%
    # `summarize()` only peels off the last grouping level; drop the rest so
    # callers do not inherit a tibble that is still grouped by id and vorname.
    ungroup()
}

# Placeholder: currently returns `reden` unchanged.
repair_reden <- function(reden) {
  # TODO: fill with content
  reden
}

# Placeholder: currently returns `talks` unchanged.
repair_talks <- function(talks) {
  # TODO: fill with content
  talks
}

# Repairs all tables.
#
# `parse_output` must provide the elements `redner`, `reden` and `talks`.
# Returns a list with the same three names, each element passed through its
# matching `repair_*()` function.
repair <- function(parse_output) {
  list(
    redner = repair_redner(parse_output$redner),
    reden = repair_reden(parse_output$reden),
    talks = repair_talks(parse_output$talks)
  )
}