# scraping/repair.R
# Post-processing ("repair") of the tables contained in a parse output list.

# NOTE(review): str_to_upper(), str_replace_all(), mutate() and %>% are not
# attached in this file; presumably helpers.R loads stringr/dplyr -- confirm.
source("../utils/helpers.R")

# Lookup table for faction names. Keys are the upper-cased spelling with all
# whitespace removed; values are the canonical spelling written to the data.
fraktionen <- c(
  "AFD" = "AfD",
  "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
  "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
  "FRAKTIONSLOS" = "Fraktionslos",
  "DIELINKE" = "DIE LINKE",
  "SPD" = "SPD",
  "CDU/CSU" = "CDU/CSU",
  "FDP" = "FDP"
)

#' Map raw faction strings to their canonical spelling.
#'
#' Whitespace is stripped and the result upper-cased before the lookup in
#' `fraktionen`.
#'
#' @param fraktion Vector of raw faction names (any length, including 0).
#' @return Character vector of the same length as `fraktion`, named by the
#'   normalised lookup key. Entries without a match in `fraktionen` are `NA`.
repair_fraktion <- function(fraktion) {
  # Direct call instead of `str_to_upper %$% ...`: magrittr's `%$%` is the
  # exposition pipe, so the operator form only works if helpers.R redefines it
  # as function application -- TODO confirm against helpers.R.
  without_space <- str_replace_all(fraktion, "\\s", "")
  cleaned <- str_to_upper(without_space)
  fraktionen[cleaned]
}

#' Repair the `redner` table.
#'
#' @param redner Data frame / tibble with a `fraktion` column.
#' @return `redner` with `fraktion` replaced by its canonical spelling
#'   (`NA` where the faction is not listed in `fraktionen`).
repair_redner <- function(redner) {
  # repair_fraktion() is already vectorised, so no Vectorize() wrapper is
  # needed. The wrapper returned list() for a zero-row table (turning the
  # column into a list column) and attached names to the column; unname()
  # keeps the column a plain character vector.
  redner %>%
    mutate(fraktion = unname(repair_fraktion(fraktion)))
}

#' Repair the `reden` table.
#'
#' Placeholder: currently returns its input unchanged.
#'
#' @param reden The `reden` table.
#' @return `reden`, unmodified.
repair_reden <- function(reden) {
  # TODO: fill with content
  reden
}

#' Repair the `talks` table.
#'
#' Placeholder: currently returns its input unchanged.
#'
#' @param talks The `talks` table.
#' @return `talks`, unmodified.
repair_talks <- function(talks) {
  # TODO: fill with content
  talks
}

#' Repair all tables of a parse output.
#'
#' @param parse_output List with elements `redner`, `reden` and `talks`.
#' @return List with the same three element names, each passed through its
#'   `repair_*()` function.
repair <- function(parse_output) {
  list(
    redner = repair_redner(parse_output$redner),
    reden = repair_reden(parse_output$reden),
    talks = repair_talks(parse_output$talks)
  )
}