diff --git a/R/helpers.R b/R/helpers.R
index 148507f..18fdda3 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -1,4 +1,5 @@
 `%$%` <- function(f, x) f(x)
 `%.%` <- function(f, g) function(...) f(g(...))
+flip <- function(f) function(x, y) f(y, x)
 
 clear_na <- function(xs) xs[!is.na(xs)]
diff --git a/R/parse.R b/R/parse.R
index 655c36b..66daece 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -137,12 +137,14 @@ parse_rede <- function(rede_xml) {
        comments = comments)
 }
 
-fraktionspattern <- "BÜNDNIS 90/DIE GRÜNEN|CDU/CSU|AfD|SPD|DIE LINKE|FDP"
+fraktionspattern <- "BÜNDNIS(SES)?\\W*90/DIE\\W*GRÜNEN|CDU/CSU|AfD|SPD|DIE LINKE|FDP|LINKEN"
+fraktionsnames <- c("BÜNDNIS 90/DIE GRÜNEN", "CDU/CSU", "AfD", "SPD", "DIE LINKE", "FDP")
 
 parse_comment <- function(comment, rede_id, on_redner) {
   base <- c(rede_id = rede_id, on_redner = on_redner)
   str_extract_all(comment, fraktionspattern) %>%
     `[[`(1) %>%
+    sapply(partial(flip(head), 1) %.% agrep, x=fraktionsnames, max=0.2, value=T) %>%
     str_c(collapse=",") -> by
 
   # classify comment