An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

55 строки
1.7KB

  # Lookup table for parliamentary groups ("Fraktionen").
  # Names are normalized keys (upper-case, no whitespace -- the form built in
  # repair_fraktion()); values are the canonical spellings to use instead.
  # NOTE(review): "BÜNDNIS90/" looks like a truncated variant of the full
  # "BÜNDNIS90/DIEGRÜNEN" key -- confirm against the parsed data.
  fraktionen <- c(
    "AFD"                 = "AfD",
    "BÜNDNIS90/"          = "BÜNDNIS 90 / DIE GRÜNEN",
    "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
    "FRAKTIONSLOS"        = "Fraktionslos",
    "DIELINKE"            = "DIE LINKE",
    "SPD"                 = "SPD",
    "CDU/CSU"             = "CDU/CSU",
    "FDP"                 = "FDP"
  )
  # Maps raw parliamentary-group strings to their canonical spelling.
  #
  # `fraktion` is a character vector. All whitespace is stripped and the
  # result is handed to str_to_upper through the `%$%` operator to build the
  # lookup key (NOTE(review): `%$%` is not defined in this section; it
  # presumably applies the left-hand function to the right-hand value --
  # confirm). The key is then looked up by name in `fraktionen`; keys that are
  # not present in the table come back as NA.
  repair_fraktion <- function(fraktion) {
    without_space <- str_replace_all(fraktion, "\\s", "")
    lookup_key <- str_to_upper %$% without_space
    fraktionen[lookup_key]
  }
  # Picks the longest entry from a vector of titles.
  #
  # Returns NA_character_ when every element of `titel` is NA (this includes
  # an empty vector). Otherwise the string lengths are passed to which.max
  # through `%$%` (NOTE(review): operator defined outside this section --
  # presumably plain function application; confirm) and the element at that
  # position is returned.
  longest_titel <- function(titel) {
    if (all(is.na(titel))) {
      return(NA_character_)
    }
    titel_lengths <- str_length(titel)
    titel[which.max %$% titel_lengths]
  }
  # Collapses a character vector into one "&"-separated string of its
  # distinct values.
  #
  # `xs` is first passed through clear_na() (defined outside this section;
  # presumably drops NA entries -- confirm), then de-duplicated and joined
  # with "&". An empty result string is converted to NA.
  collect_unique <- function(xs) {
    distinct_values <- unique(clear_na(xs))
    joined <- str_c(distinct_values, collapse="&")
    na_if(joined, "")
  }
  # Repairs a tibble of redner and reduces it to one row per
  # (id, vorname, nachname).
  #
  # Expects the columns id, vorname, nachname, fraktion, titel, rolle_kurz and
  # rolle_lang. An empty input is returned unchanged. Otherwise:
  #   * fraktion is normalized with repair_fraktion(), then the distinct
  #     values per group are joined by collect_unique();
  #   * titel keeps the longest entry per group (longest_titel());
  #   * rolle_kurz / rolle_lang are whitespace-squished and their distinct
  #     values joined by collect_unique().
  # The result is returned ungrouped.
  repair_redner <- function(redner) {
    if (nrow(redner) == 0) return(redner)
    redner %>%
      # repair_fraktion() already works on whole vectors, so no Vectorize()
      # wrapper is needed; unname() keeps the lookup keys out of the column.
      mutate(fraktion = unname(repair_fraktion(fraktion))) %>%
      group_by(id, vorname, nachname) %>%
      summarize(
        fraktion = collect_unique(fraktion),
        titel = longest_titel(titel),
        rolle_kurz = collect_unique(str_squish(rolle_kurz)),
        rolle_lang = collect_unique(str_squish(rolle_lang))
      ) %>%
      # summarize() only peels off the last grouping level; drop the rest so
      # callers do not inherit a grouping by id and vorname.
      ungroup()
  }
  # Repairs the reden table. Currently a placeholder: the input is returned
  # unchanged, whether or not it has rows.
  repair_reden <- function(reden) {
    # Nothing to repair in an empty table.
    if (nrow(reden) == 0) {
      return(reden)
    }
    # TODO: fill with content
    reden
  }
  # Repairs the talks table. Currently a placeholder: the input is returned
  # unchanged, whether or not it has rows.
  repair_talks <- function(talks) {
    # Nothing to repair in an empty table.
    if (nrow(talks) == 0) {
      return(talks)
    }
    # TODO: fill with content
    talks
  }
  #' Repair parsed tables
  #'
  #' Runs the table-specific repair step on each of the three tables of a
  #' parse result: `repair_redner()` on `redner`, `repair_reden()` on `reden`
  #' and `repair_talks()` on `talks`.
  #'
  #' @param parse_output A list-like object with the elements `redner`,
  #'   `reden` and `talks`.
  #' @return A list with the elements `redner`, `reden` and `talks`, each
  #'   holding the repaired version of the corresponding input table.
  #' @export
  repair <- function(parse_output) {
    repaired_redner <- repair_redner(parse_output$redner)
    repaired_reden <- repair_reden(parse_output$reden)
    repaired_talks <- repair_talks(parse_output$talks)
    list(
      redner = repaired_redner,
      reden = repaired_reden,
      talks = repaired_talks
    )
  }