An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

55 lines
1.7KB

  # Lookup table from normalised faction keys to display names.
  # Keys are upper case with all whitespace removed, matching what
  # repair_fraktion() produces before indexing into this vector.
  fraktionen <- c(
    "AFD"                 = "AfD",
    # presumably a truncated form of the full Greens label -- TODO confirm
    "BÜNDNIS90/"          = "BÜNDNIS 90 / DIE GRÜNEN",
    "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
    "FRAKTIONSLOS"        = "Fraktionslos",
    "DIELINKE"            = "DIE LINKE",
    "SPD"                 = "SPD",
    "CDU/CSU"             = "CDU/CSU",
    "FDP"                 = "FDP"
  )
  # Map raw faction labels onto their canonical display names.
  #
  # `fraktion` is a vector of faction labels. Each label has all whitespace
  # removed and is upper-cased, then looked up in `fraktionen`. The result has
  # one element per input label and carries the names of the matched
  # `fraktionen` entries; labels without an entry (including NA) yield NA.
  repair_fraktion <- function(fraktion) {
    # Plain nested calls rather than a `%$%` application operator: that name
    # is also magrittr's exposition pipe, which fails on a function left-hand
    # side, so the direct call is unambiguous regardless of what is attached.
    cleaned <- str_to_upper(str_replace_all(fraktion, "\\s", ""))
    fraktionen[cleaned]
  }
  # Pick the longest title from a vector of titles.
  #
  # Returns NA_character_ when every element of `titel` is NA (this includes
  # an empty vector); otherwise returns the first element with the greatest
  # string length.
  longest_titel <- function(titel) {
    if (all(is.na(titel))) {
      return(NA_character_)
    }
    # Direct call instead of a `%$%` application operator, whose name clashes
    # with magrittr's exposition pipe. which.max() skips NA lengths and
    # returns the index of the first maximum.
    titel[which.max(str_length(titel))]
  }
  # Collapse a character vector into a single "&"-separated string of its
  # distinct values.
  #
  # The input first passes through clear_na() (defined elsewhere; presumably
  # drops NA entries -- TODO confirm), then duplicates are removed and the
  # remainder is joined with "&". An empty string result is turned into NA.
  collect_unique <- function(xs) {
    distinct_values <- unique(clear_na(xs))
    joined <- str_c(distinct_values, collapse = "&")
    na_if(joined, "")
  }
  # Clean up a table of speakers (redner).
  #
  # Reads the columns id, vorname, nachname, fraktion, titel, rolle_kurz and
  # rolle_lang. Faction labels are normalised with repair_fraktion(), then the
  # rows are reduced to one per (id, vorname, nachname):
  #   - fraktion, rolle_kurz, rolle_lang: distinct values joined by
  #     collect_unique() (the role columns are whitespace-squished first)
  #   - titel: the longest title, via longest_titel()
  #
  # An input with zero rows is returned unchanged. Otherwise the result is an
  # ungrouped table, so both paths hand back ungrouped data.
  repair_redner <- function(redner) {
    if (nrow(redner) == 0) {
      return(redner)
    }
    redner %>%
      # repair_fraktion() already works on whole vectors, so it is applied to
      # the column directly instead of element by element via Vectorize().
      mutate(fraktion = repair_fraktion(fraktion)) %>%
      group_by(id, vorname, nachname) %>%
      summarize(
        fraktion = collect_unique(fraktion),
        titel = longest_titel(titel),
        rolle_kurz = collect_unique(str_squish(rolle_kurz)),
        rolle_lang = collect_unique(str_squish(rolle_lang))
      ) %>%
      # summarize() only peels off the last grouping level; drop the rest so
      # callers do not inherit a leftover (id, vorname) grouping.
      ungroup()
  }
  # Repair step for the table of speeches (reden).
  #
  # Currently a placeholder: the input is returned unchanged whether or not it
  # has rows. The row-count guard is kept so real repair logic can be added
  # below it later.
  repair_reden <- function(reden) {
    is_empty <- nrow(reden) == 0
    if (is_empty) {
      return(reden)
    }
    # TODO: fill with content
    reden
  }
  # Repair step for the table of talks.
  #
  # Currently a placeholder: the input is returned unchanged whether or not it
  # has rows. The row-count guard is kept so real repair logic can be added
  # below it later.
  repair_talks <- function(talks) {
    is_empty <- nrow(talks) == 0
    if (is_empty) {
      return(talks)
    }
    # TODO: fill with content
    talks
  }
  #' Repair parsed tables
  #'
  #' Passes each table of a parse result through its dedicated repair step:
  #' `redner` through `repair_redner()`, `reden` through `repair_reden()` and
  #' `talks` through `repair_talks()`.
  #'
  #' @param parse_output A list-like object whose elements `redner`, `reden`
  #'   and `talks` are accessed with `$`.
  #' @return A list with the elements `redner`, `reden` and `talks`, each
  #'   holding the result of the corresponding repair step.
  #'
  #' @export
  repair <- function(parse_output) {
    redner <- repair_redner(parse_output$redner)
    reden <- repair_reden(parse_output$reden)
    talks <- repair_talks(parse_output$talks)
    list(redner = redner, reden = reden, talks = talks)
  }