An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nie możesz wybrać więcej, niż 25 tematów Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.

105 wiersze
3.7KB

  1. fraktionen <- c("AFD" = "AfD",
  2. "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
  3. "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
  4. "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
  5. "FRAKTIONSLOS" = "Fraktionslos",
  6. "DIELINKE" = "DIE LINKE",
  7. "SPD" = "SPD",
  8. "CDU/CSU" = "CDU/CSU",
  9. "FDP" = "FDP")
  10. repair_fraktion <- function(fraktion) {
  11. cleaned <- str_to_upper %$% str_replace_all(fraktion, "\\s", "")
  12. fraktionen[cleaned]
  13. }
  14. # takes vector of titel and keeps longest
  15. longest_titel <- function(titel) {
  16. if (all(is.na(titel))) NA_character_
  17. else titel[which.max %$% str_length(titel)]
  18. }
  19. # takes character vector, removes duplicates and collapses
  20. collect_unique <- function(xs) xs %>% clear_na() %>% unique() %>% str_c(collapse="&") %>% na_if("")
  21. # expects a tibble of redner and repairs
  22. repair_redner <- function(redner) {
  23. if (nrow(redner) == 0) return(redner)
  24. redner %>%
  25. filter(id != "10000") %>% # invalid id's
  26. mutate(fraktion = Vectorize(repair_fraktion)(fraktion)) %>% # fix fraktion
  27. group_by(id) %>%
  28. summarize(vorname = head(vorname, 1),
  29. nachname = head(nachname, 1),
  30. fraktion = collect_unique(fraktion),
  31. titel = longest_titel(titel),
  32. rolle_kurz = collect_unique(str_squish(rolle_kurz)),
  33. rolle_lang = collect_unique(str_squish(rolle_lang))) %>%
  34. ungroup() #%>%
  35. # arrange(id) %>%
  36. # distinct(vorname, nachname, fraktion, titel)
  37. }
  38. repair_reden <- function(reden) {
  39. if (nrow(reden) == 0) return(reden)
  40. # TODO: fill with content
  41. reden
  42. }
  43. repair_talks <- function(talks) {
  44. if (nrow(talks) == 0) return(talks)
  45. # ignore all talks which have empty content
  46. filter(talks, str_length(content) > 0)
  47. }
  48. # tries to find the correct redner id given a name
  49. # this is sufficient since every prename lastname combination in the bundestag is
  50. # unique (luckily :D)
  51. # returns a lookup table
  52. lookup_redner <- function(comments, redner) {
  53. tobereplaced <- "[-–—‑­­-­­­ ]"
  54. redner %>%
  55. unite(name, vorname, nachname, sep=".*") %>%
  56. mutate(name = str_replace_all(name, tobereplaced, ".*")) ->
  57. rs
  58. find_match <- function(komm) {
  59. if (komm == "") return (NA_character_)
  60. # I tried with agrep (levensthein distance) but results are better that way
  61. matches <- str_which(komm, rs$name)
  62. if (length(matches) == 0) return(NA_character_)
  63. rs[head(matches, 1), ]$id
  64. }
  65. comments %>%
  66. distinct(kommentator) %>%
  67. mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, "")))
  68. }
  69. repair_comments <- function(comments, redner) {
  70. # try to find a redner id for each actual comment
  71. comments %>%
  72. filter(!is.na(kommentator)) %>%
  73. lookup_redner(redner) %>%
  74. left_join(comments, ., by="kommentator") %>%
  75. select(-kommentator)
  76. }
  77. #' Repair parsed tables
  78. #'
  79. #' TODO: Explain repair_comments argument
  80. #' (if TRUE, we try to lookup redner names in redner table)
  81. #'
  82. #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
  83. #' Since repaired tables should be a fixpoint of repair.
  84. #' @export
  85. repair <- function(parse_output, repair_comments = FALSE) {
  86. list(redner = repair_redner(parse_output$redner),
  87. reden = repair_reden(parse_output$reden),
  88. talks = repair_talks(parse_output$talks),
  89. comments = if(repair_comments) repair_comments(parse_output$comments,
  90. parse_output$redner)
  91. else parse_output$comments,
  92. applause = parse_output$applause
  93. )
  94. }