An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

97 行
3.3KB

  1. fraktionen <- c("AFD" = "AfD",
  2. "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
  3. "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
  4. "FRAKTIONSLOS" = "Fraktionslos",
  5. "DIELINKE" = "DIE LINKE",
  6. "SPD" = "SPD",
  7. "CDU/CSU" = "CDU/CSU",
  8. "FDP" = "FDP")
  9. repair_fraktion <- function(fraktion) {
  10. cleaned <- str_to_upper %$% str_replace_all(fraktion, "\\s", "")
  11. fraktionen[cleaned]
  12. }
  13. # takes vector of titel and keeps longest
  14. longest_titel <- function(titel) {
  15. if (all(is.na(titel))) NA_character_
  16. else titel[which.max %$% str_length(titel)]
  17. }
  18. # takes character vector, removes duplicates and collapses
  19. collect_unique <- function(xs) xs %>% clear_na() %>% unique() %>% str_c(collapse="&") %>% na_if("")
  20. # expects a tibble of redner and repairs
  21. repair_redner <- function(redner) {
  22. if (nrow(redner) == 0) return(redner)
  23. redner %>%
  24. filter(id != "10000") %>% # invalid id's
  25. mutate(fraktion = Vectorize(repair_fraktion)(fraktion)) %>% # fix fraktion
  26. group_by(id) %>%
  27. summarize(vorname = head(vorname, 1),
  28. nachname = head(nachname, 1),
  29. fraktion = collect_unique(fraktion),
  30. titel = longest_titel(titel),
  31. rolle_kurz = collect_unique(str_squish(rolle_kurz)),
  32. rolle_lang = collect_unique(str_squish(rolle_lang))) %>%
  33. ungroup() #%>%
  34. # arrange(id) %>%
  35. # distinct(vorname, nachname, fraktion, titel)
  36. }
  37. repair_reden <- function(reden) {
  38. if (nrow(reden) == 0) return(reden)
  39. # TODO: fill with content
  40. reden
  41. }
  42. repair_talks <- function(talks) {
  43. if (nrow(talks) == 0) return(talks)
  44. # ignore all talks which have empty content
  45. filter(talks, str_length(content) > 0)
  46. }
  47. # tries to find the correct redner id given a name
  48. # this is sufficient since every prename lastname combination in the bundestag is
  49. # unique (luckily :D)
  50. # returns a lookup table
  51. lookup_redner <- function(comments, redner) {
  52. tobereplaced <- "[-–—‑­­-­­­ ]"
  53. redner %>%
  54. unite(name, vorname, nachname, sep=".*") %>%
  55. mutate(name = str_replace_all(name, tobereplaced, ".*")) ->
  56. rs
  57. find_match <- function(komm) {
  58. if (komm == "") return (NA_character_)
  59. # I tried with agrep (levensthein distance) but results are better that way
  60. matches <- str_which(komm, rs$name)
  61. if (length(matches) == 0) return(NA_character_)
  62. rs[head(matches, 1), ]$id
  63. }
  64. comments %>%
  65. distinct(kommentator) %>%
  66. mutate(redner = Vectorize(find_match)(str_replace_all(kommentator, tobereplaced, "")))
  67. }
  68. repair_comments <- function(comments, redner) {
  69. # try to find a redner id for each actual comment
  70. comments %>%
  71. filter(!is.na(kommentator)) %>%
  72. lookup_redner(redner) %>%
  73. left_join(comments, ., by="kommentator") %>%
  74. select(-kommentator)
  75. }
  76. #' Repair parsed tables
  77. #'
  78. #' @export
  79. repair <- function(parse_output) {
  80. list(redner = repair_redner(parse_output$redner),
  81. reden = repair_reden(parse_output$reden),
  82. talks = repair_talks(parse_output$talks),
  83. #comments = repair_comments(parse_output$comments)
  84. comments = parse_output$comments,
  85. applause = parse_output$applause
  86. )
  87. }