An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

118 rindas
4.2KB

  1. fractions <- c("AFD" = "AfD",
  2. "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
  3. "BÜNDNIS90/" = "BÜNDNIS 90 / DIE GRÜNEN",
  4. "BÜNDNIS90/DIEGRÜNEN" = "BÜNDNIS 90 / DIE GRÜNEN",
  5. "FRAKTIONSLOS" = "Fraktionslos",
  6. "DIELINKE" = "DIE LINKE",
  7. "SPD" = "SPD",
  8. "CDU/CSU" = "CDU/CSU",
  9. "FDP" = "FDP")
  10. repair_fraction <- function(fraction) {
  11. cleaned <- str_to_upper %$% str_replace_all(fraction, "\\s", "")
  12. fractions[cleaned]
  13. }
  14. # takes vector of titel and keeps longest
  15. longest_titel <- function(titel) {
  16. if (all(is.na(titel))) NA_character_
  17. else titel[which.max %$% str_length(titel)]
  18. }
  19. # takes character vector, removes duplicates and collapses
  20. collect_unique <- function(xs) xs %>% clear_na() %>% unique() %>% str_c(collapse="&") %>% na_if("")
  21. # expects a tibble of speaker and repairs
  22. repair_speaker <- function(speaker) {
  23. if (nrow(speaker) == 0) return(speaker)
  24. speaker %>%
  25. filter(id != "10000") %>% # invalid id's
  26. mutate(fraction = Vectorize(repair_fraction)(fraction)) %>% # fix fraction
  27. group_by(id) %>%
  28. summarize(vorname = head(vorname, 1),
  29. nachname = head(nachname, 1),
  30. fraction = collect_unique(fraction),
  31. titel = longest_titel(titel),
  32. rolle_kurz = collect_unique(str_squish(rolle_kurz)),
  33. rolle_lang = collect_unique(str_squish(rolle_lang))) %>%
  34. ungroup() #%>%
  35. # arrange(id) %>%
  36. # distinct(vorname, nachname, fraction, titel)
  37. }
  38. repair_speeches <- function(speeches) {
  39. if (nrow(speeches) == 0) return(speeches)
  40. # TODO: fill with content
  41. speeches
  42. }
  43. repair_talks <- function(talks) {
  44. if (nrow(talks) == 0) return(talks)
  45. # ignore all talks which have empty content
  46. filter(talks, str_length(content) > 0)
  47. }
  48. #' Lookup name in speakers table
  49. #'
  50. #' Tries to find the correct speaker id given a name.
  51. #' This is sufficient since every prename lastname combination in the bundestag is
  52. #' unique (luckily :D)
  53. #'
  54. #' @param tb tibble
  55. #' @param speaker tibble
  56. #' @param name_variable name
  57. #'
  58. #' Tries to match the name_variable column with speaker names
  59. #'
  60. #' returns a lookup table
  61. lookup_speaker <- function(tb, speaker, name_variable) {
  62. tobereplaced <- "[-–—‑­­-­­­ ]"
  63. speaker %>%
  64. unite(name, vorname, nachname, sep=".*") %>%
  65. mutate(name = str_replace_all(name, tobereplaced, ".*")) ->
  66. rs
  67. find_match <- function(komm) {
  68. if (komm == "") return (NA_character_)
  69. # I tried with agrep (levensthein distance) but results are better that way
  70. matches <- str_which(komm, rs$name)
  71. if (length(matches) == 0) return(NA_character_)
  72. rs[head(matches, 1), ]$id
  73. }
  74. tb %>%
  75. distinct({{name_variable}}) %>%
  76. mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
  77. }
  78. repair_comments <- function(comments, speaker) {
  79. cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
  80. "Use repair(, repair_commments = FALSE) to skip this.\n"))
  81. # try to find a speaker id for each actual comment
  82. comments %>%
  83. filter(!is.na(kommentator)) %>%
  84. lookup_speaker(speaker, kommentator) %>%
  85. left_join(comments, ., by="kommentator") %>%
  86. select(-kommentator)
  87. }
  88. #' Repair parsed tables
  89. #'
  90. #' @param parse_output tibble
  91. #' @param repair_comments bool
  92. #'
  93. #' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
  94. #'
  95. #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
  96. #' Since repaired tables should be a fixpoint of repair.
  97. #' @export
  98. repair <- function(parse_output, repair_comments = FALSE) {
  99. list(speaker = repair_speaker(parse_output$speaker),
  100. speeches = repair_speeches(parse_output$speeches),
  101. talks = repair_talks(parse_output$talks),
  102. comments = if(repair_comments) repair_comments(parse_output$comments,
  103. parse_output$speaker)
  104. else parse_output$comments,
  105. applause = parse_output$applause
  106. )
  107. }