An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

120 líneas
4.4KB

  1. fractions <- c("AFD" = "AfD",
  2. "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
  3. "B\u00DCNDNIS90/" = "B\u00DCNDNIS 90/DIE GR\u00DCNEN",
  4. "B\u00DCNDNIS90/DIEGR\u00DCNEN" = "B\u00DCNDNIS 90/DIE GR\u00DCNEN",
  5. "FRAKTIONSLOS" = "Fraktionslos",
  6. "DIELINKE" = "DIE LINKE",
  7. "SPD" = "SPD",
  8. "CDU/CSU" = "CDU/CSU",
  9. "FDP" = "FDP")
  10. repair_fraction <- function(fraction) {
  11. cleaned <- str_to_upper %$% str_replace_all(fraction, "\\s", "")
  12. fractions[cleaned]
  13. }
  14. # takes vector of titel and keeps longest
  15. longest_titel <- function(titel) {
  16. if (all(is.na(titel))) NA_character_
  17. else titel[which.max %$% str_length(titel)]
  18. }
  19. # takes character vector, removes duplicates and collapses
  20. collect_unique <- function(xs) xs %>% clear_na() %>% unique() %>% str_c(collapse="&") %>% na_if("")
  21. # expects a tibble of speaker and repairs
  22. repair_speaker <- function(speaker) {
  23. if (nrow(speaker) == 0) return(speaker)
  24. speaker %>%
  25. filter(id != "10000") %>% # invalid id's
  26. mutate(fraction = Vectorize(repair_fraction)(fraction)) %>% # fix fraction
  27. group_by(id) %>%
  28. summarize(prename = head(prename, 1),
  29. lastname = head(lastname, 1),
  30. fraction = collect_unique(fraction),
  31. title = longest_titel(title),
  32. role_short = collect_unique(str_squish(role_short)),
  33. role_long = collect_unique(str_squish(role_long))) %>%
  34. ungroup() #%>%
  35. }
  36. repair_speeches <- function(speeches) {
  37. if (nrow(speeches) == 0) return(speeches)
  38. # TODO: fill with content
  39. speeches
  40. }
  41. repair_talks <- function(talks) {
  42. if (nrow(talks) == 0) return(talks)
  43. # ignore all talks which have empty content
  44. filter(talks, str_length(content) > 0)
  45. }
  46. #' Lookup name in speakers table
  47. #'
  48. #' Tries to find the correct speaker id given a name.
  49. #' This is sufficient since every prename lastname combination in the bundestag is
  50. #' unique (luckily :D)
  51. #'
  52. #' @param tb tibble
  53. #' @param speaker tibble
  54. #' @param name_variable name
  55. #'
  56. #' Tries to match the name_variable column with speaker names
  57. #'
  58. #' returns a lookup table
  59. lookup_speaker <- function(tb, speaker, name_variable) {
  60. tobereplaced <- "[\u002D\u2013\u2014\u2011\u00AD ]"
  61. speaker %>%
  62. unite(name, prename, lastname, sep=".*") %>%
  63. mutate(name = str_replace_all(name, tobereplaced, ".*")) ->
  64. rs
  65. find_match <- function(komm) {
  66. if (komm == "") return (NA_character_)
  67. # I tried with agrep (levensthein distance) but results are better that way
  68. matches <- str_which(komm, rs$name)
  69. if (length(matches) == 0) return(NA_character_)
  70. rs[head(matches, 1), ]$id
  71. }
  72. tb %>%
  73. distinct({{name_variable}}) %>%
  74. mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
  75. }
  76. repair_comments <- function(comments, speaker, lookup_speaker=F) {
  77. comments %>%
  78. filter(!is.na(commenter) | !is.na(content) | !is.na(fraction)) ->
  79. tb
  80. if (lookup_speaker) {
  81. cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
  82. "Use repair(, lookup_speaker = FALSE) to skip this.\n"))
  83. # try to find a speaker id for each actual comment
  84. tb %>%
  85. filter(!is.na(commenter)) %>%
  86. lookup_speaker(speaker, commenter) %>%
  87. left_join(tb, ., by="commenter")
  88. } else tb
  89. }
  90. #' Repair parsed tables
  91. #'
  92. #' @param parse_output tibble
  93. #' @param lookup_speaker bool
  94. #'
  95. #' If lookup_speaker is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
  96. #'
  97. #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
  98. #' Since repaired tables should be a fixpoint of repair.
  99. #' @export
  100. repair <- function(parse_output, lookup_speaker = FALSE) {
  101. is_valid_res(parse_output)
  102. stopifnot("lookup_speaker must be of type logical" = is.logical(lookup_speaker))
  103. list(speaker = repair_speaker(parse_output$speaker),
  104. speeches = repair_speeches(parse_output$speeches),
  105. talks = repair_talks(parse_output$talks),
  106. comments = repair_comments(parse_output$comments,
  107. parse_output$speaker,
  108. lookup_speaker),
  109. applause = parse_output$applause)
  110. }