An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

145 Zeilen
4.8KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. talks <- res$talks
  11. mutate(
  12. talks,
  13. occurences = sapply(
  14. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  15. nrow
  16. )
  17. )
  18. }
  19. #' add information from speaker table to a tibble containing speaker id
  20. #'
  21. #' @param tb tibble
  22. #' @param res tibble
  23. #' @param fraction_only bool
  24. #'
  25. #' left join speaker information from res$speaker into tb.
  26. #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble
  27. #'
  28. #' @export
  29. join_speaker <- function(tb, res, fraction_only = F) {
  30. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  31. if (fraction_only) select(joined, "fraction")
  32. else joined
  33. }
  34. #' lookup table for party colors
  35. #'
  36. #' @export
  37. party_colors <- c(
  38. AfD="#1A9FDD",
  39. FDP="#FEEB34",
  40. "CDU/CSU"="#000000",
  41. SPD="#DF0B25",
  42. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  43. "DIE LINKE"="#BC3475",
  44. "AfD&Fraktionslos"="#AAAAFF",
  45. Fraktionslos="#AAAAAA"
  46. )
  47. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  48. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  49. "FDP", "AfD", NA_character_))
  50. #' plot data depending on fractions in a standardized, configurable way
  51. #'
  52. #' @param tb tibble
  53. #' @param x_variable column in tb
  54. #' @param y_variable column in tb
  55. #' @param fill column in tb
  56. #' @param title char
  57. #' @param xlab char
  58. #' @param ylab char
  59. #' @param filllab char
  60. #' @param flipped bool
  61. #' @param position char
  62. #' @param reorder bool
  63. #'
  64. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  65. #' Then color the plot depending on the fill value
  66. #' Give the plot a title, an x-label xlab as well as an y-label ylab
  67. #' Color the legend according to filllab
  68. #' Setting flipped to TRUE makes the bars horizontal
  69. #' Improve positioning details according to position
  70. #' and finally reorder x_variable (default ist to order fractions according to seat order)
  71. #'
  72. #' @export
  73. bar_plot_fractions <- function(tb,
  74. x_variable = NULL, # default is fraction
  75. y_variable = NULL, # default is n
  76. fill = NULL, # default is fraction
  77. title = NULL,
  78. xlab = "Fraction",
  79. ylab = "n",
  80. filllab = "Fraction",
  81. flipped = TRUE,
  82. position = "dodge",
  83. reorder = FALSE) {
  84. # capture expressions in arguments
  85. fill <- enexpr(fill)
  86. y_variable <- enexpr(y_variable)
  87. x_variable <- enexpr(x_variable)
  88. # set default values
  89. if (is.null(fill)) fill <- expr(fraction)
  90. if (is.null(y_variable)) y_variable <- expr(n)
  91. if (is.null(x_variable)) x_variable <- expr(fraction)
  92. # either reorder fraction factor by variable value
  93. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  94. y = !!y_variable,
  95. fill = reorder(!!fill, -!!y_variable))
  96. # or reorder fraction factor by party seat order in parliament (default)
  97. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  98. y = !!y_variable,
  99. fill = factor(!!fill, levels = party_order))
  100. # make a bar plot
  101. ggplot(tb, maps) +
  102. scale_fill_manual(values = party_colors, na.value = "#555555") +
  103. xlab(xlab) +
  104. ylab(ylab) +
  105. labs(fill = filllab) +
  106. ggtitle(title) +
  107. geom_bar(stat = "identity", position = position) ->
  108. plt
  109. # if flipped == TRUE, draw bars horizontally (default TRUE)
  110. if (flipped) plt + coord_flip() else plt
  111. }
  112. #' Counts how many talks do match a given pattern and summarises by date
  113. #'
  114. #' @param res tibble
  115. #' @param patterns char list
  116. #' @param name char ? what is name needed for??
  117. #' @param tidy bool, default F
  118. #'
  119. #' shorter summary if tidy=F
  120. #' if tidy is set to T, the resulting tibble is tidy
  121. #'
  122. #' @export
  123. word_usage_by_date <- function(res, patterns, name, tidy=F) {
  124. tb <- res$talks
  125. nms <- names(patterns)
  126. for (i in seq_along(patterns)) {
  127. if (!is.null(nms)) name <- nms[[i]]
  128. else name <- patterns[[i]]
  129. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  130. }
  131. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  132. group_by(date) %>%
  133. summarize(across(where(is.numeric), sum)) %>%
  134. arrange(date) -> tb
  135. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  136. else tb
  137. }