An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

163 řádky
5.5KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. talks <- res$talks
  11. mutate(
  12. talks,
  13. occurences = sapply(
  14. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  15. nrow
  16. )
  17. )
  18. }
  19. #' add information from speaker table to a tibble containing speaker id
  20. #'
  21. #' @param tb tibble
  22. #' @param res list of tibbles
  23. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  24. #'
  25. #' left join speaker information from res$speaker into tb.
  26. #' if fraction_only
  27. #'
  28. #' @export
  29. join_speaker <- function(tb, res, fraction_only = F) {
  30. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  31. if (fraction_only) select(joined, "fraction")
  32. else joined
  33. }
  34. #' lookup table for official party colors
  35. #'
  36. #' @export
  37. party_colors <- c(
  38. AfD="#1A9FDD",
  39. FDP="#FEEB34",
  40. "CDU/CSU"="#000000",
  41. SPD="#DF0B25",
  42. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  43. "DIE LINKE"="#BC3475",
  44. "AfD&Fraktionslos"="#AAAAFF",
  45. Fraktionslos="#AAAAAA"
  46. )
  47. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  48. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  49. "FDP", "AfD", NA_character_))
  50. #' Bar chart visualizing fraction based data
  51. #'
  52. #' Can be configured to also visualize data not related to fractions.
  53. #'
  54. #' @param tb tibble
  55. #' @param x_variable column in tb, default is fraction
  56. #' @param y_variable column in tb, default is n
  57. #' @param fill column in tb, default is fraction
  58. #' @param title plot title
  59. #' @param xlab label for x axis, default is fraction
  60. #' @param ylab label for y axis, default is n
  61. #' @param filllab default is 'Fraction'
  62. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  63. #' @param position default is 'dodge'
  64. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  65. #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
  66. #'
  67. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  68. #' Then color the plot depending on the fill value.
  69. #' Give the plot a title and a label for x-axis and y-axis,
  70. #' color the legend according to filllab and finally
  71. #' improve positioning details according to position
  72. #'
  73. #' @export
  74. bar_plot_fractions <- function(tb,
  75. x_variable = NULL, # default is fraction
  76. y_variable = NULL, # default is n
  77. fill = NULL, # default is fraction
  78. title = NULL,
  79. xlab = "Fraction",
  80. ylab = "n",
  81. filllab = "Fraction",
  82. flipped = TRUE,
  83. position = "dodge",
  84. reorder = FALSE,
  85. rotatelab = FALSE) {
  86. # capture expressions in arguments
  87. fill <- enexpr(fill)
  88. y_variable <- enexpr(y_variable)
  89. x_variable <- enexpr(x_variable)
  90. # set default values
  91. if (is.null(fill)) fill <- expr(fraction)
  92. if (is.null(y_variable)) y_variable <- expr(n)
  93. if (is.null(x_variable)) x_variable <- expr(fraction)
  94. # either reorder fraction factor by variable value
  95. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  96. y = !!y_variable,
  97. fill = reorder(!!fill, -!!y_variable))
  98. # or reorder fraction factor by party seat order in parliament (default)
  99. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  100. y = !!y_variable,
  101. fill = factor(!!fill, levels = party_order))
  102. if(rotatelab){
  103. # make a bar plot
  104. ggplot(tb, maps, rotate) +
  105. scale_fill_manual(values = party_colors, na.value = "#555555") +
  106. xlab(xlab) +
  107. ylab(ylab) +
  108. labs(fill = filllab) +
  109. ggtitle(title) +
  110. geom_bar(stat = "identity", position = position) +
  111. theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ->
  112. plt
  113. }
  114. else{
  115. # make a bar plot
  116. ggplot(tb, maps, rotate) +
  117. scale_fill_manual(values = party_colors, na.value = "#555555") +
  118. xlab(xlab) +
  119. ylab(ylab) +
  120. labs(fill = filllab) +
  121. ggtitle(title) +
  122. geom_bar(stat = "identity", position = position) ->
  123. plt
  124. }
  125. # if flipped == TRUE, draw bars horizontally (default TRUE)
  126. if (flipped) plt + coord_flip() else plt
  127. }
  128. #' Word usage summarised by date
  129. #'
  130. #' Counts how many talks do match a given pattern and summarises by date.
  131. #'
  132. #' @param res List of Tibbles to be analysed.
  133. #' @param patterns Words to look up.
  134. #' @param name ?
  135. #' @param tidy default is FALSE.
  136. #'
  137. #' @export
  138. word_usage_by_date <- function(res, patterns, name, tidy=F) {
  139. tb <- res$talks
  140. nms <- names(patterns)
  141. for (i in seq_along(patterns)) {
  142. if (!is.null(nms)) name <- nms[[i]]
  143. else name <- patterns[[i]]
  144. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  145. }
  146. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  147. group_by(date) %>%
  148. summarize(across(where(is.numeric), sum)) %>%
  149. arrange(date) -> tb
  150. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  151. else tb
  152. }