An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

165 líneas
5.6KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. is_valid_res(res)
  11. talks <- res$talks
  12. mutate(
  13. talks,
  14. occurences = sapply(
  15. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  16. nrow
  17. )
  18. )
  19. }
  20. #' add information from speaker table to a tibble containing speaker id
  21. #'
  22. #' @param tb tibble
  23. #' @param res list of tibbles
  24. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  25. #'
  26. #' left join speaker information from res$speaker into tb.
  27. #' if fraction_only
  28. #'
  29. #' @export
  30. join_speaker <- function(tb, res, fraction_only = F) {
  31. is_valid_res(res)
  32. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  33. if (fraction_only) select(joined, "fraction")
  34. else joined
  35. }
  36. #' lookup table for official party colors
  37. #'
  38. #' @export
  39. party_colors <- c(
  40. AfD="#1A9FDD",
  41. FDP="#FEEB34",
  42. "CDU/CSU"="#000000",
  43. SPD="#DF0B25",
  44. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  45. "DIE LINKE"="#BC3475",
  46. "AfD&Fraktionslos"="#AAAAFF",
  47. Fraktionslos="#AAAAAA"
  48. )
  49. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  50. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  51. "FDP", "AfD", NA_character_))
  52. #' Bar chart visualizing fraction based data
  53. #'
  54. #' Can be configured to also visualize data not related to fractions.
  55. #'
  56. #' @param tb tibble
  57. #' @param x_variable column in tb, default is fraction
  58. #' @param y_variable column in tb, default is n
  59. #' @param fill column in tb, default is fraction
  60. #' @param title plot title
  61. #' @param xlab label for x axis, default is fraction
  62. #' @param ylab label for y axis, default is n
  63. #' @param filllab default is 'Fraction'
  64. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  65. #' @param position default is 'dodge'
  66. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  67. #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
  68. #'
  69. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  70. #' Then color the plot depending on the fill value.
  71. #' Give the plot a title and a label for x-axis and y-axis,
  72. #' color the legend according to filllab and finally
  73. #' improve positioning details according to position
  74. #'
  75. #' @export
  76. bar_plot_fractions <- function(tb,
  77. x_variable = NULL, # default is fraction
  78. y_variable = NULL, # default is n
  79. fill = NULL, # default is fraction
  80. title = NULL,
  81. xlab = "Fraction",
  82. ylab = "n",
  83. filllab = "Fraction",
  84. flipped = TRUE,
  85. position = "dodge",
  86. reorder = FALSE,
  87. rotatelab = FALSE) {
  88. # capture expressions in arguments
  89. fill <- enexpr(fill)
  90. y_variable <- enexpr(y_variable)
  91. x_variable <- enexpr(x_variable)
  92. # set default values
  93. if (is.null(fill)) fill <- expr(fraction)
  94. if (is.null(y_variable)) y_variable <- expr(n)
  95. if (is.null(x_variable)) x_variable <- expr(fraction)
  96. # either reorder fraction factor by variable value
  97. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  98. y = !!y_variable,
  99. fill = reorder(!!fill, -!!y_variable))
  100. # or reorder fraction factor by party seat order in parliament (default)
  101. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  102. y = !!y_variable,
  103. fill = factor(!!fill, levels = party_order))
  104. if(rotatelab){
  105. # make a bar plot
  106. ggplot(tb, maps, rotate) +
  107. scale_fill_manual(values = party_colors, na.value = "#555555") +
  108. xlab(xlab) +
  109. ylab(ylab) +
  110. labs(fill = filllab) +
  111. ggtitle(title) +
  112. geom_bar(stat = "identity", position = position) +
  113. theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ->
  114. plt
  115. }
  116. else{
  117. # make a bar plot
  118. ggplot(tb, maps, rotate) +
  119. scale_fill_manual(values = party_colors, na.value = "#555555") +
  120. xlab(xlab) +
  121. ylab(ylab) +
  122. labs(fill = filllab) +
  123. ggtitle(title) +
  124. geom_bar(stat = "identity", position = position) ->
  125. plt
  126. }
  127. # if flipped == TRUE, draw bars horizontally (default TRUE)
  128. if (flipped) plt + coord_flip() else plt
  129. }
  130. #' Word usage summarised by date
  131. #'
  132. #' Counts how many talks do match a given pattern and summarises by date.
  133. #'
  134. #' @param res List of Tibbles to be analysed.
  135. #' @param patterns Words to look up.
  136. #' @param tidy default is FALSE.
  137. #'
  138. #' @export
  139. word_usage_by_date <- function(res, patterns, tidy=F) {
  140. is_valid_res(res)
  141. tb <- res$talks
  142. nms <- names(patterns)
  143. for (i in seq_along(patterns)) {
  144. if (!is.null(nms)) name <- nms[[i]]
  145. else name <- patterns[[i]]
  146. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  147. }
  148. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  149. group_by(date) %>%
  150. summarize(across(where(is.numeric), sum)) %>%
  151. arrange(date) -> tb
  152. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  153. else tb
  154. }