An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
25'ten fazla konu seçemezsiniz Konular bir harf veya rakamla başlamalı, kısa çizgiler ('-') içerebilir ve en fazla 35 karakter uzunluğunda olabilir.

165 satır
5.6KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. is_valid_res(res)
  11. talks <- res$talks
  12. mutate(
  13. talks,
  14. occurences = sapply(
  15. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  16. nrow
  17. )
  18. )
  19. }
  20. #' add information from speaker table to a tibble containing speaker id
  21. #'
  22. #' @param tb tibble
  23. #' @param res list of tibbles
  24. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  25. #'
  26. #' left join speaker information from res$speaker into tb.
  27. #' if fraction_only
  28. #'
  29. #' @export
  30. join_speaker <- function(tb, res, fraction_only = F) {
  31. is_valid_res(res)
  32. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  33. if (fraction_only) select(joined, "fraction")
  34. else joined
  35. }
  36. #' lookup table for official party colors
  37. #'
  38. #' @export
  39. party_colors <- c(
  40. AfD="#1A9FDD",
  41. FDP="#FEEB34",
  42. "CDU/CSU"="#000000",
  43. SPD="#DF0B25",
  44. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  45. "DIE LINKE"="#BC3475",
  46. "AfD&Fraktionslos"="#AAAAFF",
  47. Fraktionslos="#AAAAAA"
  48. )
  49. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  50. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  51. "FDP", "AfD", NA_character_))
  52. #' Bar chart visualizing fraction based data
  53. #'
  54. #' Can be configured to also visualize data not related to fractions.
  55. #'
  56. #' @param tb tibble
  57. #' @param x_variable column in tb, default is fraction
  58. #' @param y_variable column in tb, default is n
  59. #' @param fill column in tb, default is fraction
  60. #' @param title plot title
  61. #' @param xlab label for x axis, default is fraction
  62. #' @param ylab label for y axis, default is n
  63. #' @param filllab default is 'Fraction'
  64. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  65. #' @param position default is 'dodge'
  66. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  67. #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
  68. #'
  69. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  70. #' Then color the plot depending on the fill value.
  71. #' Give the plot a title and a label for x-axis and y-axis,
  72. #' color the legend according to filllab and finally
  73. #' improve positioning details according to position
  74. #'
  75. #' @export
  76. bar_plot_fractions <- function(tb,
  77. x_variable = NULL, # default is fraction
  78. y_variable = NULL, # default is n
  79. fill = NULL, # default is fraction
  80. title = NULL,
  81. xlab = "Fraction",
  82. ylab = "n",
  83. filllab = "Fraction",
  84. flipped = TRUE,
  85. position = "dodge",
  86. reorder = FALSE,
  87. rotatelab = FALSE) {
  88. # capture expressions in arguments
  89. fill <- enexpr(fill)
  90. y_variable <- enexpr(y_variable)
  91. x_variable <- enexpr(x_variable)
  92. # set default values
  93. if (is.null(fill)) fill <- expr(fraction)
  94. if (is.null(y_variable)) y_variable <- expr(n)
  95. if (is.null(x_variable)) x_variable <- expr(fraction)
  96. # either reorder fraction factor by variable value
  97. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  98. y = !!y_variable,
  99. fill = reorder(!!fill, -!!y_variable))
  100. # or reorder fraction factor by party seat order in parliament (default)
  101. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  102. y = !!y_variable,
  103. fill = factor(!!fill, levels = party_order))
  104. if(rotatelab){
  105. # make a bar plot
  106. ggplot(tb, maps, rotate) +
  107. scale_fill_manual(values = party_colors, na.value = "#555555") +
  108. xlab(xlab) +
  109. ylab(ylab) +
  110. labs(fill = filllab) +
  111. ggtitle(title) +
  112. geom_bar(stat = "identity", position = position) +
  113. theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ->
  114. plt
  115. }
  116. else{
  117. # make a bar plot
  118. ggplot(tb, maps, rotate) +
  119. scale_fill_manual(values = party_colors, na.value = "#555555") +
  120. xlab(xlab) +
  121. ylab(ylab) +
  122. labs(fill = filllab) +
  123. ggtitle(title) +
  124. geom_bar(stat = "identity", position = position) ->
  125. plt
  126. }
  127. # if flipped == TRUE, draw bars horizontally (default TRUE)
  128. if (flipped) plt + coord_flip() else plt
  129. }
  130. #' Word usage summarised by date
  131. #'
  132. #' Counts how many talks do match a given pattern and summarises by date.
  133. #'
  134. #' @param res List of Tibbles to be analysed.
  135. #' @param patterns Words to look up.
  136. #' @param tidy default is FALSE.
  137. #'
  138. #' @export
  139. word_usage_by_date <- function(res, patterns, tidy=F) {
  140. is_valid_res(res)
  141. tb <- res$talks
  142. nms <- names(patterns)
  143. for (i in seq_along(patterns)) {
  144. if (!is.null(nms)) name <- nms[[i]]
  145. else name <- patterns[[i]]
  146. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  147. }
  148. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  149. group_by(date) %>%
  150. summarize(across(where(is.numeric), sum)) %>%
  151. arrange(date) -> tb
  152. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  153. else tb
  154. }