An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

185 line
7.0KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. is_valid_res(res)
  11. stopifnot("word must be of type character" = is.character(word))
  12. talks <- res$talks
  13. mutate(
  14. talks,
  15. occurences = sapply(
  16. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  17. nrow
  18. )
  19. )
  20. }
  21. #' add information from speaker table to a tibble containing speaker id
  22. #'
  23. #' @param tb tibble
  24. #' @param res list of tibbles
  25. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  26. #'
  27. #' left join speaker information from res$speaker into tb.
  28. #' if fraction_only, drop all columns but fraction
  29. #'
  30. #' @export
  31. join_speaker <- function(tb, res, fraction_only = F) {
  32. is_valid_res(res)
  33. stopifnot("fraction_only must be of type logical" = is.logical(fraction_only))
  34. stopifnot("tb must be a tibble" = inherits(tb, "tbl"))
  35. stopifnot("tb must have a speaker column" = "speaker" %in% names(tb))
  36. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  37. if (fraction_only) select(joined, "fraction")
  38. else joined
  39. }
  40. #' lookup table for official party colors
  41. #'
  42. #' @export
  43. party_colors <- c(
  44. AfD="#1A9FDD",
  45. FDP="#FEEB34",
  46. "CDU/CSU"="#000000",
  47. SPD="#DF0B25",
  48. "B\u00DCNDNIS 90/DIE GR\u00DCNEN"="#4A932B",
  49. "DIE LINKE"="#BC3475",
  50. "AfD&Fraktionslos"="#AAAAFF",
  51. Fraktionslos="#AAAAAA"
  52. )
  53. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  54. "DIE LINKE", "B\u00DCNDNIS 90/DIE GR\u00DCNEN", "SPD", "CDU/CSU",
  55. "FDP", "AfD", NA_character_))
  56. #' Bar chart visualizing fraction based data
  57. #'
  58. #' Can be configured to also visualize data not related to fractions.
  59. #'
  60. #' @param tb tibble
  61. #' @param x_variable column in tb, default is fraction
  62. #' @param y_variable column in tb, default is n
  63. #' @param fill column in tb, default is fraction
  64. #' @param title plot title
  65. #' @param xlab label for x axis, default is fraction
  66. #' @param ylab label for y axis, default is n
  67. #' @param filllab default is 'Fraction'
  68. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  69. #' @param position default is 'dodge'
  70. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  71. #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
  72. #'
  73. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  74. #' Then color the plot depending on the fill value.
  75. #' Give the plot a title and a label for x-axis and y-axis,
  76. #' color the legend according to filllab and finally
  77. #' improve positioning details according to position
  78. #'
  79. #' @export
  80. bar_plot_fractions <- function(tb,
  81. x_variable = NULL, # default is fraction
  82. y_variable = NULL, # default is n
  83. fill = NULL, # default is fraction
  84. title = NULL,
  85. xlab = "Fraction",
  86. ylab = "n",
  87. filllab = "Fraction",
  88. flipped = TRUE,
  89. position = "dodge",
  90. reorder = FALSE,
  91. rotatelab = FALSE) {
  92. # capture expressions in arguments
  93. fill <- enexpr(fill)
  94. y_variable <- enexpr(y_variable)
  95. x_variable <- enexpr(x_variable)
  96. # set default values
  97. if (is.null(fill)) fill <- expr(fraction)
  98. if (is.null(y_variable)) y_variable <- expr(n)
  99. if (is.null(x_variable)) x_variable <- expr(fraction)
  100. # check if variables exist
  101. if (!rlang::expr_text(x_variable) %in% names(tb))
  102. stop(paste0(rlang::expr_text(x_variable),
  103. " is not a column of tb. Did you set x_variable accordingly?"),
  104. .call = NULL)
  105. if (!rlang::expr_text(y_variable) %in% names(tb))
  106. stop(paste0(rlang::expr_text(y_variable),
  107. " is not a column of tb. Did you set y_variable accordingly?"),
  108. .call = NULL)
  109. if (!rlang::expr_text(fill) %in% names(tb))
  110. stop(paste0(rlang::expr_text(fill),
  111. " is not a column of tb. Did you set fill accordingly?"),
  112. .call = NULL)
  113. # check argument types
  114. stopifnot("title has to be of type character or NULL" = is.character(title) || is.null(title))
  115. stopifnot("xlab has to be of type character" = is.character(xlab))
  116. stopifnot("ylab has to be of type character" = is.character(ylab))
  117. stopifnot("filllab has to be of type character" = is.character(filllab))
  118. stopifnot("flipped has to be of type logical" = is.logical(flipped))
  119. stopifnot("rotatelab has to be of type logical" = is.logical(rotatelab))
  120. stopifnot("reorder has to be of type logical" = is.logical(reorder))
  121. # either reorder fraction factor by variable value
  122. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  123. y = !!y_variable,
  124. fill = reorder(!!fill, -!!y_variable))
  125. # or reorder fraction factor by party seat order in parliament (default)
  126. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  127. y = !!y_variable,
  128. fill = factor(!!fill, levels = party_order))
  129. # make a bar plot
  130. ggplot(tb, maps) +
  131. scale_fill_manual(values = party_colors, na.value = "#555555") +
  132. xlab(xlab) +
  133. ylab(ylab) +
  134. labs(fill = filllab) +
  135. ggtitle(title) +
  136. geom_bar(stat = "identity", position = position) ->
  137. plt
  138. # if rotatelab == TRUE, rotate x labels by 90 degrees
  139. if (rotatelab)
  140. plt + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -> plt
  141. # if flipped == TRUE, draw bars horizontally (default TRUE)
  142. if (flipped) plt + coord_flip() else plt
  143. }
  144. #' Word usage summarised by date
  145. #'
  146. #' Counts how many talks do match a given pattern and summarises by date.
  147. #'
  148. #' @param res List of Tibbles to be analysed.
  149. #' @param patterns Words to look up.
  150. #' @param tidy default is FALSE.
  151. #'
  152. #' @export
  153. word_usage_by_date <- function(res, patterns, tidy=F) {
  154. is_valid_res(res)
  155. stopifnot("patterns must be of type character" = is.character(patterns))
  156. stopifnot("tidy must be of type logical" = is.logical(tidy))
  157. tb <- res$talks
  158. nms <- names(patterns)
  159. for (i in seq_along(patterns)) {
  160. if (!is.null(nms)) name <- nms[[i]]
  161. else name <- patterns[[i]]
  162. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  163. }
  164. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  165. group_by(date) %>%
  166. summarize(across(where(is.numeric), sum)) %>%
  167. arrange(date) -> tb
  168. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  169. else tb
  170. }