An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

177 lines
6.5KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. is_valid_res(res)
  11. talks <- res$talks
  12. mutate(
  13. talks,
  14. occurences = sapply(
  15. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  16. nrow
  17. )
  18. )
  19. }
  20. #' add information from speaker table to a tibble containing speaker id
  21. #'
  22. #' @param tb tibble
  23. #' @param res list of tibbles
  24. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  25. #'
  26. #' left join speaker information from res$speaker into tb.
  27. #' if fraction_only
  28. #'
  29. #' @export
  30. join_speaker <- function(tb, res, fraction_only = F) {
  31. is_valid_res(res)
  32. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  33. if (fraction_only) select(joined, "fraction")
  34. else joined
  35. }
  36. #' lookup table for official party colors
  37. #'
  38. #' @export
  39. party_colors <- c(
  40. AfD="#1A9FDD",
  41. FDP="#FEEB34",
  42. "CDU/CSU"="#000000",
  43. SPD="#DF0B25",
  44. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  45. "DIE LINKE"="#BC3475",
  46. "AfD&Fraktionslos"="#AAAAFF",
  47. Fraktionslos="#AAAAAA"
  48. )
  49. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  50. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  51. "FDP", "AfD", NA_character_))
  52. #' Bar chart visualizing fraction based data
  53. #'
  54. #' Can be configured to also visualize data not related to fractions.
  55. #'
  56. #' @param tb tibble
  57. #' @param x_variable column in tb, default is fraction
  58. #' @param y_variable column in tb, default is n
  59. #' @param fill column in tb, default is fraction
  60. #' @param title plot title
  61. #' @param xlab label for x axis, default is fraction
  62. #' @param ylab label for y axis, default is n
  63. #' @param filllab default is 'Fraction'
  64. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  65. #' @param position default is 'dodge'
  66. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  67. #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
  68. #'
  69. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  70. #' Then color the plot depending on the fill value.
  71. #' Give the plot a title and a label for x-axis and y-axis,
  72. #' color the legend according to filllab and finally
  73. #' improve positioning details according to position
  74. #'
  75. #' @export
  76. bar_plot_fractions <- function(tb,
  77. x_variable = NULL, # default is fraction
  78. y_variable = NULL, # default is n
  79. fill = NULL, # default is fraction
  80. title = NULL,
  81. xlab = "Fraction",
  82. ylab = "n",
  83. filllab = "Fraction",
  84. flipped = TRUE,
  85. position = "dodge",
  86. reorder = FALSE,
  87. rotatelab = FALSE) {
  88. # capture expressions in arguments
  89. fill <- enexpr(fill)
  90. y_variable <- enexpr(y_variable)
  91. x_variable <- enexpr(x_variable)
  92. # set default values
  93. if (is.null(fill)) fill <- expr(fraction)
  94. if (is.null(y_variable)) y_variable <- expr(n)
  95. if (is.null(x_variable)) x_variable <- expr(fraction)
  96. # check if variables exist
  97. if (!rlang::expr_text(x_variable) %in% names(tb))
  98. stop(paste0(rlang::expr_text(x_variable),
  99. " is not a column of tb. Did you set x_variable accordingly?"),
  100. .call = NULL)
  101. if (!rlang::expr_text(y_variable) %in% names(tb))
  102. stop(paste0(rlang::expr_text(y_variable),
  103. " is not a column of tb. Did you set y_variable accordingly?"),
  104. .call = NULL)
  105. if (!rlang::expr_text(fill) %in% names(tb))
  106. stop(paste0(rlang::expr_text(fill),
  107. " is not a column of tb. Did you set fill accordingly?"),
  108. .call = NULL)
  109. # check argument types
  110. stopifnot("title has to be of type character or NULL" = is.character(title) || is.null(title))
  111. stopifnot("xlab has to be of type character" = is.character(xlab))
  112. stopifnot("ylab has to be of type character" = is.character(ylab))
  113. stopifnot("filllab has to be of type character" = is.character(filllab))
  114. stopifnot("flipped has to be of type logical" = is.logical(flipped))
  115. stopifnot("rotatelab has to be of type logical" = is.logical(rotatelab))
  116. stopifnot("reorder has to be of type logical" = is.logical(reorder))
  117. # either reorder fraction factor by variable value
  118. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  119. y = !!y_variable,
  120. fill = reorder(!!fill, -!!y_variable))
  121. # or reorder fraction factor by party seat order in parliament (default)
  122. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  123. y = !!y_variable,
  124. fill = factor(!!fill, levels = party_order))
  125. # make a bar plot
  126. ggplot(tb, maps) +
  127. scale_fill_manual(values = party_colors, na.value = "#555555") +
  128. xlab(xlab) +
  129. ylab(ylab) +
  130. labs(fill = filllab) +
  131. ggtitle(title) +
  132. geom_bar(stat = "identity", position = position) ->
  133. plt
  134. # if rotatelab == TRUE, rotate x labels by 90 degrees
  135. if (rotatelab)
  136. plt + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -> plt
  137. # if flipped == TRUE, draw bars horizontally (default TRUE)
  138. if (flipped) plt + coord_flip() else plt
  139. }
  140. #' Word usage summarised by date
  141. #'
  142. #' Counts how many talks do match a given pattern and summarises by date.
  143. #'
  144. #' @param res List of Tibbles to be analysed.
  145. #' @param patterns Words to look up.
  146. #' @param tidy default is FALSE.
  147. #'
  148. #' @export
  149. word_usage_by_date <- function(res, patterns, tidy=F) {
  150. is_valid_res(res)
  151. tb <- res$talks
  152. nms <- names(patterns)
  153. for (i in seq_along(patterns)) {
  154. if (!is.null(nms)) name <- nms[[i]]
  155. else name <- patterns[[i]]
  156. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  157. }
  158. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  159. group_by(date) %>%
  160. summarize(across(where(is.numeric), sum)) %>%
  161. arrange(date) -> tb
  162. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  163. else tb
  164. }