An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

185 rindas
7.0KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. is_valid_res(res)
  11. stopifnot("word must be of type character" = is.character(word))
  12. talks <- res$talks
  13. mutate(
  14. talks,
  15. occurences = sapply(
  16. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  17. nrow
  18. )
  19. )
  20. }
  21. #' add information from speaker table to a tibble containing speaker id
  22. #'
  23. #' @param tb tibble
  24. #' @param res list of tibbles
  25. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  26. #'
  27. #' left join speaker information from res$speaker into tb.
  28. #' if fraction_only, drop all columns but fraction
  29. #'
  30. #' @export
  31. join_speaker <- function(tb, res, fraction_only = F) {
  32. is_valid_res(res)
  33. stopifnot("fraction_only must be of type logical" = is.logical(fraction_only))
  34. stopifnot("tb must be a tibble" = inherits(tb, "tbl"))
  35. stopifnot("tb must have a speaker column" = "speaker" %in% names(tb))
  36. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  37. if (fraction_only) select(joined, "fraction")
  38. else joined
  39. }
  40. #' lookup table for official party colors
  41. #'
  42. #' @export
  43. party_colors <- c(
  44. AfD="#1A9FDD",
  45. FDP="#FEEB34",
  46. "CDU/CSU"="#000000",
  47. SPD="#DF0B25",
  48. "B\u00DCNDNIS 90/DIE GR\u00DCNEN"="#4A932B",
  49. "DIE LINKE"="#BC3475",
  50. "AfD&Fraktionslos"="#AAAAFF",
  51. Fraktionslos="#AAAAAA"
  52. )
  53. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  54. "DIE LINKE", "B\u00DCNDNIS 90/DIE GR\u00DCNEN", "SPD", "CDU/CSU",
  55. "FDP", "AfD", NA_character_))
  56. #' Bar chart visualizing fraction based data
  57. #'
  58. #' Can be configured to also visualize data not related to fractions.
  59. #'
  60. #' @param tb tibble
  61. #' @param x_variable column in tb, default is fraction
  62. #' @param y_variable column in tb, default is n
  63. #' @param fill column in tb, default is fraction
  64. #' @param title plot title
  65. #' @param xlab label for x axis, default is fraction
  66. #' @param ylab label for y axis, default is n
  67. #' @param filllab default is 'Fraction'
  68. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  69. #' @param position default is 'dodge'
  70. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  71. #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
  72. #'
  73. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  74. #' Then color the plot depending on the fill value.
  75. #' Give the plot a title and a label for x-axis and y-axis,
  76. #' color the legend according to filllab and finally
  77. #' improve positioning details according to position
  78. #'
  79. #' @export
  80. bar_plot_fractions <- function(tb,
  81. x_variable = NULL, # default is fraction
  82. y_variable = NULL, # default is n
  83. fill = NULL, # default is fraction
  84. title = NULL,
  85. xlab = "Fraction",
  86. ylab = "n",
  87. filllab = "Fraction",
  88. flipped = TRUE,
  89. position = "dodge",
  90. reorder = FALSE,
  91. rotatelab = FALSE) {
  92. # capture expressions in arguments
  93. fill <- enexpr(fill)
  94. y_variable <- enexpr(y_variable)
  95. x_variable <- enexpr(x_variable)
  96. # set default values
  97. if (is.null(fill)) fill <- expr(fraction)
  98. if (is.null(y_variable)) y_variable <- expr(n)
  99. if (is.null(x_variable)) x_variable <- expr(fraction)
  100. # check if variables exist
  101. if (!rlang::expr_text(x_variable) %in% names(tb))
  102. stop(paste0(rlang::expr_text(x_variable),
  103. " is not a column of tb. Did you set x_variable accordingly?"),
  104. .call = NULL)
  105. if (!rlang::expr_text(y_variable) %in% names(tb))
  106. stop(paste0(rlang::expr_text(y_variable),
  107. " is not a column of tb. Did you set y_variable accordingly?"),
  108. .call = NULL)
  109. if (!rlang::expr_text(fill) %in% names(tb))
  110. stop(paste0(rlang::expr_text(fill),
  111. " is not a column of tb. Did you set fill accordingly?"),
  112. .call = NULL)
  113. # check argument types
  114. stopifnot("title has to be of type character or NULL" = is.character(title) || is.null(title))
  115. stopifnot("xlab has to be of type character" = is.character(xlab))
  116. stopifnot("ylab has to be of type character" = is.character(ylab))
  117. stopifnot("filllab has to be of type character" = is.character(filllab))
  118. stopifnot("flipped has to be of type logical" = is.logical(flipped))
  119. stopifnot("rotatelab has to be of type logical" = is.logical(rotatelab))
  120. stopifnot("reorder has to be of type logical" = is.logical(reorder))
  121. # either reorder fraction factor by variable value
  122. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  123. y = !!y_variable,
  124. fill = reorder(!!fill, -!!y_variable))
  125. # or reorder fraction factor by party seat order in parliament (default)
  126. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  127. y = !!y_variable,
  128. fill = factor(!!fill, levels = party_order))
  129. # make a bar plot
  130. ggplot(tb, maps) +
  131. scale_fill_manual(values = party_colors, na.value = "#555555") +
  132. xlab(xlab) +
  133. ylab(ylab) +
  134. labs(fill = filllab) +
  135. ggtitle(title) +
  136. geom_bar(stat = "identity", position = position) ->
  137. plt
  138. # if rotatelab == TRUE, rotate x labels by 90 degrees
  139. if (rotatelab)
  140. plt + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -> plt
  141. # if flipped == TRUE, draw bars horizontally (default TRUE)
  142. if (flipped) plt + coord_flip() else plt
  143. }
  144. #' Word usage summarised by date
  145. #'
  146. #' Counts how many talks do match a given pattern and summarises by date.
  147. #'
  148. #' @param res List of Tibbles to be analysed.
  149. #' @param patterns Words to look up.
  150. #' @param tidy default is FALSE.
  151. #'
  152. #' @export
  153. word_usage_by_date <- function(res, patterns, tidy=F) {
  154. is_valid_res(res)
  155. stopifnot("patterns must be of type character" = is.character(patterns))
  156. stopifnot("tidy must be of type logical" = is.logical(tidy))
  157. tb <- res$talks
  158. nms <- names(patterns)
  159. for (i in seq_along(patterns)) {
  160. if (!is.null(nms)) name <- nms[[i]]
  161. else name <- patterns[[i]]
  162. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  163. }
  164. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  165. group_by(date) %>%
  166. summarize(across(where(is.numeric), sum)) %>%
  167. arrange(date) -> tb
  168. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  169. else tb
  170. }