An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

144 lignes
5.0KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. talks <- res$talks
  11. mutate(
  12. talks,
  13. occurences = sapply(
  14. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  15. nrow
  16. )
  17. )
  18. }
  19. #' add information from speaker table to a tibble containing speaker id
  20. #'
  21. #' @param tb tibble
  22. #' @param res list of tibbles
  23. #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
  24. #'
  25. #' left join speaker information from res$speaker into tb.
  26. #' if fraction_only
  27. #'
  28. #' @export
  29. join_speaker <- function(tb, res, fraction_only = F) {
  30. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  31. if (fraction_only) select(joined, "fraction")
  32. else joined
  33. }
  34. #' lookup table for official party colors
  35. #'
  36. #' @export
  37. party_colors <- c(
  38. AfD="#1A9FDD",
  39. FDP="#FEEB34",
  40. "CDU/CSU"="#000000",
  41. SPD="#DF0B25",
  42. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  43. "DIE LINKE"="#BC3475",
  44. "AfD&Fraktionslos"="#AAAAFF",
  45. Fraktionslos="#AAAAAA"
  46. )
  47. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  48. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  49. "FDP", "AfD", NA_character_))
  50. #' Bar chart visualizing fraction based data
  51. #'
  52. #' Can be configured to also visualize data not related to fractions.
  53. #'
  54. #' @param tb tibble
  55. #' @param x_variable column in tb, default is fraction
  56. #' @param y_variable column in tb, default is n
  57. #' @param fill column in tb, default is fraction
  58. #' @param title plot title
  59. #' @param xlab label for x axis, default is fraction
  60. #' @param ylab label for y axis, default is n
  61. #' @param filllab default is 'Fraction'
  62. #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
  63. #' @param position default is 'dodge'
  64. #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
  65. #'
  66. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  67. #' Then color the plot depending on the fill value.
  68. #' Give the plot a title and a label for x-axis and y-axis,
  69. #' color the legend according to filllab and finally
  70. #' improve positioning details according to position
  71. #'
  72. #' @export
  73. bar_plot_fractions <- function(tb,
  74. x_variable = NULL, # default is fraction
  75. y_variable = NULL, # default is n
  76. fill = NULL, # default is fraction
  77. title = NULL,
  78. xlab = "Fraction",
  79. ylab = "n",
  80. filllab = "Fraction",
  81. flipped = TRUE,
  82. position = "dodge",
  83. reorder = FALSE) {
  84. # capture expressions in arguments
  85. fill <- enexpr(fill)
  86. y_variable <- enexpr(y_variable)
  87. x_variable <- enexpr(x_variable)
  88. # set default values
  89. if (is.null(fill)) fill <- expr(fraction)
  90. if (is.null(y_variable)) y_variable <- expr(n)
  91. if (is.null(x_variable)) x_variable <- expr(fraction)
  92. # either reorder fraction factor by variable value
  93. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  94. y = !!y_variable,
  95. fill = reorder(!!fill, -!!y_variable))
  96. # or reorder fraction factor by party seat order in parliament (default)
  97. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  98. y = !!y_variable,
  99. fill = factor(!!fill, levels = party_order))
  100. # make a bar plot
  101. ggplot(tb, maps) +
  102. scale_fill_manual(values = party_colors, na.value = "#555555") +
  103. xlab(xlab) +
  104. ylab(ylab) +
  105. labs(fill = filllab) +
  106. ggtitle(title) +
  107. geom_bar(stat = "identity", position = position) ->
  108. plt
  109. # if flipped == TRUE, draw bars horizontally (default TRUE)
  110. if (flipped) plt + coord_flip() else plt
  111. }
  112. #' Word usage summarised by date
  113. #'
  114. #' Counts how many talks do match a given pattern and summarises by date.
  115. #'
  116. #' @param res List of Tibbles to be analysed.
  117. #' @param patterns Words to look up.
  118. #' @param name ?
  119. #' @param tidy default is FALSE.
  120. #'
  121. #' @export
  122. word_usage_by_date <- function(res, patterns, name, tidy=F) {
  123. tb <- res$talks
  124. nms <- names(patterns)
  125. for (i in seq_along(patterns)) {
  126. if (!is.null(nms)) name <- nms[[i]]
  127. else name <- patterns[[i]]
  128. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  129. }
  130. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  131. group_by(date) %>%
  132. summarize(across(where(is.numeric), sum)) %>%
  133. arrange(date) -> tb
  134. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  135. else tb
  136. }