An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符

145 行
4.8KB

  1. #' Count number of occurences of a given word
  2. #'
  3. #' @param res tibble
  4. #' @param word character
  5. #'
  6. #' Add number of occurences of word to talks
  7. #'
  8. #' @export
  9. find_word <- function(res, word) {
  10. talks <- res$talks
  11. mutate(
  12. talks,
  13. occurences = sapply(
  14. str_match_all(talks$content, regex(word, ignore_case = TRUE)),
  15. nrow
  16. )
  17. )
  18. }
  19. #' add information from speaker table to a tibble containing speaker id
  20. #'
  21. #' @param tb tibble
  22. #' @param res tibble
  23. #' @param fraction_only bool
  24. #'
  25. #' left join speaker information from res$speaker into tb.
  26. #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble
  27. #'
  28. #' @export
  29. join_speaker <- function(tb, res, fraction_only = F) {
  30. joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
  31. if (fraction_only) select(joined, "fraction")
  32. else joined
  33. }
  34. #' lookup table for party colors
  35. #'
  36. #' @export
  37. party_colors <- c(
  38. AfD="#1A9FDD",
  39. FDP="#FEEB34",
  40. "CDU/CSU"="#000000",
  41. SPD="#DF0B25",
  42. "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B",
  43. "DIE LINKE"="#BC3475",
  44. "AfD&Fraktionslos"="#AAAAFF",
  45. Fraktionslos="#AAAAAA"
  46. )
  47. party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
  48. "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
  49. "FDP", "AfD", NA_character_))
  50. #' plot data depending on fractions in a standardized, configurable way
  51. #'
  52. #' @param tb tibble
  53. #' @param x_variable column in tb
  54. #' @param y_variable column in tb
  55. #' @param fill column in tb
  56. #' @param title char
  57. #' @param xlab char
  58. #' @param ylab char
  59. #' @param filllab char
  60. #' @param flipped bool
  61. #' @param position char
  62. #' @param reorder bool
  63. #'
  64. #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
  65. #' Then color the plot depending on the fill value
  66. #' Give the plot a title, an x-label xlab as well as an y-label ylab
  67. #' Color the legend according to filllab
  68. #' Setting flipped to TRUE makes the bars horizontal
  69. #' Improve positioning details according to position
  70. #' and finally reorder x_variable (default ist to order fractions according to seat order)
  71. #'
  72. #' @export
  73. bar_plot_fractions <- function(tb,
  74. x_variable = NULL, # default is fraction
  75. y_variable = NULL, # default is n
  76. fill = NULL, # default is fraction
  77. title = NULL,
  78. xlab = "Fraction",
  79. ylab = "n",
  80. filllab = "Fraction",
  81. flipped = TRUE,
  82. position = "dodge",
  83. reorder = FALSE) {
  84. # capture expressions in arguments
  85. fill <- enexpr(fill)
  86. y_variable <- enexpr(y_variable)
  87. x_variable <- enexpr(x_variable)
  88. # set default values
  89. if (is.null(fill)) fill <- expr(fraction)
  90. if (is.null(y_variable)) y_variable <- expr(n)
  91. if (is.null(x_variable)) x_variable <- expr(fraction)
  92. # either reorder fraction factor by variable value
  93. if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
  94. y = !!y_variable,
  95. fill = reorder(!!fill, -!!y_variable))
  96. # or reorder fraction factor by party seat order in parliament (default)
  97. else maps <- aes(x = factor(!!x_variable, levels = party_order),
  98. y = !!y_variable,
  99. fill = factor(!!fill, levels = party_order))
  100. # make a bar plot
  101. ggplot(tb, maps) +
  102. scale_fill_manual(values = party_colors, na.value = "#555555") +
  103. xlab(xlab) +
  104. ylab(ylab) +
  105. labs(fill = filllab) +
  106. ggtitle(title) +
  107. geom_bar(stat = "identity", position = position) ->
  108. plt
  109. # if flipped == TRUE, draw bars horizontally (default TRUE)
  110. if (flipped) plt + coord_flip() else plt
  111. }
  112. #' Counts how many talks do match a given pattern and summarises by date
  113. #'
  114. #' @param res tibble
  115. #' @param patterns char list
  116. #' @param name char ? what is name needed for??
  117. #' @param tidy bool, default F
  118. #'
  119. #' shorter summary if tidy=F
  120. #' if tidy is set to T, the resulting tibble is tidy
  121. #'
  122. #' @export
  123. word_usage_by_date <- function(res, patterns, name, tidy=F) {
  124. tb <- res$talks
  125. nms <- names(patterns)
  126. for (i in seq_along(patterns)) {
  127. if (!is.null(nms)) name <- nms[[i]]
  128. else name <- patterns[[i]]
  129. tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
  130. }
  131. left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
  132. group_by(date) %>%
  133. summarize(across(where(is.numeric), sum)) %>%
  134. arrange(date) -> tb
  135. if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
  136. else tb
  137. }