An R package to analyze the parliamentary records of the 19th legislative period of the Bundestag, the German parliament.
Вы не можете выбрать более 25 тем Темы должны начинаться с буквы или цифры, могут содержать дефисы(-) и должны содержать не более 35 символов.

142 строки
3.8KB

  1. ---
  2. title: "funwithdata"
  3. output: rmarkdown::html_vignette
  4. vignette: >
  5. %\VignetteIndexEntry{funwithdata}
  6. %\VignetteEngine{knitr::rmarkdown}
  7. %\VignetteEncoding{UTF-8}
  8. ---
  9. ```{r, include = FALSE}
  10. knitr::opts_chunk$set( # chunk defaults for the whole vignette
  11.   comment = "#>", # prefix put in front of every line of printed output
  12.   collapse = TRUE # merge a chunk's source and output into one block
  13. )
  14. ```
  15. ```{r setup}
  16. library(hateimparlament)
  17. library(dplyr)
  18. library(ggplot2)
  19. ```
  20. ## Preparation of data
  21. First, you need to download all records of the current legislative period.
  22. ```r
  23. fetch_all("../records/") # argument: path of the directory in which the downloaded records should be stored
  24. ```
  25. Second, those `.xml` files need to be parsed into `R` tibbles. This is accomplished by:
  26. ```r
  27. res <- read_all("../records/") %>% repair() # parse the XML records, then repair formatting issues
  28. ```
  29. We also used `repair` to fix a bunch of formatting issues in the records and unpacked
  30. the result into more descriptive variables.
  31. For development purposes, we load the tables from csv files.
  32. ```{r}
  33. res <- read_from_csv("../csv/") # development shortcut: load the tables from CSV files
  34. ```
  35. Then we unpack the tibbles into separate variables:
  36. ```{r}
  37. talks <- res[["talks"]] # pull each table out of `res` by its exact name
  38. redner <- res[["redner"]]
  39. reden <- res[["reden"]]
  40. comments <- res[["comments"]]
  41. ```
  42. ## Analysis
  43. Now we can start analysing our parsed dataset, e.g. find out which party gives the most talks:
  44. ```{r, fig.width=8}
  45. res$reden %>% # count the rows of `reden` per `fraktion`, smallest count first
  46.   join_redner(res) %>%
  47.   count(fraktion, name = "n") %>%
  48.   arrange(n) %>%
  49.   bar_plot_fraktionen()
  50. ```
  51. ### Count the occurrences of a word
  52. ```{r, fig.width=8}
  53. res %>% # count, per `fraktion`, the rows in which "hitler" was found
  54.   find_word("hitler") %>%
  55.   filter(occurences > 0) %>% # keep rows whose `occurences` value is positive
  56.   join_redner(res) %>%
  57.   select(content, fraktion) %>%
  58.   count(fraktion, name = "n") %>%
  59.   arrange(desc(n)) %>% # largest count first
  60.   bar_plot_fraktionen()
  61. ```
  62. ### Who gives the most speeches?
  63. ```{r}
  64. res$reden %>% # the ten `redner` ids with the most rows in `reden`
  65.   count(redner, name = "n") %>%
  66.   arrange(desc(n)) %>%
  67.   left_join(res$redner,
  68.             by = c(redner = "id")) %>% # add the matching columns of `res$redner`
  69.   head(n = 10)
  70. ```
  71. ### Who talks the longest?
  72. ```{r}
  73. res$talks %>% # the ten `redner` ids with the highest mean `content` length
  74.   mutate(content_len = stringr::str_length(content)) %>% # namespace-qualified: the setup chunk does not attach stringr
  75.   group_by(redner) %>%
  76.   summarise(avg_content_len = mean(content_len)) %>%
  77.   arrange(desc(avg_content_len)) %>% # longest average first
  78.   left_join(res$redner, by = c(redner = "id")) %>% # add the matching columns of `res$redner`
  79.   head(n = 10)
  80. ```
  81. ### Which party gives the most applause to which parties?
  82. ```{r}
  83. res$applause %>% # sum the logical per-party columns for each `on_fraktion`
  84.   left_join(res$redner, by = c(on_redner = "id")) %>%
  85.   select(on_fraktion = fraktion, where(is.logical)) %>%
  86.   arrange(on_fraktion) %>%
  87.   group_by(on_fraktion) %>%
  88.   summarise(`AfD` = sum(`AfD`),
  89.             `BÜNDNIS 90 / DIE GRÜNEN` = sum(`BÜNDNIS_90_DIE_GRÜNEN`),
  90.             `CDU/CSU` = sum(`CDU_CSU`),
  91.             `DIE LINKE` = sum(`DIE_LINKE`),
  92.             `FDP` = sum(`FDP`),
  93.             `SPD` = sum(`SPD`))
  94. ```
  95. ### Which party comments the most on which parties?
  96. ```{r}
  97. res$comments %>% # for each `on_fraktion`, count the comments whose `by_fraktion` matches each party
  98.   left_join(res$redner, by = c(on_redner = "id")) %>% # both tables have `fraktion`, hence the .x/.y suffixes
  99.   select(by_fraktion = fraktion.x, on_fraktion = fraktion.y) %>%
  100.   group_by(on_fraktion) %>%
  101.   summarise(`AfD` = sum(stringr::str_detect(by_fraktion, "AfD"), na.rm = TRUE), # stringr:: because the setup chunk does not attach stringr
  102.             `BÜNDNIS 90 / DIE GRÜNEN` = sum(stringr::str_detect(by_fraktion, "BÜNDNIS 90/DIE GRÜNEN"), na.rm = TRUE),
  103.             `CDU/CSU` = sum(stringr::str_detect(by_fraktion, "CDU/CSU"), na.rm = TRUE),
  104.             `DIE LINKE` = sum(stringr::str_detect(by_fraktion, "DIE LINKE"), na.rm = TRUE),
  105.             `FDP` = sum(stringr::str_detect(by_fraktion, "FDP"), na.rm = TRUE),
  106.             `SPD` = sum(stringr::str_detect(by_fraktion, "SPD"), na.rm = TRUE)) # na.rm = TRUE drops rows where `by_fraktion` is NA
  107. ```
  108. ### When are which topics discussed the most?
  109. ```{r, fig.width=8}
  110. pandemic_pattern <- "(?i)virus|corona|covid|lockdown" # (?i) makes each pattern case-insensitive
  111. climate_pattern <- "(?i)klimawandel|erderwärmung|co2|treibhaus|methan|kyoto-protokoll|klimaabkommen"
  112. pension_pattern <- "(?i)rente|pension|altersarmut"
  113. topic_patterns <- c(pandemic = pandemic_pattern, # the names label the patterns
  114.                     climate = climate_pattern,
  115.                     pension = pension_pattern)
  116. word_usage_by_date(res, topic_patterns) %>%
  117.   ggplot(aes(x = date, y = count, color = pattern)) +
  118.   geom_point() +
  119.   labs(x = "date of session", y = "occurence of word per session",
  120.        color = "Topic")
  121. ```