|
|
|
@@ -18,6 +18,8 @@ knitr::opts_chunk$set( |
|
|
|
library(hateimparlament) |
|
|
|
library(dplyr) |
|
|
|
library(ggplot2) |
|
|
|
library(stringr) |
|
|
|
library(tidyr) |
|
|
|
``` |
|
|
|
|
|
|
|
## Preparation of data |
|
|
|
@@ -48,25 +50,28 @@ talks <- res$talks |
|
|
|
## Analysis |
|
|
|
|
|
|
|
Now we can start analysing our parsed dataset, e.g. find out which party gives the most talks: |
|
|
|
```{r, fig.width=8} |
|
|
|
```{r, fig.width=7} |
|
|
|
join_redner(res$reden, res) %>% |
|
|
|
group_by(fraktion) %>% |
|
|
|
summarize(n = n()) %>% |
|
|
|
arrange(n) %>% |
|
|
|
bar_plot_fraktionen() |
|
|
|
bar_plot_fraktionen(n, fill = fraktion, title="Number of speeches given by fraction", ylab="Number of speeches") |
|
|
|
``` |
|
|
|
|
|
|
|
### Count a word occurrence |
|
|
|
Or count the occurrences of a given word: |
|
|
|
|
|
|
|
```{r, fig.width=8} |
|
|
|
find_word(res, "hitler") %>% |
|
|
|
```{r, fig.width=7} |
|
|
|
find_word(res, "Kohleausstieg") %>% |
|
|
|
filter(occurences > 0) %>% |
|
|
|
join_redner(res) %>% |
|
|
|
select(content, fraktion) %>% |
|
|
|
filter(!is.na(fraktion)) %>% |
|
|
|
group_by(fraktion) %>% |
|
|
|
summarize(n = n()) %>% |
|
|
|
arrange(desc(n)) %>% |
|
|
|
bar_plot_fraktionen() |
|
|
|
bar_plot_fraktionen(n, fill = fraktion, |
|
|
|
title = "Parties using the word 'Kohleausstieg' the most (absolutely)", |
|
|
|
ylab = "Number of uses of 'Kohleausstieg'") |
|
|
|
``` |
|
|
|
|
|
|
|
### Who gives the most speeches? |
|
|
|
@@ -105,9 +110,24 @@ res$applause %>% |
|
|
|
"CDU/CSU" = sum(`CDU_CSU`), |
|
|
|
"DIE LINKE" = sum(`DIE_LINKE`), |
|
|
|
"FDP" = sum(`FDP`), |
|
|
|
"SPD" = sum(`SPD`)) |
|
|
|
"SPD" = sum(`SPD`)) -> tb |
|
|
|
``` |
|
|
|
|
|
|
|
To plot our results, we reshape them into long format and produce a bar plot: |
|
|
|
|
|
|
|
```{r, fig.width=7} |
|
|
|
# Reshape the wide applause table `tb` into long format: every numeric
# column becomes one row, with the column name stored in `by_fraktion` and
# the cell value stored in `value`.
# The arguments after the column selection are named explicitly. Passed
# positionally, "count" was matched to `names_prefix` (not `values_to`) by
# older tidyr, and recent tidyr places `...` right after `cols`, so
# positional arguments are no longer matched to `names_to` at all.
pivot_longer(
  tb,
  where(is.numeric),
  names_to = "by_fraktion",
  values_to = "value"
) %>%
  # Drop rows that have no applauded fraction.
  filter(!is.na(on_fraktion)) %>%
  # NOTE(review): presumably bar_plot_fraktionen() expects the x variable in
  # a column called `fraktion` -- confirm against the package helper.
  rename(fraktion = on_fraktion) %>%
  bar_plot_fraktionen(
    value,
    fill = by_fraktion,
    title = "Number of rounds of applauses from fractions to fractions",
    xlab = "Applauded fraction",
    ylab = "Rounds of applauses",
    filllab = "Applauding fraction"
  )
|
|
|
``` |
|
|
|
|
|
|
|
|
|
|
|
### Which party comments the most on which parties? |
|
|
|
|
|
|
|
```{r} |
|
|
|
@@ -120,12 +140,25 @@ res$comments %>% |
|
|
|
`CDU/CSU` = sum(str_detect(by_fraktion, "CDU/CSU"), na.rm = T), |
|
|
|
`DIE LINKE` = sum(str_detect(by_fraktion, "DIE LINKE"), na.rm=T), |
|
|
|
`FDP` = sum(str_detect(by_fraktion, "FDP"), na.rm=T), |
|
|
|
`SPD` = sum(str_detect(by_fraktion, "SPD"), na.rm=T)) |
|
|
|
`SPD` = sum(str_detect(by_fraktion, "SPD"), na.rm=T)) -> tb |
|
|
|
``` |
|
|
|
Analogously, we plot the results: |
|
|
|
|
|
|
|
```{r, fig.width=7} |
|
|
|
# Reshape the wide comments table `tb` into long format: every numeric
# column becomes one row, with the column name stored in `by_fraktion` and
# the cell value stored in `value`.
# The arguments after the column selection are named explicitly. Passed
# positionally, "count" was matched to `names_prefix` (not `values_to`) by
# older tidyr, and recent tidyr places `...` right after `cols`, so
# positional arguments are no longer matched to `names_to` at all.
pivot_longer(
  tb,
  where(is.numeric),
  names_to = "by_fraktion",
  values_to = "value"
) %>%
  # Drop rows that have no commented-on fraction.
  filter(!is.na(on_fraktion)) %>%
  # NOTE(review): presumably bar_plot_fraktionen() expects the x variable in
  # a column called `fraktion` -- confirm against the package helper.
  rename(fraktion = on_fraktion) %>%
  bar_plot_fraktionen(
    value,
    fill = by_fraktion,
    title = "Number of comments from fractions to fractions",
    xlab = "Commented fraction",
    ylab = "Number of comments",
    filllab = "Commenting fraction"
  )
|
|
|
``` |
|
|
|
|
|
|
|
### When are which topics discussed the most? |
|
|
|
|
|
|
|
```{r, fig.width=8} |
|
|
|
```{r, fig.width=7} |
|
|
|
pandemic_pattern <- "(?i)virus|corona|covid|lockdown" |
|
|
|
climate_pattern <- "(?i)klimawandel|erderwärmung|co2|treibhaus|methan|kyoto-protokoll|klimaabkommen" |
|
|
|
pension_pattern <- "(?i)rente|pension|altersarmut" |
|
|
|
|