| @@ -0,0 +1,85 @@ | |||||
| --- | |||||
| title: "explicittopic" | |||||
| output: rmarkdown::html_vignette | |||||
| vignette: > | |||||
| %\VignetteIndexEntry{explicittopic} | |||||
| %\VignetteEngine{knitr::rmarkdown} | |||||
| %\VignetteEncoding{UTF-8} | |||||
| --- | |||||
| ```{r, include = FALSE} | |||||
| # Chunk defaults for this vignette: collapse source and output, prefix output with "#>". | |||||
| knitr::opts_chunk$set(collapse = TRUE, comment = "#>") | |||||
| ``` | |||||
| ```{r setup} | |||||
| library(hateimparlament) | |||||
| library(dplyr) | |||||
| library(ggplot2) | |||||
| library(stringr) | |||||
| library(tidyr) | |||||
| ``` | |||||
| ## Preparation of data | |||||
| First, you need to download all records of the current legislative period. | |||||
| ```r | |||||
| # Path to the directory where records should be stored. | |||||
| records_dir <- "../inst/records/" | |||||
| fetch_all(records_dir) | |||||
| ``` | |||||
| Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | |||||
| ```r | |||||
| # Parse the stored records and pass the result through repair(). | |||||
| res <- read_all("../inst/records/") %>% | |||||
|   repair() | |||||
| ``` | |||||
| We also used `repair` to fix a bunch of formatting issues in the records and unpacked | |||||
| the result into more descriptive variables. | |||||
| For development purposes, we load the tables from csv files. | |||||
| ```{r} | |||||
| # Load the tables from the csv files under ../inst/csv/. | |||||
| res <- read_from_csv("../inst/csv/") | |||||
| ``` | |||||
| and unpack our tibbles | |||||
| ```{r} | |||||
| # Pull the individual tables out of `res` so they can be referenced by name. | |||||
| comments <- res[["comments"]] | |||||
| speeches <- res[["speeches"]] | |||||
| speaker <- res[["speaker"]] | |||||
| talks <- res[["talks"]] | |||||
| ``` | |||||
| ## Analysis | |||||
| Now we can start analysing our parsed dataset: | |||||
| ### Counting the occurrences of a given word: | |||||
| ```{r, fig.width=7} | |||||
| # Keep the rows in which "Kohleausstieg" occurs at least once, attach the | |||||
| # speaker's fraction and tally the rows per fraction (largest count first). | |||||
| # NOTE(review): `n` counts matching rows, not the sum of `occurences`, while | |||||
| # the y label speaks of "uses" - confirm which of the two is intended. | |||||
| find_word(res, "Kohleausstieg") %>% | |||||
|   filter(occurences > 0) %>% | |||||
|   join_speaker(res) %>% | |||||
|   select(content, fraction) %>% | |||||
|   filter(!is.na(fraction)) %>% | |||||
|   count(fraction, sort = TRUE) %>% | |||||
|   bar_plot_fractions( | |||||
|     title = "Parties using the word 'Kohleausstieg' the most (absolutely)", | |||||
|     ylab = "Number of uses of 'Kohleausstieg'", | |||||
|     flipped = FALSE | |||||
|   ) | |||||
| ``` | |||||
| ### When are which topics discussed the most? | |||||
| ```{r, fig.width=7} | |||||
| # One case-insensitive regular expression per topic. | |||||
| pandemic_pattern <- "(?i)virus|corona|covid|lockdown" | |||||
| climate_pattern <- "(?i)klimawandel|erderwärmung|co2|treibhaus|methan|kyoto-protokoll|klimaabkommen" | |||||
| pension_pattern <- "(?i)rente|pension|altersarmut" | |||||
| topic_patterns <- c( | |||||
|   pandemic = pandemic_pattern, | |||||
|   climate = climate_pattern, | |||||
|   pension = pension_pattern | |||||
| ) | |||||
| # Scatter plot of the counts by date, coloured by pattern. | |||||
| word_usage_by_date(res, topic_patterns) %>% | |||||
|   ggplot(aes(x = date, y = count, color = pattern)) + | |||||
|   geom_point() + | |||||
|   labs( | |||||
|     x = "date of session", | |||||
|     y = "occurence of word per session", | |||||
|     color = "Topic" | |||||
|   ) | |||||
| ``` | |||||
| @@ -0,0 +1,86 @@ | |||||
| --- | |||||
| title: "generalquestions" | |||||
| output: rmarkdown::html_vignette | |||||
| vignette: > | |||||
| %\VignetteIndexEntry{generalquestions} | |||||
| %\VignetteEngine{knitr::rmarkdown} | |||||
| %\VignetteEncoding{UTF-8} | |||||
| --- | |||||
| ```{r, include = FALSE} | |||||
| # Chunk defaults for this vignette: collapse source and output, prefix output with "#>". | |||||
| knitr::opts_chunk$set(collapse = TRUE, comment = "#>") | |||||
| ``` | |||||
| ```{r setup} | |||||
| library(hateimparlament) | |||||
| library(dplyr) | |||||
| library(ggplot2) | |||||
| library(stringr) | |||||
| library(tidyr) | |||||
| ``` | |||||
| ## Preparation of data | |||||
| First, you need to download all records of the current legislative period. | |||||
| ```r | |||||
| # Path to the directory where records should be stored. | |||||
| records_dir <- "../inst/records/" | |||||
| fetch_all(records_dir) | |||||
| ``` | |||||
| Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | |||||
| ```r | |||||
| # Parse the stored records and pass the result through repair(). | |||||
| res <- read_all("../inst/records/") %>% | |||||
|   repair() | |||||
| ``` | |||||
| We also used `repair` to fix a bunch of formatting issues in the records and unpacked | |||||
| the result into more descriptive variables. | |||||
| For development purposes, we load the tables from csv files. | |||||
| ```{r} | |||||
| # Load the tables from the csv files under ../inst/csv/. | |||||
| res <- read_from_csv("../inst/csv/") | |||||
| ``` | |||||
| and unpack our tibbles | |||||
| ```{r} | |||||
| # Pull the individual tables out of `res` so they can be referenced by name. | |||||
| comments <- res[["comments"]] | |||||
| speeches <- res[["speeches"]] | |||||
| speaker <- res[["speaker"]] | |||||
| talks <- res[["talks"]] | |||||
| ``` | |||||
| ## Analysis | |||||
| Now we can start analysing our parsed dataset: | |||||
| ### Which party gives the most speeches? | |||||
| ```{r, fig.width=7} | |||||
| # Tally the speeches per fraction and plot them in ascending order of count. | |||||
| res$speeches %>% | |||||
|   join_speaker(res) %>% | |||||
|   count(fraction) %>% | |||||
|   arrange(n) %>% | |||||
|   bar_plot_fractions( | |||||
|     title = "Number of speeches given by fraction", | |||||
|     ylab = "Number of speeches" | |||||
|   ) | |||||
| ``` | |||||
| ### Who gives the most speeches? | |||||
| ```{r} | |||||
| # Number of speeches per speaker id, largest first; the speaker table is | |||||
| # joined on for readable output and the first ten rows are shown. | |||||
| res$speeches %>% | |||||
|   count(speaker) %>% | |||||
|   arrange(desc(n)) %>% | |||||
|   left_join(res$speaker, by = c("speaker" = "id")) %>% | |||||
|   head(10) | |||||
| ``` | |||||
| ### Who talks the longest? | |||||
| ```{r} | |||||
| # Mean length of `content` (in characters) per speaker id, largest first; | |||||
| # the speaker table is joined on and the first ten rows are shown. | |||||
| res$talks %>% | |||||
|   group_by(speaker) %>% | |||||
|   summarize(avg_content_len = mean(str_length(content))) %>% | |||||
|   arrange(desc(avg_content_len)) %>% | |||||
|   left_join(res$speaker, by = c("speaker" = "id")) %>% | |||||
|   head(10) | |||||
| ``` | |||||
| @@ -0,0 +1,113 @@ | |||||
| --- | |||||
| title: "interaction" | |||||
| output: rmarkdown::html_vignette | |||||
| vignette: > | |||||
| %\VignetteIndexEntry{interaction} | |||||
| %\VignetteEngine{knitr::rmarkdown} | |||||
| %\VignetteEncoding{UTF-8} | |||||
| --- | |||||
| ```{r, include = FALSE} | |||||
| # Chunk defaults for this vignette: collapse source and output, prefix output with "#>". | |||||
| knitr::opts_chunk$set(collapse = TRUE, comment = "#>") | |||||
| ``` | |||||
| ```{r setup} | |||||
| library(hateimparlament) | |||||
| library(dplyr) | |||||
| library(ggplot2) | |||||
| library(stringr) | |||||
| library(tidyr) | |||||
| ``` | |||||
| ## Preparation of data | |||||
| First, you need to download all records of the current legislative period. | |||||
| ```r | |||||
| # Path to the directory where records should be stored. | |||||
| records_dir <- "../inst/records/" | |||||
| fetch_all(records_dir) | |||||
| ``` | |||||
| Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | |||||
| ```r | |||||
| # Parse the stored records and pass the result through repair(). | |||||
| res <- read_all("../inst/records/") %>% | |||||
|   repair() | |||||
| ``` | |||||
| We also used `repair` to fix a bunch of formatting issues in the records and unpacked | |||||
| the result into more descriptive variables. | |||||
| For development purposes, we load the tables from csv files. | |||||
| ```{r} | |||||
| # Load the tables from the csv files under ../inst/csv/. | |||||
| res <- read_from_csv("../inst/csv/") | |||||
| ``` | |||||
| and unpack our tibbles | |||||
| ```{r} | |||||
| # Pull the individual tables out of `res` so they can be referenced by name. | |||||
| comments <- res[["comments"]] | |||||
| speeches <- res[["speeches"]] | |||||
| speaker <- res[["speaker"]] | |||||
| talks <- res[["talks"]] | |||||
| ``` | |||||
| ## Analysis | |||||
| Now we can start analysing our parsed dataset: | |||||
| ### Which party gives the most applause to which parties? | |||||
| ```{r} | |||||
| # Attach the fraction of the applauded speaker (`on_speaker`) and, per such | |||||
| # fraction, sum the logical per-fraction columns, i.e. count their TRUE values. | |||||
| # NOTE(review): presumably one row of res$applause is one round of applause | |||||
| # and each logical column flags a fraction taking part - confirm. | |||||
| tb <- res$applause %>% | |||||
|   left_join(res$speaker, by = c("on_speaker" = "id")) %>% | |||||
|   select(on_fraction = fraction, where(is.logical)) %>% | |||||
|   group_by(on_fraction) %>% | |||||
|   summarize( | |||||
|     AfD = sum(AfD), | |||||
|     `BÜNDNIS 90 / DIE GRÜNEN` = sum(BUENDNIS_90_DIE_GRUENEN), | |||||
|     `CDU/CSU` = sum(CDU_CSU), | |||||
|     `DIE LINKE` = sum(DIE_LINKE), | |||||
|     FDP = sum(FDP), | |||||
|     SPD = sum(SPD) | |||||
|   ) | |||||
| ``` | |||||
| For plotting our results we reorganize them a bit and produce a bar plot: | |||||
| ```{r, fig.width=7} | |||||
| # Long format: one row per (applauded fraction, applauding fraction) pair. | |||||
| # The pivot arguments are passed by name; the counts stay in pivot_longer's | |||||
| # default `value` column, which is what `y_variable` refers to. | |||||
| tb %>% | |||||
|   pivot_longer( | |||||
|     where(is.numeric), | |||||
|     names_to = "by_fraction", | |||||
|     values_to = "value" | |||||
|   ) %>% | |||||
|   filter(!is.na(on_fraction)) %>% | |||||
|   bar_plot_fractions( | |||||
|     x_variable = on_fraction, | |||||
|     y_variable = value, | |||||
|     fill = by_fraction, | |||||
|     title = "Number of rounds of applauses from fractions to fractions", | |||||
|     xlab = "Applauded fraction", | |||||
|     ylab = "Rounds of applauses", | |||||
|     filllab = "Applauding fraction", | |||||
|     flipped = FALSE | |||||
|   ) | |||||
| ``` | |||||
| ### Which party comments the most on which parties? | |||||
| ```{r} | |||||
| # Attach the fraction of the commented-on speaker (`on_speaker`); after the | |||||
| # join, `fraction.x` comes from res$comments and `fraction.y` from res$speaker. | |||||
| # Per commented fraction, count the comments whose `by_fraction` matches each | |||||
| # fraction name; rows with a missing `by_fraction` are ignored by na.rm. | |||||
| tb <- res$comments %>% | |||||
|   left_join(res$speaker, by = c("on_speaker" = "id")) %>% | |||||
|   select(by_fraction = fraction.x, on_fraction = fraction.y) %>% | |||||
|   group_by(on_fraction) %>% | |||||
|   summarize( | |||||
|     `AfD` = sum(str_detect(by_fraction, "AfD"), na.rm = TRUE), | |||||
|     `BÜNDNIS 90 / DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm = TRUE), | |||||
|     `CDU/CSU` = sum(str_detect(by_fraction, "CDU/CSU"), na.rm = TRUE), | |||||
|     `DIE LINKE` = sum(str_detect(by_fraction, "DIE LINKE"), na.rm = TRUE), | |||||
|     `FDP` = sum(str_detect(by_fraction, "FDP"), na.rm = TRUE), | |||||
|     `SPD` = sum(str_detect(by_fraction, "SPD"), na.rm = TRUE) | |||||
|   ) | |||||
| ``` | |||||
| Analogously we plot the results: | |||||
| ```{r, fig.width=7} | |||||
| # Long format: one row per (commented fraction, commenting fraction) pair. | |||||
| # The pivot arguments are passed by name; the counts stay in pivot_longer's | |||||
| # default `value` column, which is what `y_variable` refers to. | |||||
| tb %>% | |||||
|   pivot_longer( | |||||
|     where(is.numeric), | |||||
|     names_to = "by_fraction", | |||||
|     values_to = "value" | |||||
|   ) %>% | |||||
|   filter(!is.na(on_fraction)) %>% | |||||
|   bar_plot_fractions( | |||||
|     x_variable = on_fraction, | |||||
|     y_variable = value, | |||||
|     fill = by_fraction, | |||||
|     title = "Number of comments from fractions to fractions", | |||||
|     xlab = "Commented fraction", | |||||
|     ylab = "Number of comments", | |||||
|     filllab = "Commenting fraction", | |||||
|     flipped = FALSE | |||||
|   ) | |||||
| ``` | |||||