From d5f74c5bf37c265f615946ff43dcfd880b5c3c17 Mon Sep 17 00:00:00 2001 From: Leon Burgard Date: Sun, 8 Aug 2021 00:49:26 +0200 Subject: [PATCH] organized vignettes --- vignettes/explicittopic.Rmd | 85 +++++++++++++++++++++++++ vignettes/generalquestions.Rmd | 86 +++++++++++++++++++++++++ vignettes/interaction.Rmd | 113 +++++++++++++++++++++++++++++++++ 3 files changed, 284 insertions(+) create mode 100644 vignettes/explicittopic.Rmd create mode 100644 vignettes/generalquestions.Rmd create mode 100644 vignettes/interaction.Rmd diff --git a/vignettes/explicittopic.Rmd b/vignettes/explicittopic.Rmd new file mode 100644 index 0000000..5149ecf --- /dev/null +++ b/vignettes/explicittopic.Rmd @@ -0,0 +1,85 @@ +--- +title: "explicittopic" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{explicittopic} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(hateimparlament) +library(dplyr) +library(ggplot2) +library(stringr) +library(tidyr) +``` + +## Preparation of data + +First, you need to download all records of the current legislative period. +```r +fetch_all("../inst/records/") # path to directory where records should be stored +``` +Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by: +```r +read_all("../inst/records/") %>% repair() -> res +``` +We also used `repair` to fix a bunch of formatting issues in the records and unpacked +the result into more descriptive variables. + +For development purposes, we load the tables from csv files. 
```{r}
res <- read_from_csv('../inst/csv/')
```
and unpack our tibbles
```{r}
comments <- res$comments
speeches <- res$speeches
speaker <- res$speaker
talks <- res$talks
```

## Analysis

Now we can start analysing our parsed dataset:

### Counting the occurrences of a given word:

```{r, fig.width=7}
find_word(res, "Kohleausstieg") %>%
  filter(occurences > 0) %>%
  join_speaker(res) %>%
  select(content, fraction) %>%
  filter(!is.na(fraction)) %>%
  group_by(fraction) %>%
  summarize(n = n()) %>%
  arrange(desc(n)) %>%
  bar_plot_fractions(title = "Parties using the word 'Kohleausstieg' the most (absolutely)",
                     ylab = "Number of uses of 'Kohleausstieg'",
                     flipped = F)
```

### When are which topics discussed the most?

```{r, fig.width=7}
pandemic_pattern <- "(?i)virus|corona|covid|lockdown"
climate_pattern <- "(?i)klimawandel|erderwärmung|co2|treibhaus|methan|kyoto-protokoll|klimaabkommen"
pension_pattern <- "(?i)rente|pension|altersarmut"

word_usage_by_date(res, c(pandemic = pandemic_pattern,
                          climate = climate_pattern,
                          pension = pension_pattern)) %>%
  ggplot(aes(x = date, y = count, color = pattern)) +
  xlab("date of session") +
  ylab("occurence of word per session") +
  labs(color = "Topic") +
  geom_point()
```
diff --git a/vignettes/generalquestions.Rmd b/vignettes/generalquestions.Rmd
new file mode 100644
index 0000000..68241d2
--- /dev/null
+++ b/vignettes/generalquestions.Rmd
@@ -0,0 +1,86 @@
+---
+title: "generalquestions"
+output: rmarkdown::html_vignette
+vignette: >
+ %\VignetteIndexEntry{generalquestions}
+ %\VignetteEngine{knitr::rmarkdown}
+ %\VignetteEncoding{UTF-8}
+---
+
```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(hateimparlament)
library(dplyr)
library(ggplot2)
library(stringr)
library(tidyr)
```

## Preparation of data

First, you need to download all records of the current legislative period. 
```r
fetch_all("../inst/records/") # path to directory where records should be stored
```
Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
```r
read_all("../inst/records/") %>% repair() -> res
```
We also used `repair` to fix a bunch of formatting issues in the records and unpacked
the result into more descriptive variables.

For development purposes, we load the tables from csv files.
```{r}
res <- read_from_csv('../inst/csv/')
```
and unpack our tibbles
```{r}
comments <- res$comments
speeches <- res$speeches
speaker <- res$speaker
talks <- res$talks
```

## Analysis

Now we can start analysing our parsed dataset:

### Which party gives the most talks?

```{r, fig.width=7}
join_speaker(res$speeches, res) %>%
  group_by(fraction) %>%
  summarize(n = n()) %>%
  arrange(n) %>%
  bar_plot_fractions(title="Number of speeches given by fraction",
                     ylab="Number of speeches")
```

### Who gives the most speeches?

```{r}
res$speeches %>%
  group_by(speaker) %>%
  summarize(n = n()) %>%
  arrange(-n) %>%
  left_join(res$speaker, by=c("speaker" = "id")) %>%
  head(10)
```

### Who talks the longest? 
+ +```{r} +res$talks %>% + mutate(content_len = str_length(content)) %>% + group_by(speaker) %>% + summarize(avg_content_len = mean(content_len)) %>% + arrange(-avg_content_len) %>% + left_join(res$speaker, by=c("speaker" = "id")) %>% + head(10) +``` diff --git a/vignettes/interaction.Rmd b/vignettes/interaction.Rmd new file mode 100644 index 0000000..05a6c40 --- /dev/null +++ b/vignettes/interaction.Rmd @@ -0,0 +1,113 @@ +--- +title: "interaction" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{interaction} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +```{r setup} +library(hateimparlament) +library(dplyr) +library(ggplot2) +library(stringr) +library(tidyr) +``` + +## Preparation of data + +First, you need to download all records of the current legislative period. +```r +fetch_all("../inst/records/") # path to directory where records should be stored +``` +Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by: +```r +read_all("../inst/records/") %>% repair() -> res +``` +We also used `repair` to fix a bunch of formatting issues in the records and unpacked +the result into more descriptive variables. + +For development purposes, we load the tables from csv files. +```{r} +res <- read_from_csv('../inst/csv/') +``` +and unpack our tibbles +```{r} +comments <- res$comments +speeches <- res$speeches +speaker <- res$speaker +talks <- res$talks +``` + +## Analysis + +Now we can start analysing our parsed dataset: + +### Which party gives the most applause to which parties? 
+ +```{r} +res$applause %>% + left_join(res$speaker, by=c("on_speaker" = "id")) %>% + select(on_fraction = fraction, where(is.logical)) %>% + group_by(on_fraction) %>% + arrange(on_fraction) %>% + summarize("AfD" = sum(`AfD`), + "BÜNDNIS 90 / DIE GRÜNEN" = sum(`BUENDNIS_90_DIE_GRUENEN`), + "CDU/CSU" = sum(`CDU_CSU`), + "DIE LINKE" = sum(`DIE_LINKE`), + "FDP" = sum(`FDP`), + "SPD" = sum(`SPD`)) -> tb +``` + +For plotting our results we reorganize them a bit and produce a bar plot: + +```{r, fig.width=7} +pivot_longer(tb, where(is.numeric), "by_fraction", "count") %>% + filter(!is.na(on_fraction)) %>% + bar_plot_fractions(x_variable = on_fraction, + y_variable = value, + fill = by_fraction, + title = "Number of rounds of applauses from fractions to fractions", + xlab = "Applauded fraction", + ylab = "Rounds of applauses", + filllab = "Applauding fraction", + flipped = FALSE) +``` + + +### Which party comments the most on which parties? + +```{r} +res$comments %>% + left_join(res$speaker, by=c("on_speaker" = "id")) %>% + select(by_fraction = fraction.x, on_fraction = fraction.y) %>% + group_by(on_fraction) %>% + summarize(`AfD` = sum(str_detect(by_fraction, "AfD"), na.rm=T), + `BÜNDNIS 90 / DIE GRÜNEN` = sum(str_detect(by_fraction, "BÜNDNIS 90/DIE GRÜNEN"), na.rm=T), + `CDU/CSU` = sum(str_detect(by_fraction, "CDU/CSU"), na.rm = T), + `DIE LINKE` = sum(str_detect(by_fraction, "DIE LINKE"), na.rm=T), + `FDP` = sum(str_detect(by_fraction, "FDP"), na.rm=T), + `SPD` = sum(str_detect(by_fraction, "SPD"), na.rm=T)) -> tb +``` +Analogously we plot the results: + +```{r, fig.width=7} +pivot_longer(tb, where(is.numeric), "by_fraction", "count") %>% + filter(!is.na(on_fraction)) %>% + bar_plot_fractions(x_variable = on_fraction, + y_variable = value, + fill = by_fraction, + title = "Number of comments from fractions to fractions", + xlab = "Commented fraction", + ylab = "Number of comments", + filllab = "Commenting fraction", + flipped = FALSE) +```