diff --git a/R/analyze.R b/R/analyze.R index 18dd453..14ec9d6 100644 --- a/R/analyze.R +++ b/R/analyze.R @@ -12,22 +12,28 @@ join_redner <- function(tb, res, fraktion_only = F) { else joined } +#' @export party_colors <- c( - SPD="#DF0B25", - "CDU/CSU"="#000000", AfD="#1A9FDD", - "AfD&Fraktionslos"="#1A9FDD", - "DIE LINKE"="#BC3475", - "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B", FDP="#FEEB34", + "CDU/CSU"="#000000", + SPD="#DF0B25", + "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B", + "DIE LINKE"="#BC3475", + "AfD&Fraktionslos"="#1A9FDD", Fraktionslos="#FEEB34" ) #' @export -bar_plot_fraktionen <- function(tb) { - ggplot(tb, aes(x = reorder(fraktion, -n), y = n, fill = fraktion)) + - scale_fill_manual(values = party_colors) + - geom_bar(stat = "identity") +bar_plot_fraktionen <- function(tb, variable = n, fill = fraktion, title = NULL, + xlab = "Fraction", ylab = "n", filllab = "Fraction") { + ggplot(tb, aes(x = reorder(fraktion, -{{variable}}), y = {{variable}}, fill = {{fill}})) + + scale_fill_manual(values = party_colors) + + xlab(xlab) + + ylab(ylab) + + labs(fill = filllab) + + ggtitle(title) + + geom_bar(stat = "identity") } # Counts how many talks do match a given pattern and summarises by date diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd index 6fc9370..833f1a7 100644 --- a/vignettes/funwithdata.Rmd +++ b/vignettes/funwithdata.Rmd @@ -18,6 +18,8 @@ knitr::opts_chunk$set( library(hateimparlament) library(dplyr) library(ggplot2) +library(stringr) +library(tidyr) ``` ## Preparation of data @@ -48,25 +50,28 @@ talks <- res$talks ## Analysis Now we can start analysing our parsed dataset, e.g. 
find out which party gives the most talks: -```{r, fig.width=8} +```{r, fig.width=7} join_redner(res$reden, res) %>% group_by(fraktion) %>% summarize(n = n()) %>% arrange(n) %>% - bar_plot_fraktionen() + bar_plot_fraktionen(n, fill = fraktion, title="Number of speeches given by fraction", ylab="Number of speeches") ``` -### Count a word occurence +or count the occurrences of a given word: -```{r, fig.width=8} -find_word(res, "hitler") %>% +```{r, fig.width=7} +find_word(res, "Kohleausstieg") %>% filter(occurences > 0) %>% join_redner(res) %>% select(content, fraktion) %>% + filter(!is.na(fraktion)) %>% group_by(fraktion) %>% summarize(n = n()) %>% arrange(desc(n)) %>% - bar_plot_fraktionen() + bar_plot_fraktionen(n, fill = fraktion, + title = "Parties using the word 'Kohleausstieg' the most (absolutely)", + ylab = "Number of uses of 'Kohleausstieg'") ``` ### Who gives the most speeches? @@ -105,9 +110,24 @@ res$applause %>% "CDU/CSU" = sum(`CDU_CSU`), "DIE LINKE" = sum(`DIE_LINKE`), "FDP" = sum(`FDP`), - "SPD" = sum(`SPD`)) + "SPD" = sum(`SPD`)) -> tb ``` +For plotting our results we reorganize them a bit and produce a bar plot: + +```{r, fig.width=7} +pivot_longer(tb, where(is.numeric), names_to = "by_fraktion") %>% + filter(!is.na(on_fraktion)) %>% + rename(fraktion = on_fraktion) %>% + bar_plot_fraktionen(value, + fill = by_fraktion, + title = "Number of rounds of applauses from fractions to fractions", + xlab = "Applauded fraction", + ylab = "Rounds of applauses", + filllab = "Applauding fraction") +``` + + ### Which party comments the most on which parties? 
```{r} @@ -120,12 +140,25 @@ res$comments %>% `CDU/CSU` = sum(str_detect(by_fraktion, "CDU/CSU"), na.rm = T), `DIE LINKE` = sum(str_detect(by_fraktion, "DIE LINKE"), na.rm=T), `FDP` = sum(str_detect(by_fraktion, "FDP"), na.rm=T), - `SPD` = sum(str_detect(by_fraktion, "SPD"), na.rm=T)) + `SPD` = sum(str_detect(by_fraktion, "SPD"), na.rm=T)) -> tb +``` +Analogously we plot the results: + +```{r, fig.width=7} +pivot_longer(tb, where(is.numeric), names_to = "by_fraktion") %>% + filter(!is.na(on_fraktion)) %>% + rename(fraktion = on_fraktion) %>% + bar_plot_fraktionen(value, + fill = by_fraktion, + title = "Number of comments from fractions to fractions", + xlab = "Commented fraction", + ylab = "Number of comments", + filllab = "Commenting fraction") ``` ### When are which topics discussed the most? -```{r, fig.width=8} +```{r, fig.width=7} pandemic_pattern <- "(?i)virus|corona|covid|lockdown" climate_pattern <- "(?i)klimawandel|erderwärmung|co2|treibhaus|methan|kyoto-protokoll|klimaabkommen" pension_pattern <- "(?i)rente|pension|altersarmut"