| @@ -20,11 +20,11 @@ find_word <- function(res, word) { | |||
| #' add information from speaker table to a tibble containing speaker id | |||
| #' | |||
| #' @param tb tibble | |||
| #' @param res tibble | |||
| #' @param fraction_only bool | |||
| #' @param res list of tibbles | |||
| #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble | |||
| #' | |||
| #' left join speaker information from res$speaker into tb. | |||
| #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble | |||
| #' if fraction_only | |||
| #' | |||
| #' @export | |||
| join_speaker <- function(tb, res, fraction_only = F) { | |||
| @@ -33,7 +33,7 @@ join_speaker <- function(tb, res, fraction_only = F) { | |||
| else joined | |||
| } | |||
| #' lookup table for party colors | |||
| #' lookup table for official party colors | |||
| #' | |||
| #' @export | |||
| party_colors <- c( | |||
| @@ -51,27 +51,27 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", | |||
| "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", | |||
| "FDP", "AfD", NA_character_)) | |||
| #' plot data depending on fractions in a standardized, configurable way | |||
| #' Bar chart visualizing fraction based data | |||
| #' | |||
| #' Can be configured to also visualize data not related to fractions. | |||
| #' | |||
| #' @param tb tibble | |||
| #' @param x_variable column in tb | |||
| #' @param y_variable column in tb | |||
| #' @param fill column in tb | |||
| #' @param title char | |||
| #' @param xlab char | |||
| #' @param ylab char | |||
| #' @param filllab char | |||
| #' @param flipped bool | |||
| #' @param position char | |||
| #' @param reorder bool | |||
| #' @param x_variable column in tb, default is fraction | |||
| #' @param y_variable column in tb, default is n | |||
| #' @param fill column in tb, default is fraction | |||
| #' @param title plot title | |||
| #' @param xlab label for x axis, default is fraction | |||
| #' @param ylab label for y axis, default is n | |||
| #' @param filllab default is 'Fraction' | |||
| #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE | |||
| #' @param position default is 'dodge' | |||
| #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default). | |||
| #' | |||
| #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. | |||
| #' Then color the plot depending on the fill value | |||
| #' Give the plot a title, an x-label xlab as well as an y-label ylab | |||
| #' Color the legend according to filllab | |||
| #' Setting flipped to TRUE makes the bars horizontal | |||
| #' Improve positioning details according to position | |||
| #' and finally reorder x_variable (default ist to order fractions according to seat order) | |||
| #' Then color the plot depending on the fill value. | |||
| #' Give the plot a title and a label for x-axis and y-axis, | |||
| #' color the legend according to filllab and finally | |||
| #' improve positioning details according to position | |||
| #' | |||
| #' @export | |||
| bar_plot_fractions <- function(tb, | |||
| @@ -116,16 +116,15 @@ bar_plot_fractions <- function(tb, | |||
| if (flipped) plt + coord_flip() else plt | |||
| } | |||
| #' Counts how many talks do match a given pattern and summarises by date | |||
| #' | |||
| #' @param res tibble | |||
| #' @param patterns char list | |||
| #' @param name char ? what is name needed for?? | |||
| #' @param tidy bool, default F | |||
| #' | |||
| #' shorter summary if tidy=F | |||
| #' if tidy is set to T, the resulting tibble is tidy | |||
| #' | |||
| #' Word usage summarised by date | |||
| #' | |||
| #' Counts how many talks do match a given pattern and summarises by date. | |||
| #' | |||
| #' @param res List of Tibbles to be analysed. | |||
| #' @param patterns Words to look up. | |||
| #' @param name ? | |||
| #' @param tidy default is FALSE. | |||
| #' | |||
| #' @export | |||
| word_usage_by_date <- function(res, patterns, name, tidy=F) { | |||
| tb <- res$talks | |||
| @@ -205,13 +205,11 @@ parse_speakerlist <- function(speakerliste_xml) { | |||
| rolle_lang = d["rolle_lang",]) | |||
| } | |||
| #' Write the parsed and repaired results into a csv file to make loading and developing faster and easier | |||
| #' Write the parsed and repaired results into separate csv files | |||
| #' | |||
| #' @param tables tibble list | |||
| #' @param path char | |||
| #' @param create bool | |||
| #' | |||
| #' if create is set to TRUE, the directory given in path is created | |||
| #' @param tables list of tables to convert into a csv files. | |||
| #' @param path where to put the csv files. | |||
| #' @param create set TRUE if the path does not exist yet and you want to create it | |||
| #' | |||
| #' @export | |||
| write_to_csv <- function(tables, path="data/csv/", create=F) { | |||
| @@ -223,11 +221,12 @@ write_to_csv <- function(tables, path="data/csv/", create=F) { | |||
| write.table(tables$applause, str_c(path, "applause.csv")) | |||
| } | |||
| #' Read the needed tables for developing from a csv file. | |||
| #' | |||
| #' @param path char | |||
| #' | |||
| #' Reading the tables from a csv is way faster than reading and repairing the data every single time | |||
| #' create a tibble from the csv file | |||
| #' | |||
| #' @param path directory to read files from | |||
| #' | |||
| #' reading the tables from a csv is way faster than reading and repairing the data every single time | |||
| #' | |||
| #' @export | |||
| read_from_csv <- function(path="data/csv/") { | |||
| @@ -2,7 +2,7 @@ | |||
| % Please edit documentation in R/analyze.R | |||
| \name{bar_plot_fractions} | |||
| \alias{bar_plot_fractions} | |||
| \title{plot data depending on fractions in a standardized, configurable way} | |||
| \title{Bar chart visualizing fraction based data} | |||
| \usage{ | |||
| bar_plot_fractions( | |||
| tb, | |||
| @@ -21,34 +21,32 @@ bar_plot_fractions( | |||
| \arguments{ | |||
| \item{tb}{tibble} | |||
| \item{x_variable}{column in tb} | |||
| \item{x_variable}{column in tb, default is fraction} | |||
| \item{y_variable}{column in tb} | |||
| \item{y_variable}{column in tb, default is n} | |||
| \item{fill}{column in tb} | |||
| \item{fill}{column in tb, default is fraction} | |||
| \item{title}{char} | |||
| \item{title}{plot title} | |||
| \item{xlab}{char} | |||
| \item{xlab}{label for x axis, default is fraction} | |||
| \item{ylab}{char} | |||
| \item{ylab}{label for y axis, default is n} | |||
| \item{filllab}{char} | |||
| \item{filllab}{default is 'Fraction'} | |||
| \item{flipped}{bool} | |||
| \item{flipped}{if TRUE draw bars horizontally, else vertically. Default is TRUE} | |||
| \item{position}{char} | |||
| \item{position}{default is 'dodge'} | |||
| \item{reorder}{bool | |||
| \item{reorder}{Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default). | |||
| plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. | |||
| Then color the plot depending on the fill value | |||
| Give the plot a title, an x-label xlab as well as an y-label ylab | |||
| Color the legend according to filllab | |||
| Setting flipped to TRUE makes the bars horizontal | |||
| Improve positioning details according to position | |||
| and finally reorder x_variable (default ist to order fractions according to seat order)} | |||
| Then color the plot depending on the fill value. | |||
| Give the plot a title and a label for x-axis and y-axis, | |||
| color the legend according to filllab and finally | |||
| improve positioning details according to position} | |||
| } | |||
| \description{ | |||
| plot data depending on fractions in a standardized, configurable way | |||
| Can be configured to also visualize data not related to fractions. | |||
| } | |||
| @@ -8,6 +8,10 @@ fetch_all(download_dir = "data/records/", create = FALSE) | |||
| } | |||
| \arguments{ | |||
| \item{download_dir}{character} | |||
| \item{create}{bool | |||
| if create is TRUE, the directory given in download_dir is created} | |||
| } | |||
| \description{ | |||
| This fetches all available records of the 19th legislative period of the german Bundestag. | |||
| @@ -9,12 +9,12 @@ join_speaker(tb, res, fraction_only = F) | |||
| \arguments{ | |||
| \item{tb}{tibble} | |||
| \item{res}{tibble} | |||
| \item{res}{list of tibbles} | |||
| \item{fraction_only}{bool | |||
| \item{fraction_only}{if TRUE, only select fraction from the resulting joined tibble | |||
| left join speaker information from res$speaker into tb. | |||
| if fraction_only is TRUE, only fraction is selected from the resulting joined tibble} | |||
| if fraction_only} | |||
| } | |||
| \description{ | |||
| add information from speaker table to a tibble containing speaker id | |||
| @@ -3,7 +3,7 @@ | |||
| \docType{data} | |||
| \name{party_colors} | |||
| \alias{party_colors} | |||
| \title{lookup table for party colors} | |||
| \title{lookup table for official party colors} | |||
| \format{ | |||
| An object of class \code{character} of length 8. | |||
| } | |||
| @@ -11,6 +11,6 @@ An object of class \code{character} of length 8. | |||
| party_colors | |||
| } | |||
| \description{ | |||
| lookup table for party colors | |||
| lookup table for official party colors | |||
| } | |||
| \keyword{datasets} | |||
| @@ -2,15 +2,15 @@ | |||
| % Please edit documentation in R/parse.R | |||
| \name{read_from_csv} | |||
| \alias{read_from_csv} | |||
| \title{Read the needed tables for developing from a csv file.} | |||
| \title{create a tibble from the csv file} | |||
| \usage{ | |||
| read_from_csv(path = "data/csv/") | |||
| } | |||
| \arguments{ | |||
| \item{path}{char | |||
| \item{path}{directory to read files from | |||
| Reading the tables from a csv is way faster than reading and repairing the data every single time} | |||
| reading the tables from a csv is way faster than reading and repairing the data every single time} | |||
| } | |||
| \description{ | |||
| Read the needed tables for developing from a csv file. | |||
| create a tibble from the csv file | |||
| } | |||
| @@ -2,22 +2,19 @@ | |||
| % Please edit documentation in R/analyze.R | |||
| \name{word_usage_by_date} | |||
| \alias{word_usage_by_date} | |||
| \title{Counts how many talks do match a given pattern and summarises by date} | |||
| \title{Word usage summarised by date} | |||
| \usage{ | |||
| word_usage_by_date(res, patterns, name, tidy = F) | |||
| } | |||
| \arguments{ | |||
| \item{res}{tibble} | |||
| \item{res}{List of Tibbles to be analysed.} | |||
| \item{patterns}{char list} | |||
| \item{patterns}{Words to look up.} | |||
| \item{name}{char ? what is name needed for??} | |||
| \item{name}{?} | |||
| \item{tidy}{bool, default F | |||
| shorter summary if tidy=F | |||
| if tidy is set to T, the resulting tibble is tidy} | |||
| \item{tidy}{default is FALSE.} | |||
| } | |||
| \description{ | |||
| Counts how many talks do match a given pattern and summarises by date | |||
| Counts how many talks do match a given pattern and summarises by date. | |||
| } | |||
| @@ -2,19 +2,17 @@ | |||
| % Please edit documentation in R/parse.R | |||
| \name{write_to_csv} | |||
| \alias{write_to_csv} | |||
| \title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier} | |||
| \title{Write the parsed and repaired results into separate csv files} | |||
| \usage{ | |||
| write_to_csv(tables, path = "data/csv/", create = F) | |||
| } | |||
| \arguments{ | |||
| \item{tables}{tibble list} | |||
| \item{tables}{list of tables to convert into a csv files.} | |||
| \item{path}{char} | |||
| \item{path}{where to put the csv files.} | |||
| \item{create}{bool | |||
| if create is set to TRUE, the directory given in path is created} | |||
| \item{create}{set TRUE if the path does not exist yet and you want to create it} | |||
| } | |||
| \description{ | |||
| Write the parsed and repaired results into a csv file to make loading and developing faster and easier | |||
| Write the parsed and repaired results into separate csv files | |||
| } | |||
| @@ -0,0 +1,96 @@ | |||
| --- | |||
| title: "genderequality" | |||
| output: rmarkdown::html_vignette | |||
| vignette: > | |||
| %\VignetteIndexEntry{genderequality} | |||
| %\VignetteEngine{knitr::rmarkdown} | |||
| %\VignetteEncoding{UTF-8} | |||
| --- | |||
| ```{r, include = FALSE} | |||
| knitr::opts_chunk$set( | |||
| collapse = TRUE, | |||
| comment = "#>" | |||
| ) | |||
| ``` | |||
| ```{r setup} | |||
| library(hateimparlament) | |||
| library(dplyr) | |||
| library(ggplot2) | |||
| library(stringr) | |||
| library(tidyr) | |||
| library(rvest) | |||
| ``` | |||
| ## Preparation of data | |||
| First, you need to download all records of the current legislative period. | |||
| ```r | |||
| fetch_all("../records/") # path to directory where records should be stored | |||
| ``` | |||
| Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by: | |||
| ```r | |||
| read_all("../records/") %>% repair() -> res | |||
| ``` | |||
| We also used `repair` to fix a bunch of formatting issues in the records and unpacked | |||
| the result into more descriptive variables. | |||
| For development purposes, we load the tables from csv files. | |||
| ```{r} | |||
| res <- read_from_csv('../csv/') | |||
| ``` | |||
| and unpack our tibbles | |||
| ```{r} | |||
| comments <- res$comments | |||
| speeches <- res$speeches | |||
| speaker <- res$speaker | |||
| talks <- res$talks | |||
| ``` | |||
| Bevor we can do our analysis, we have to assign a gender to our politicans. | |||
| ```{r} | |||
| extract_href <- function(sel, html) { | |||
| html %>% | |||
| html_node(sel) %>% | |||
| html_attr("href") | |||
| } | |||
| first_content_p_text <- function(url) { | |||
| res <- NA | |||
| i <- 1 | |||
| while(is.na(res)) { | |||
| read_html(url) %>% | |||
| html_node(str_glue("#mw-content-text > div.mw-parser-output > p:nth-child({i})")) %>% | |||
| html_text() -> res | |||
| i <- i + 1 | |||
| } | |||
| res | |||
| } | |||
| abgeordneten_list_html <- read_html( | |||
| "https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Deutschen_Bundestages_(19._Wahlperiode)") | |||
| selectors <- str_glue("#mw-content-text > div.mw-parser-output > table:nth-child(20) > tbody > tr:nth-child({2:709}) > td:nth-child(2) > a") | |||
| link_part2 <- sapply(selectors, extract_href, abgeordneten_list_html) | |||
| link <- str_c("https://de.wikipedia.org", link_part2) | |||
| text <- sapply(link, first_content_p_text) | |||
| text %>% | |||
| str_extract(" ist ein.") %>% | |||
| str_replace(" ist eine", "female") %>% | |||
| str_replace(" ist ein ", "male") -> | |||
| gender | |||
| text %>% | |||
| str_extract("^([:upper:]?[:lower:]+[\\s\\-]?)*") %>% | |||
| str_trim() -> | |||
| names | |||
| gender <- tibble(speaker = names, | |||
| gender = gender) | |||
| ``` | |||