diff --git a/R/analyze.R b/R/analyze.R
index 917cf68..0f4e8e7 100644
--- a/R/analyze.R
+++ b/R/analyze.R
@@ -20,11 +20,11 @@ find_word <- function(res, word) {
 #' add information from speaker table to a tibble containing speaker id
 #' 
 #' @param tb tibble
-#' @param res tibble
-#' @param fraction_only bool
+#' @param res list of tibbles
+#' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
 #' 
 #' left join speaker information from res$speaker into tb.
-#' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble
+#' if fraction_only 
 #'
 #' @export
 join_speaker <- function(tb, res, fraction_only = F) {
@@ -33,7 +33,7 @@ join_speaker <- function(tb, res, fraction_only = F) {
     else joined
 }
 
-#' lookup table for party colors
+#' lookup table for official party colors
 #'
 #' @export
 party_colors <- c(
@@ -51,27 +51,27 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
                         "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
                         "FDP", "AfD", NA_character_))
 
-#' plot data depending on fractions in a standardized, configurable way
+#' Bar chart visualizing fraction based data
+#' 
+#' Can be configured to also visualize data not related to fractions.
 #' 
 #' @param tb tibble
-#' @param x_variable column in tb
-#' @param y_variable column in tb
-#' @param fill column in tb
-#' @param title char
-#' @param xlab char
-#' @param ylab char
-#' @param filllab char
-#' @param flipped bool
-#' @param position char
-#' @param reorder bool
+#' @param x_variable column in tb, default is fraction
+#' @param y_variable column in tb, default is n
+#' @param fill column in tb, default is fraction
+#' @param title plot title
+#' @param xlab label for x axis, default is fraction
+#' @param ylab label for y axis, default is n
+#' @param filllab default is 'Fraction'
+#' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
+#' @param position default is 'dodge'
+#' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
 #' 
 #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
-#' Then color the plot depending on the fill value
-#' Give the plot a title, an x-label xlab as well as an y-label ylab
-#' Color the legend according to filllab
-#' Setting flipped to TRUE makes the bars horizontal
-#' Improve positioning details according to position
-#' and finally reorder x_variable (default ist to order fractions according to seat order)
+#' Then color the plot depending on the fill value.
+#' Give the plot a title and a label for x-axis and y-axis,
+#' color the legend according to filllab and finally
+#' improve positioning details according to position
 #' 
 #' @export
 bar_plot_fractions <- function(tb,
@@ -116,16 +116,15 @@ bar_plot_fractions <- function(tb,
     if (flipped) plt + coord_flip() else plt
 }
 
-#' Counts how many talks do match a given pattern and summarises by date
-#' 
-#' @param res tibble
-#' @param patterns char list
-#' @param name char ? what is name needed for??
-#' @param tidy bool, default F
-#' 
-#' shorter summary if tidy=F
-#' if tidy is set to T, the resulting tibble is tidy
-#' 
+#' Word usage summarised by date
+#'
+#' Counts how many talks do match a given pattern and summarises by date.
+#'
+#' @param res List of Tibbles to be analysed.
+#' @param patterns Words to look up.
+#' @param name ?
+#' @param tidy default is FALSE.
+#'
 #' @export
 word_usage_by_date <- function(res, patterns, name, tidy=F) {
     tb <- res$talks
diff --git a/R/parse.R b/R/parse.R
index e9c75e9..498dae7 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -205,13 +205,11 @@ parse_speakerlist <- function(speakerliste_xml) {
            rolle_lang = d["rolle_lang",])
 }
 
-#' Write the parsed and repaired results into a csv file to make loading and developing faster and easier
+#' Write the parsed and repaired results into separate csv files
 #' 
-#' @param tables tibble list
-#' @param path char
-#' @param create bool
-#' 
-#' if create is set to TRUE, the directory given in path is created
+#' @param tables list of tables to convert into a csv files.
+#' @param path where to put the csv files.
+#' @param create set TRUE if the path does not exist yet and you want to create it
 #' 
 #' @export
 write_to_csv <- function(tables, path="data/csv/", create=F) {
@@ -223,11 +221,12 @@ write_to_csv <- function(tables, path="data/csv/", create=F) {
     write.table(tables$applause, str_c(path, "applause.csv"))
 }
 
-#' Read the needed tables for developing from a csv file.
-#' 
-#' @param path char
-#' 
-#' Reading the tables from a csv is way faster than reading and repairing the data every single time
+
+#' create a tibble from the csv file
+#'
+#' @param path directory to read files from
+#'
+#' reading the tables from a csv is way faster than reading and repairing the data every single time
 #' 
 #' @export
 read_from_csv <- function(path="data/csv/") {
diff --git a/man/bar_plot_fractions.Rd b/man/bar_plot_fractions.Rd
index ff3a512..4bc1122 100644
--- a/man/bar_plot_fractions.Rd
+++ b/man/bar_plot_fractions.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/analyze.R
 \name{bar_plot_fractions}
 \alias{bar_plot_fractions}
-\title{plot data depending on fractions in a standardized, configurable way}
+\title{Bar chart visualizing fraction based data}
 \usage{
 bar_plot_fractions(
   tb,
@@ -21,34 +21,32 @@ bar_plot_fractions(
 \arguments{
 \item{tb}{tibble}
 
-\item{x_variable}{column in tb}
+\item{x_variable}{column in tb, default is fraction}
 
-\item{y_variable}{column in tb}
+\item{y_variable}{column in tb, default is n}
 
-\item{fill}{column in tb}
+\item{fill}{column in tb, default is fraction}
 
-\item{title}{char}
+\item{title}{plot title}
 
-\item{xlab}{char}
+\item{xlab}{label for x axis, default is fraction}
 
-\item{ylab}{char}
+\item{ylab}{label for y axis, default is n}
 
-\item{filllab}{char}
+\item{filllab}{default is 'Fraction'}
 
-\item{flipped}{bool}
+\item{flipped}{if TRUE draw bars horizontally, else vertically. Default is TRUE}
 
-\item{position}{char}
+\item{position}{default is 'dodge'}
 
-\item{reorder}{bool
+\item{reorder}{Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
 
 plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
-Then color the plot depending on the fill value
-Give the plot a title, an x-label xlab as well as an y-label ylab
-Color the legend according to filllab
-Setting flipped to TRUE makes the bars horizontal
-Improve positioning details according to position
-and finally reorder x_variable (default ist to order fractions according to seat order)}
+Then color the plot depending on the fill value.
+Give the plot a title and a label for x-axis and y-axis,
+color the legend according to filllab and finally
+improve positioning details according to position}
 }
 \description{
-plot data depending on fractions in a standardized, configurable way
+Can be configured to also visualize data not related to fractions.
 }
diff --git a/man/fetch_all.Rd b/man/fetch_all.Rd
index 694e97d..0b52206 100644
--- a/man/fetch_all.Rd
+++ b/man/fetch_all.Rd
@@ -8,6 +8,10 @@ fetch_all(download_dir = "data/records/", create = FALSE)
 }
 \arguments{
 \item{download_dir}{character}
+
+\item{create}{bool
+
+if create is TRUE, the directory given in download_dir is created}
 }
 \description{
 This fetches all available records of the 19th legislative period of the german Bundestag.
diff --git a/man/join_speaker.Rd b/man/join_speaker.Rd
index e03ec2d..5b6413b 100644
--- a/man/join_speaker.Rd
+++ b/man/join_speaker.Rd
@@ -9,12 +9,12 @@ join_speaker(tb, res, fraction_only = F)
 \arguments{
 \item{tb}{tibble}
 
-\item{res}{tibble}
+\item{res}{list of tibbles}
 
-\item{fraction_only}{bool
+\item{fraction_only}{if TRUE, only select fraction from the resulting joined tibble
 
 left join speaker information from res$speaker into tb.
-if fraction_only is TRUE, only fraction is selected from the resulting joined tibble}
+if fraction_only}
 }
 \description{
 add information from speaker table to a tibble containing speaker id
diff --git a/man/party_colors.Rd b/man/party_colors.Rd
index b0e5a18..1fd75b7 100644
--- a/man/party_colors.Rd
+++ b/man/party_colors.Rd
@@ -3,7 +3,7 @@
 \docType{data}
 \name{party_colors}
 \alias{party_colors}
-\title{lookup table for party colors}
+\title{lookup table for official party colors}
 \format{
 An object of class \code{character} of length 8.
 }
@@ -11,6 +11,6 @@ An object of class \code{character} of length 8.
 party_colors
 }
 \description{
-lookup table for party colors
+lookup table for official party colors
 }
 \keyword{datasets}
diff --git a/man/read_from_csv.Rd b/man/read_from_csv.Rd
index 3244c82..cd3fbc4 100644
--- a/man/read_from_csv.Rd
+++ b/man/read_from_csv.Rd
@@ -2,15 +2,15 @@
 % Please edit documentation in R/parse.R
 \name{read_from_csv}
 \alias{read_from_csv}
-\title{Read the needed tables for developing from a csv file.}
+\title{create a tibble from the csv file}
 \usage{
 read_from_csv(path = "data/csv/")
 }
 \arguments{
-\item{path}{char
+\item{path}{directory to read files from
 
-Reading the tables from a csv is way faster than reading and repairing the data every single time}
+reading the tables from a csv is way faster than reading and repairing the data every single time}
 }
 \description{
-Read the needed tables for developing from a csv file.
+create a tibble from the csv file
 }
diff --git a/man/word_usage_by_date.Rd b/man/word_usage_by_date.Rd
index ab96b2d..661d9c5 100644
--- a/man/word_usage_by_date.Rd
+++ b/man/word_usage_by_date.Rd
@@ -2,22 +2,19 @@
 % Please edit documentation in R/analyze.R
 \name{word_usage_by_date}
 \alias{word_usage_by_date}
-\title{Counts how many talks do match a given pattern and summarises by date}
+\title{Word usage summarised by date}
 \usage{
 word_usage_by_date(res, patterns, name, tidy = F)
 }
 \arguments{
-\item{res}{tibble}
+\item{res}{List of Tibbles to be analysed.}
 
-\item{patterns}{char list}
+\item{patterns}{Words to look up.}
 
-\item{name}{char ? what is name needed for??}
+\item{name}{?}
 
-\item{tidy}{bool, default F
-
-shorter summary if tidy=F
-if tidy is set to T, the resulting tibble is tidy}
+\item{tidy}{default is FALSE.}
 }
 \description{
-Counts how many talks do match a given pattern and summarises by date
+Counts how many talks do match a given pattern and summarises by date.
 }
diff --git a/man/write_to_csv.Rd b/man/write_to_csv.Rd
index cd7f200..5cd1af7 100644
--- a/man/write_to_csv.Rd
+++ b/man/write_to_csv.Rd
@@ -2,19 +2,17 @@
 % Please edit documentation in R/parse.R
 \name{write_to_csv}
 \alias{write_to_csv}
-\title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier}
+\title{Write the parsed and repaired results into separate csv files}
 \usage{
 write_to_csv(tables, path = "data/csv/", create = F)
 }
 \arguments{
-\item{tables}{tibble list}
+\item{tables}{list of tables to convert into a csv files.}
 
-\item{path}{char}
+\item{path}{where to put the csv files.}
 
-\item{create}{bool
-
-if create is set to TRUE, the directory given in path is created}
+\item{create}{set TRUE if the path does not exist yet and you want to create it}
 }
 \description{
-Write the parsed and repaired results into a csv file to make loading and developing faster and easier
+Write the parsed and repaired results into separate csv files
 }
diff --git a/vignettes/genderequality.Rmd b/vignettes/genderequality.Rmd
new file mode 100644
index 0000000..d4aa64d
--- /dev/null
+++ b/vignettes/genderequality.Rmd
@@ -0,0 +1,96 @@
+---
+title: "genderequality"
+output: rmarkdown::html_vignette
+vignette: >
+  %\VignetteIndexEntry{genderequality}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+```{r setup}
+library(hateimparlament)
+library(dplyr)
+library(ggplot2)
+library(stringr)
+library(tidyr)
+library(rvest)
+```
+
+## Preparation of data
+
+First, you need to download all records of the current legislative period.
+```r
+fetch_all("../records/") # path to directory where records should be stored
+```
+Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by:
+```r
+read_all("../records/") %>% repair() -> res
+```
+We also used `repair` to fix a bunch of formatting issues in the records and unpacked
+the result into more descriptive variables.
+
+For development purposes, we load the tables from csv files.
+```{r}
+res <- read_from_csv('../csv/')
+```
+and unpack our tibbles
+```{r}
+comments <- res$comments
+speeches <- res$speeches
+speaker <- res$speaker
+talks <- res$talks
+```
+
+Bevor we can do our analysis, we have to assign a gender to our politicans.
+
+```{r}
+extract_href <- function(sel, html) {
+  html %>%
+    html_node(sel) %>%
+    html_attr("href")
+}
+
+first_content_p_text <- function(url) {
+  res <- NA
+  i <- 1
+  while(is.na(res)) {
+    read_html(url) %>% 
+      html_node(str_glue("#mw-content-text > div.mw-parser-output > p:nth-child({i})"))  %>% 
+      html_text() -> res
+    i <- i + 1
+  }
+  res
+}
+
+abgeordneten_list_html <- read_html(
+  "https://de.wikipedia.org/wiki/Liste_der_Mitglieder_des_Deutschen_Bundestages_(19._Wahlperiode)")
+
+selectors <- str_glue("#mw-content-text > div.mw-parser-output > table:nth-child(20) > tbody > tr:nth-child({2:709}) > td:nth-child(2) > a")
+link_part2 <- sapply(selectors, extract_href, abgeordneten_list_html)
+link <- str_c("https://de.wikipedia.org", link_part2)
+
+text <- sapply(link, first_content_p_text)
+text %>% 
+  str_extract(" ist ein.") %>% 
+  str_replace(" ist eine", "female") %>% 
+  str_replace(" ist ein ", "male") ->
+  gender
+
+text %>% 
+  str_extract("^([:upper:]?[:lower:]+[\\s\\-]?)*") %>% 
+  str_trim() -> 
+  names
+
+gender <- tibble(speaker = names,
+                 gender = gender)
+
+
+```
+