diff --git a/R/analyze.R b/R/analyze.R index fd728cd..917cf68 100644 --- a/R/analyze.R +++ b/R/analyze.R @@ -1,10 +1,31 @@ +#' Count number of occurences of a given word +#' +#' @param res tibble +#' @param word character +#' +#' Add number of occurences of word to talks +#' #' @export find_word <- function(res, word) { talks <- res$talks - mutate(talks, occurences = sapply(str_match_all(talks$content, regex(word, ignore_case = TRUE)), - nrow)) + mutate( + talks, + occurences = sapply( + str_match_all(talks$content, regex(word, ignore_case = TRUE)), + nrow + ) + ) } +#' add information from speaker table to a tibble containing speaker id +#' +#' @param tb tibble +#' @param res tibble +#' @param fraction_only bool +#' +#' left join speaker information from res$speaker into tb. +#' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble +#' #' @export join_speaker <- function(tb, res, fraction_only = F) { joined <- left_join(tb, res$speaker, by=c("speaker" = "id")) @@ -12,6 +33,8 @@ join_speaker <- function(tb, res, fraction_only = F) { else joined } +#' lookup table for party colors +#' #' @export party_colors <- c( AfD="#1A9FDD", @@ -28,6 +51,28 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", "FDP", "AfD", NA_character_)) +#' plot data depending on fractions in a standardized, configurable way +#' +#' @param tb tibble +#' @param x_variable column in tb +#' @param y_variable column in tb +#' @param fill column in tb +#' @param title char +#' @param xlab char +#' @param ylab char +#' @param filllab char +#' @param flipped bool +#' @param position char +#' @param reorder bool +#' +#' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. +#' Then color the plot depending on the fill value +#' Give the plot a title, an x-label xlab as well as an y-label ylab +#' Color the legend according to filllab +#' Setting flipped to TRUE makes the bars horizontal +#' Improve positioning details according to position +#' and finally reorder x_variable (default ist to order fractions according to seat order) +#' #' @export bar_plot_fractions <- function(tb, x_variable = NULL, # default is fraction @@ -71,8 +116,16 @@ bar_plot_fractions <- function(tb, if (flipped) plt + coord_flip() else plt } -# Counts how many talks do match a given pattern and summarises by date -# +#' Counts how many talks do match a given pattern and summarises by date +#' +#' @param res tibble +#' @param patterns char list +#' @param name char ? what is name needed for?? +#' @param tidy bool, default F +#' +#' shorter summary if tidy=F +#' if tidy is set to T, the resulting tibble is tidy +#' #' @export word_usage_by_date <- function(res, patterns, name, tidy=F) { tb <- res$talks diff --git a/R/parse.R b/R/parse.R index db99eb5..7a80b76 100644 --- a/R/parse.R +++ b/R/parse.R @@ -90,7 +90,7 @@ parse_speaker <- function(speaker_xml) { nm <- xml_child(speaker_xml) vorname <- xml_get(nm, "vorname") nachname <- xml_get(nm, "nachname") - fraction <- xml_get(nm, "fraction") + fraction <- xml_get(nm, "fraktion") titel <- xml_get(nm, "titel") rolle <- xml_find_all(nm, "rolle") if (length(rolle) > 0) { @@ -115,7 +115,7 @@ parse_speech <- function(speech_xml, date) { for (node in cs) { if (xml_name(node) == "p" || xml_name(node) == "name") { klasse <- xml_attr(node, "klasse") - if ((!is.na(klasse) && klasse == "speaker") || xml_name(node) == "name") { + if ((!is.na(klasse) && klasse == "redner") || xml_name(node) == "name") { if (!is.na(cur_speaker)) { speech <- c(speech_id = speech_id, speaker = cur_speaker, @@ -194,7 +194,7 @@ parse_speechlist <- function(speechlist_xml, date) { } # create a tibble of speaker from a list of xml nodes representing speaker -parse_speakerliste <- function(speakerliste_xml) { +parse_speakerlist <- function(speakerliste_xml) { d <- sapply(speakerliste_xml, parse_speaker) tibble(id = d["id",], vorname = d["vorname",], diff --git a/R/repair.R b/R/repair.R index 3dfdece..07e43f0 100644 --- a/R/repair.R +++ b/R/repair.R @@ -59,7 +59,7 @@ repair_talks <- function(talks) { #' unique (luckily :D) #' #' @param tb tibble -#' @param redner tibble +#' @param speaker tibble #' @param name_variable name #' #' Tries to match the name_variable column with speaker names @@ -83,29 +83,31 @@ lookup_speaker <- function(tb, speaker, name_variable) { mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) } -repair_comments <- function(comments, redner) { +repair_comments <- function(comments, speaker) { cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", "Use repair(, repair_commments = FALSE) to skip this.\n")) - # try to find a redner id for each actual comment + # try to find a speaker id for each actual comment comments %>% filter(!is.na(kommentator)) %>% - lookup_redner(redner, kommentator) %>% + lookup_speaker(speaker, kommentator) %>% left_join(comments, ., by="kommentator") %>% select(-kommentator) } #' Repair parsed tables #' -#' TODO: Explain repair_comments argument -#' (if TRUE, we try to lookup redner names in redner table) +#' @param parse_output tibble +#' @param repair_comments bool +#' +#' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. #' #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE #' Since repaired tables should be a fixpoint of repair. #' @export repair <- function(parse_output, repair_comments = FALSE) { - list(redner = repair_speaker(parse_output$speaker), - reden = repair_speeches(parse_output$speeches), + list(speaker = repair_speaker(parse_output$speaker), + speeches = repair_speeches(parse_output$speeches), talks = repair_talks(parse_output$talks), comments = if(repair_comments) repair_comments(parse_output$comments, parse_output$speaker) diff --git a/man/bar_plot_fractions.Rd b/man/bar_plot_fractions.Rd new file mode 100644 index 0000000..ff3a512 --- /dev/null +++ b/man/bar_plot_fractions.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze.R +\name{bar_plot_fractions} +\alias{bar_plot_fractions} +\title{plot data depending on fractions in a standardized, configurable way} +\usage{ +bar_plot_fractions( + tb, + x_variable = NULL, + y_variable = NULL, + fill = NULL, + title = NULL, + xlab = "Fraction", + ylab = "n", + filllab = "Fraction", + flipped = TRUE, + position = "dodge", + reorder = FALSE +) +} +\arguments{ +\item{tb}{tibble} + +\item{x_variable}{column in tb} + +\item{y_variable}{column in tb} + +\item{fill}{column in tb} + +\item{title}{char} + +\item{xlab}{char} + +\item{ylab}{char} + +\item{filllab}{char} + +\item{flipped}{bool} + +\item{position}{char} + +\item{reorder}{bool + +plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. +Then color the plot depending on the fill value +Give the plot a title, an x-label xlab as well as an y-label ylab +Color the legend according to filllab +Setting flipped to TRUE makes the bars horizontal +Improve positioning details according to position +and finally reorder x_variable (default ist to order fractions according to seat order)} +} +\description{ +plot data depending on fractions in a standardized, configurable way +} diff --git a/man/find_word.Rd b/man/find_word.Rd new file mode 100644 index 0000000..acac553 --- /dev/null +++ b/man/find_word.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze.R +\name{find_word} +\alias{find_word} +\title{Count number of occurences of a given word} +\usage{ +find_word(res, word) +} +\arguments{ +\item{res}{tibble} + +\item{word}{character + +Add number of occurences of word to talks} +} +\description{ +Count number of occurences of a given word +} diff --git a/man/join_speaker.Rd b/man/join_speaker.Rd new file mode 100644 index 0000000..e03ec2d --- /dev/null +++ b/man/join_speaker.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze.R +\name{join_speaker} +\alias{join_speaker} +\title{add information from speaker table to a tibble containing speaker id} +\usage{ +join_speaker(tb, res, fraction_only = F) +} +\arguments{ +\item{tb}{tibble} + +\item{res}{tibble} + +\item{fraction_only}{bool + +left join speaker information from res$speaker into tb. +if fraction_only is TRUE, only fraction is selected from the resulting joined tibble} +} +\description{ +add information from speaker table to a tibble containing speaker id +} diff --git a/man/lookup_speaker.Rd b/man/lookup_speaker.Rd new file mode 100644 index 0000000..24e9c1c --- /dev/null +++ b/man/lookup_speaker.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/repair.R +\name{lookup_speaker} +\alias{lookup_speaker} +\title{Lookup name in speakers table} +\usage{ +lookup_speaker(tb, speaker, name_variable) +} +\arguments{ +\item{tb}{tibble} + +\item{speaker}{tibble} + +\item{name_variable}{name + +Tries to match the name_variable column with speaker names + +returns a lookup table} +} +\description{ +Tries to find the correct speaker id given a name. +This is sufficient since every prename lastname combination in the bundestag is +unique (luckily :D) +} diff --git a/man/party_colors.Rd b/man/party_colors.Rd new file mode 100644 index 0000000..b0e5a18 --- /dev/null +++ b/man/party_colors.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze.R +\docType{data} +\name{party_colors} +\alias{party_colors} +\title{lookup table for party colors} +\format{ +An object of class \code{character} of length 8. +} +\usage{ +party_colors +} +\description{ +lookup table for party colors +} +\keyword{datasets} diff --git a/man/repair.Rd b/man/repair.Rd index c7e0342..8a57937 100644 --- a/man/repair.Rd +++ b/man/repair.Rd @@ -4,7 +4,17 @@ \alias{repair} \title{Repair parsed tables} \usage{ -repair(parse_output) +repair(parse_output, repair_comments = FALSE) +} +\arguments{ +\item{parse_output}{tibble} + +\item{repair_comments}{bool + +If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. + +Possible test: check identical(repair(res), repair(repair(res))) == TRUE +Since repaired tables should be a fixpoint of repair.} } \description{ Repair parsed tables diff --git a/man/word_usage_by_date.Rd b/man/word_usage_by_date.Rd new file mode 100644 index 0000000..ab96b2d --- /dev/null +++ b/man/word_usage_by_date.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/analyze.R +\name{word_usage_by_date} +\alias{word_usage_by_date} +\title{Counts how many talks do match a given pattern and summarises by date} +\usage{ +word_usage_by_date(res, patterns, name, tidy = F) +} +\arguments{ +\item{res}{tibble} + +\item{patterns}{char list} + +\item{name}{char ? what is name needed for??} + +\item{tidy}{bool, default F + +shorter summary if tidy=F +if tidy is set to T, the resulting tibble is tidy} +} +\description{ +Counts how many talks do match a given pattern and summarises by date +}