fix errors from refactoring and add documentation to all remaining functions in analyze.R

4 lat temu · 5e9f70627d
--- a/R/analyze.R
+++ b/R/analyze.R
@@ -1,10 +1,31 @@
 #' Count number of occurences of a given word
 #' 
 #' @param res tibble
 #' @param word character
 #'
 #' Add number of occurences of word to talks
 #' 
 #' @export
 find_word <- function(res, word) {
    talks <- res$talks
    mutate(talks, occurences = sapply(str_match_all(talks$content, regex(word, ignore_case = TRUE)),
           nrow))
    mutate(
        talks, 
        occurences = sapply(
            str_match_all(talks$content, regex(word, ignore_case = TRUE)),
            nrow
        )
    )
 }

 #' add information from speaker table to a tibble containing speaker id
 #' 
 #' @param tb tibble
 #' @param res tibble
 #' @param fraction_only bool
 #' 
 #' left join speaker information from res$speaker into tb.
 #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble
 #'
 #' @export
 join_speaker <- function(tb, res, fraction_only = F) {
    joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
@@ -12,6 +33,8 @@ join_speaker <- function(tb, res, fraction_only = F) {
    else joined
 }

 #' lookup table for party colors
 #'
 #' @export
 party_colors <- c(
  AfD="#1A9FDD",
@@ -28,6 +51,28 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
                        "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU",
                        "FDP", "AfD", NA_character_))

 #' plot data depending on fractions in a standardized, configurable way
 #' 
 #' @param tb tibble
 #' @param x_variable column in tb
 #' @param y_variable column in tb
 #' @param fill column in tb
 #' @param title char
 #' @param xlab char
 #' @param ylab char
 #' @param filllab char
 #' @param flipped bool
 #' @param position char
 #' @param reorder bool
 #' 
 #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
 #' Then color the plot depending on the fill value
 #' Give the plot a title, an x-label xlab as well as an y-label ylab
 #' Color the legend according to filllab
 #' Setting flipped to TRUE makes the bars horizontal
 #' Improve positioning details according to position
 #' and finally reorder x_variable (default ist to order fractions according to seat order)
 #' 
 #' @export
 bar_plot_fractions <- function(tb,
                                x_variable = NULL, # default is fraction
@@ -71,8 +116,16 @@ bar_plot_fractions <- function(tb,
    if (flipped) plt + coord_flip() else plt
 }

 # Counts how many talks do match a given pattern and summarises by date
 #
 #' Counts how many talks do match a given pattern and summarises by date
 #' 
 #' @param res tibble
 #' @param patterns char list
 #' @param name char ? what is name needed for??
 #' @param tidy bool, default F
 #' 
 #' shorter summary if tidy=F
 #' if tidy is set to T, the resulting tibble is tidy
 #' 
 #' @export
 word_usage_by_date <- function(res, patterns, name, tidy=F) {
    tb <- res$talks
--- a/R/parse.R
+++ b/R/parse.R
@@ -90,7 +90,7 @@ parse_speaker <- function(speaker_xml) {
    nm <- xml_child(speaker_xml)
    vorname <- xml_get(nm, "vorname")
    nachname <- xml_get(nm, "nachname")
    fraction <- xml_get(nm, "fraction")
    fraction <- xml_get(nm, "fraktion")
    titel <- xml_get(nm, "titel")
    rolle <- xml_find_all(nm, "rolle")
    if (length(rolle) > 0) {
@@ -115,7 +115,7 @@ parse_speech <- function(speech_xml, date) {
    for (node in cs) {
        if (xml_name(node) == "p" || xml_name(node) == "name") {
            klasse <- xml_attr(node, "klasse")
            if ((!is.na(klasse) && klasse == "speaker") || xml_name(node) == "name") {
            if ((!is.na(klasse) && klasse == "redner") || xml_name(node) == "name") {
                if (!is.na(cur_speaker)) {
                    speech <- c(speech_id = speech_id,
                              speaker = cur_speaker,
@@ -194,7 +194,7 @@ parse_speechlist <- function(speechlist_xml, date) {
 }

 # create a tibble of speaker from a list of xml nodes representing speaker
 parse_speakerliste <- function(speakerliste_xml) {
 parse_speakerlist <- function(speakerliste_xml) {
    d <- sapply(speakerliste_xml, parse_speaker)
    tibble(id = d["id",],
           vorname = d["vorname",],
--- a/R/repair.R
+++ b/R/repair.R
@@ -59,7 +59,7 @@ repair_talks <- function(talks) {
 #' unique (luckily :D)
 #' 
 #' @param tb tibble
 #' @param redner tibble
 #' @param speaker tibble
 #' @param name_variable name
 #' 
 #' Tries to match the name_variable column with speaker names
@@ -83,29 +83,31 @@ lookup_speaker <- function(tb, speaker, name_variable) {
        mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
 }

 repair_comments <- function(comments, redner) {
 repair_comments <- function(comments, speaker) {
    cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
               "Use repair(, repair_commments = FALSE) to skip this.\n"))
    # try to find a redner id for each actual comment
    # try to find a speaker id for each actual comment
    comments %>%
        filter(!is.na(kommentator)) %>%
        lookup_redner(redner, kommentator) %>%
        lookup_speaker(speaker, kommentator) %>%
        left_join(comments, ., by="kommentator") %>%
        select(-kommentator)
 }

 #' Repair parsed tables
 #'
 #' TODO: Explain repair_comments argument
 #' (if TRUE, we try to lookup redner names in redner table)
 #' @param parse_output tibble
 #' @param repair_comments bool
 #' 
 #' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
 #'
 #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
 #' Since repaired tables should be a fixpoint of repair.
 #' @export
 repair <- function(parse_output, repair_comments = FALSE) {

    list(redner = repair_speaker(parse_output$speaker),
         reden = repair_speeches(parse_output$speeches),
    list(speaker = repair_speaker(parse_output$speaker),
         speeches = repair_speeches(parse_output$speeches),
         talks = repair_talks(parse_output$talks),
         comments = if(repair_comments) repair_comments(parse_output$comments,
                                                        parse_output$speaker)
--- a/man/bar_plot_fractions.Rd
+++ b/man/bar_plot_fractions.Rd
@@ -0,0 +1,54 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/analyze.R
 \name{bar_plot_fractions}
 \alias{bar_plot_fractions}
 \title{plot data depending on fractions in a standardized, configurable way}
 \usage{
 bar_plot_fractions(
  tb,
  x_variable = NULL,
  y_variable = NULL,
  fill = NULL,
  title = NULL,
  xlab = "Fraction",
  ylab = "n",
  filllab = "Fraction",
  flipped = TRUE,
  position = "dodge",
  reorder = FALSE
 )
 }
 \arguments{
 \item{tb}{tibble}

 \item{x_variable}{column in tb}

 \item{y_variable}{column in tb}

 \item{fill}{column in tb}

 \item{title}{char}

 \item{xlab}{char}

 \item{ylab}{char}

 \item{filllab}{char}

 \item{flipped}{bool}

 \item{position}{char}

 \item{reorder}{bool

 plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
 Then color the plot depending on the fill value
 Give the plot a title, an x-label xlab as well as an y-label ylab
 Color the legend according to filllab
 Setting flipped to TRUE makes the bars horizontal
 Improve positioning details according to position
 and finally reorder x_variable (default ist to order fractions according to seat order)}
 }
 \description{
 plot data depending on fractions in a standardized, configurable way
 }
--- a/man/find_word.Rd
+++ b/man/find_word.Rd
@@ -0,0 +1,18 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/analyze.R
 \name{find_word}
 \alias{find_word}
 \title{Count number of occurences of a given word}
 \usage{
 find_word(res, word)
 }
 \arguments{
 \item{res}{tibble}

 \item{word}{character

 Add number of occurences of word to talks}
 }
 \description{
 Count number of occurences of a given word
 }
--- a/man/join_speaker.Rd
+++ b/man/join_speaker.Rd
@@ -0,0 +1,21 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/analyze.R
 \name{join_speaker}
 \alias{join_speaker}
 \title{add information from speaker table to a tibble containing speaker id}
 \usage{
 join_speaker(tb, res, fraction_only = F)
 }
 \arguments{
 \item{tb}{tibble}

 \item{res}{tibble}

 \item{fraction_only}{bool

 left join speaker information from res$speaker into tb.
 if fraction_only is TRUE, only fraction is selected from the resulting joined tibble}
 }
 \description{
 add information from speaker table to a tibble containing speaker id
 }
--- a/man/lookup_speaker.Rd
+++ b/man/lookup_speaker.Rd
@@ -0,0 +1,24 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/repair.R
 \name{lookup_speaker}
 \alias{lookup_speaker}
 \title{Lookup name in speakers table}
 \usage{
 lookup_speaker(tb, speaker, name_variable)
 }
 \arguments{
 \item{tb}{tibble}

 \item{speaker}{tibble}

 \item{name_variable}{name

 Tries to match the name_variable column with speaker names

 returns a lookup table}
 }
 \description{
 Tries to find the correct speaker id given a name.
 This is sufficient since every prename lastname combination in the bundestag is
 unique (luckily :D)
 }
--- a/man/party_colors.Rd
+++ b/man/party_colors.Rd
@@ -0,0 +1,16 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/analyze.R
 \docType{data}
 \name{party_colors}
 \alias{party_colors}
 \title{lookup table for party colors}
 \format{
 An object of class \code{character} of length 8.
 }
 \usage{
 party_colors
 }
 \description{
 lookup table for party colors
 }
 \keyword{datasets}
--- a/man/repair.Rd
+++ b/man/repair.Rd
@@ -4,7 +4,17 @@
 \alias{repair}
 \title{Repair parsed tables}
 \usage{
 repair(parse_output)
 repair(parse_output, repair_comments = FALSE)
 }
 \arguments{
 \item{parse_output}{tibble}

 \item{repair_comments}{bool

 If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table.

 Possible test: check identical(repair(res), repair(repair(res))) == TRUE
 Since repaired tables should be a fixpoint of repair.}
 }
 \description{
 Repair parsed tables
--- a/man/word_usage_by_date.Rd
+++ b/man/word_usage_by_date.Rd
@@ -0,0 +1,23 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/analyze.R
 \name{word_usage_by_date}
 \alias{word_usage_by_date}
 \title{Counts how many talks do match a given pattern and summarises by date}
 \usage{
 word_usage_by_date(res, patterns, name, tidy = F)
 }
 \arguments{
 \item{res}{tibble}

 \item{patterns}{char list}

 \item{name}{char ? what is name needed for??}

 \item{tidy}{bool, default F

 shorter summary if tidy=F
 if tidy is set to T, the resulting tibble is tidy}
 }
 \description{
 Counts how many talks do match a given pattern and summarises by date
 }