| @@ -1,10 +1,31 @@ | |||
| #' Count number of occurences of a given word | |||
| #' | |||
| #' @param res tibble | |||
| #' @param word character | |||
| #' | |||
| #' Add number of occurences of word to talks | |||
| #' | |||
| #' @export | |||
| find_word <- function(res, word) { | |||
| talks <- res$talks | |||
| mutate(talks, occurences = sapply(str_match_all(talks$content, regex(word, ignore_case = TRUE)), | |||
| nrow)) | |||
| mutate( | |||
| talks, | |||
| occurences = sapply( | |||
| str_match_all(talks$content, regex(word, ignore_case = TRUE)), | |||
| nrow | |||
| ) | |||
| ) | |||
| } | |||
| #' add information from speaker table to a tibble containing speaker id | |||
| #' | |||
| #' @param tb tibble | |||
| #' @param res tibble | |||
| #' @param fraction_only bool | |||
| #' | |||
| #' left join speaker information from res$speaker into tb. | |||
| #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble | |||
| #' | |||
| #' @export | |||
| join_speaker <- function(tb, res, fraction_only = F) { | |||
| joined <- left_join(tb, res$speaker, by=c("speaker" = "id")) | |||
| @@ -12,6 +33,8 @@ join_speaker <- function(tb, res, fraction_only = F) { | |||
| else joined | |||
| } | |||
| #' lookup table for party colors | |||
| #' | |||
| #' @export | |||
| party_colors <- c( | |||
| AfD="#1A9FDD", | |||
| @@ -28,6 +51,28 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", | |||
| "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", | |||
| "FDP", "AfD", NA_character_)) | |||
| #' plot data depending on fractions in a standardized, configurable way | |||
| #' | |||
| #' @param tb tibble | |||
| #' @param x_variable column in tb | |||
| #' @param y_variable column in tb | |||
| #' @param fill column in tb | |||
| #' @param title char | |||
| #' @param xlab char | |||
| #' @param ylab char | |||
| #' @param filllab char | |||
| #' @param flipped bool | |||
| #' @param position char | |||
| #' @param reorder bool | |||
| #' | |||
| #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. | |||
| #' Then color the plot depending on the fill value | |||
| #' Give the plot a title, an x-label xlab as well as an y-label ylab | |||
| #' Color the legend according to filllab | |||
| #' Setting flipped to TRUE makes the bars horizontal | |||
| #' Improve positioning details according to position | |||
| #' and finally reorder x_variable (default ist to order fractions according to seat order) | |||
| #' | |||
| #' @export | |||
| bar_plot_fractions <- function(tb, | |||
| x_variable = NULL, # default is fraction | |||
| @@ -71,8 +116,16 @@ bar_plot_fractions <- function(tb, | |||
| if (flipped) plt + coord_flip() else plt | |||
| } | |||
| # Counts how many talks do match a given pattern and summarises by date | |||
| # | |||
| #' Counts how many talks do match a given pattern and summarises by date | |||
| #' | |||
| #' @param res tibble | |||
| #' @param patterns char list | |||
| #' @param name char ? what is name needed for?? | |||
| #' @param tidy bool, default F | |||
| #' | |||
| #' shorter summary if tidy=F | |||
| #' if tidy is set to T, the resulting tibble is tidy | |||
| #' | |||
| #' @export | |||
| word_usage_by_date <- function(res, patterns, name, tidy=F) { | |||
| tb <- res$talks | |||
| @@ -90,7 +90,7 @@ parse_speaker <- function(speaker_xml) { | |||
| nm <- xml_child(speaker_xml) | |||
| vorname <- xml_get(nm, "vorname") | |||
| nachname <- xml_get(nm, "nachname") | |||
| fraction <- xml_get(nm, "fraction") | |||
| fraction <- xml_get(nm, "fraktion") | |||
| titel <- xml_get(nm, "titel") | |||
| rolle <- xml_find_all(nm, "rolle") | |||
| if (length(rolle) > 0) { | |||
| @@ -115,7 +115,7 @@ parse_speech <- function(speech_xml, date) { | |||
| for (node in cs) { | |||
| if (xml_name(node) == "p" || xml_name(node) == "name") { | |||
| klasse <- xml_attr(node, "klasse") | |||
| if ((!is.na(klasse) && klasse == "speaker") || xml_name(node) == "name") { | |||
| if ((!is.na(klasse) && klasse == "redner") || xml_name(node) == "name") { | |||
| if (!is.na(cur_speaker)) { | |||
| speech <- c(speech_id = speech_id, | |||
| speaker = cur_speaker, | |||
| @@ -194,7 +194,7 @@ parse_speechlist <- function(speechlist_xml, date) { | |||
| } | |||
| # create a tibble of speaker from a list of xml nodes representing speaker | |||
| parse_speakerliste <- function(speakerliste_xml) { | |||
| parse_speakerlist <- function(speakerliste_xml) { | |||
| d <- sapply(speakerliste_xml, parse_speaker) | |||
| tibble(id = d["id",], | |||
| vorname = d["vorname",], | |||
| @@ -59,7 +59,7 @@ repair_talks <- function(talks) { | |||
| #' unique (luckily :D) | |||
| #' | |||
| #' @param tb tibble | |||
| #' @param redner tibble | |||
| #' @param speaker tibble | |||
| #' @param name_variable name | |||
| #' | |||
| #' Tries to match the name_variable column with speaker names | |||
| @@ -83,29 +83,31 @@ lookup_speaker <- function(tb, speaker, name_variable) { | |||
| mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) | |||
| } | |||
| repair_comments <- function(comments, redner) { | |||
| repair_comments <- function(comments, speaker) { | |||
| cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", | |||
| "Use repair(, repair_commments = FALSE) to skip this.\n")) | |||
| # try to find a redner id for each actual comment | |||
| # try to find a speaker id for each actual comment | |||
| comments %>% | |||
| filter(!is.na(kommentator)) %>% | |||
| lookup_redner(redner, kommentator) %>% | |||
| lookup_speaker(speaker, kommentator) %>% | |||
| left_join(comments, ., by="kommentator") %>% | |||
| select(-kommentator) | |||
| } | |||
| #' Repair parsed tables | |||
| #' | |||
| #' TODO: Explain repair_comments argument | |||
| #' (if TRUE, we try to lookup redner names in redner table) | |||
| #' @param parse_output tibble | |||
| #' @param repair_comments bool | |||
| #' | |||
| #' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. | |||
| #' | |||
| #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE | |||
| #' Since repaired tables should be a fixpoint of repair. | |||
| #' @export | |||
| repair <- function(parse_output, repair_comments = FALSE) { | |||
| list(redner = repair_speaker(parse_output$speaker), | |||
| reden = repair_speeches(parse_output$speeches), | |||
| list(speaker = repair_speaker(parse_output$speaker), | |||
| speeches = repair_speeches(parse_output$speeches), | |||
| talks = repair_talks(parse_output$talks), | |||
| comments = if(repair_comments) repair_comments(parse_output$comments, | |||
| parse_output$speaker) | |||
| @@ -0,0 +1,54 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/analyze.R | |||
| \name{bar_plot_fractions} | |||
| \alias{bar_plot_fractions} | |||
| \title{plot data depending on fractions in a standardized, configurable way} | |||
| \usage{ | |||
| bar_plot_fractions( | |||
| tb, | |||
| x_variable = NULL, | |||
| y_variable = NULL, | |||
| fill = NULL, | |||
| title = NULL, | |||
| xlab = "Fraction", | |||
| ylab = "n", | |||
| filllab = "Fraction", | |||
| flipped = TRUE, | |||
| position = "dodge", | |||
| reorder = FALSE | |||
| ) | |||
| } | |||
| \arguments{ | |||
| \item{tb}{tibble} | |||
| \item{x_variable}{column in tb} | |||
| \item{y_variable}{column in tb} | |||
| \item{fill}{column in tb} | |||
| \item{title}{char} | |||
| \item{xlab}{char} | |||
| \item{ylab}{char} | |||
| \item{filllab}{char} | |||
| \item{flipped}{bool} | |||
| \item{position}{char} | |||
| \item{reorder}{bool | |||
| plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. | |||
| Then color the plot depending on the fill value | |||
| Give the plot a title, an x-label xlab as well as an y-label ylab | |||
| Color the legend according to filllab | |||
| Setting flipped to TRUE makes the bars horizontal | |||
| Improve positioning details according to position | |||
| and finally reorder x_variable (default ist to order fractions according to seat order)} | |||
| } | |||
| \description{ | |||
| plot data depending on fractions in a standardized, configurable way | |||
| } | |||
| @@ -0,0 +1,18 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/analyze.R | |||
| \name{find_word} | |||
| \alias{find_word} | |||
| \title{Count number of occurences of a given word} | |||
| \usage{ | |||
| find_word(res, word) | |||
| } | |||
| \arguments{ | |||
| \item{res}{tibble} | |||
| \item{word}{character | |||
| Add number of occurences of word to talks} | |||
| } | |||
| \description{ | |||
| Count number of occurences of a given word | |||
| } | |||
| @@ -0,0 +1,21 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/analyze.R | |||
| \name{join_speaker} | |||
| \alias{join_speaker} | |||
| \title{add information from speaker table to a tibble containing speaker id} | |||
| \usage{ | |||
| join_speaker(tb, res, fraction_only = F) | |||
| } | |||
| \arguments{ | |||
| \item{tb}{tibble} | |||
| \item{res}{tibble} | |||
| \item{fraction_only}{bool | |||
| left join speaker information from res$speaker into tb. | |||
| if fraction_only is TRUE, only fraction is selected from the resulting joined tibble} | |||
| } | |||
| \description{ | |||
| add information from speaker table to a tibble containing speaker id | |||
| } | |||
| @@ -0,0 +1,24 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/repair.R | |||
| \name{lookup_speaker} | |||
| \alias{lookup_speaker} | |||
| \title{Lookup name in speakers table} | |||
| \usage{ | |||
| lookup_speaker(tb, speaker, name_variable) | |||
| } | |||
| \arguments{ | |||
| \item{tb}{tibble} | |||
| \item{speaker}{tibble} | |||
| \item{name_variable}{name | |||
| Tries to match the name_variable column with speaker names | |||
| returns a lookup table} | |||
| } | |||
| \description{ | |||
| Tries to find the correct speaker id given a name. | |||
| This is sufficient since every prename lastname combination in the bundestag is | |||
| unique (luckily :D) | |||
| } | |||
| @@ -0,0 +1,16 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/analyze.R | |||
| \docType{data} | |||
| \name{party_colors} | |||
| \alias{party_colors} | |||
| \title{lookup table for party colors} | |||
| \format{ | |||
| An object of class \code{character} of length 8. | |||
| } | |||
| \usage{ | |||
| party_colors | |||
| } | |||
| \description{ | |||
| lookup table for party colors | |||
| } | |||
| \keyword{datasets} | |||
| @@ -4,7 +4,17 @@ | |||
| \alias{repair} | |||
| \title{Repair parsed tables} | |||
| \usage{ | |||
| repair(parse_output) | |||
| repair(parse_output, repair_comments = FALSE) | |||
| } | |||
| \arguments{ | |||
| \item{parse_output}{tibble} | |||
| \item{repair_comments}{bool | |||
| If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. | |||
| Possible test: check identical(repair(res), repair(repair(res))) == TRUE | |||
| Since repaired tables should be a fixpoint of repair.} | |||
| } | |||
| \description{ | |||
| Repair parsed tables | |||
| @@ -0,0 +1,23 @@ | |||
| % Generated by roxygen2: do not edit by hand | |||
| % Please edit documentation in R/analyze.R | |||
| \name{word_usage_by_date} | |||
| \alias{word_usage_by_date} | |||
| \title{Counts how many talks do match a given pattern and summarises by date} | |||
| \usage{ | |||
| word_usage_by_date(res, patterns, name, tidy = F) | |||
| } | |||
| \arguments{ | |||
| \item{res}{tibble} | |||
| \item{patterns}{char list} | |||
| \item{name}{char ? what is name needed for??} | |||
| \item{tidy}{bool, default F | |||
| shorter summary if tidy=F | |||
| if tidy is set to T, the resulting tibble is tidy} | |||
| } | |||
| \description{ | |||
| Counts how many talks do match a given pattern and summarises by date | |||
| } | |||