| @@ -1,10 +1,31 @@ | |||||
| #' Count number of occurences of a given word | |||||
| #' | |||||
| #' @param res tibble | |||||
| #' @param word character | |||||
| #' | |||||
| #' Add number of occurences of word to talks | |||||
| #' | |||||
| #' @export | #' @export | ||||
| find_word <- function(res, word) { | find_word <- function(res, word) { | ||||
| talks <- res$talks | talks <- res$talks | ||||
| mutate(talks, occurences = sapply(str_match_all(talks$content, regex(word, ignore_case = TRUE)), | |||||
| nrow)) | |||||
| mutate( | |||||
| talks, | |||||
| occurences = sapply( | |||||
| str_match_all(talks$content, regex(word, ignore_case = TRUE)), | |||||
| nrow | |||||
| ) | |||||
| ) | |||||
| } | } | ||||
| #' add information from speaker table to a tibble containing speaker id | |||||
| #' | |||||
| #' @param tb tibble | |||||
| #' @param res tibble | |||||
| #' @param fraction_only bool | |||||
| #' | |||||
| #' left join speaker information from res$speaker into tb. | |||||
| #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble | |||||
| #' | |||||
| #' @export | #' @export | ||||
| join_speaker <- function(tb, res, fraction_only = F) { | join_speaker <- function(tb, res, fraction_only = F) { | ||||
| joined <- left_join(tb, res$speaker, by=c("speaker" = "id")) | joined <- left_join(tb, res$speaker, by=c("speaker" = "id")) | ||||
| @@ -12,6 +33,8 @@ join_speaker <- function(tb, res, fraction_only = F) { | |||||
| else joined | else joined | ||||
| } | } | ||||
| #' lookup table for party colors | |||||
| #' | |||||
| #' @export | #' @export | ||||
| party_colors <- c( | party_colors <- c( | ||||
| AfD="#1A9FDD", | AfD="#1A9FDD", | ||||
| @@ -28,6 +51,28 @@ party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", | |||||
| "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", | "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", | ||||
| "FDP", "AfD", NA_character_)) | "FDP", "AfD", NA_character_)) | ||||
| #' plot data depending on fractions in a standardized, configurable way | |||||
| #' | |||||
| #' @param tb tibble | |||||
| #' @param x_variable column in tb | |||||
| #' @param y_variable column in tb | |||||
| #' @param fill column in tb | |||||
| #' @param title char | |||||
| #' @param xlab char | |||||
| #' @param ylab char | |||||
| #' @param filllab char | |||||
| #' @param flipped bool | |||||
| #' @param position char | |||||
| #' @param reorder bool | |||||
| #' | |||||
| #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. | |||||
| #' Then color the plot depending on the fill value | |||||
| #' Give the plot a title, an x-label xlab as well as an y-label ylab | |||||
| #' Color the legend according to filllab | |||||
| #' Setting flipped to TRUE makes the bars horizontal | |||||
| #' Improve positioning details according to position | |||||
| #' and finally reorder x_variable (default ist to order fractions according to seat order) | |||||
| #' | |||||
| #' @export | #' @export | ||||
| bar_plot_fractions <- function(tb, | bar_plot_fractions <- function(tb, | ||||
| x_variable = NULL, # default is fraction | x_variable = NULL, # default is fraction | ||||
| @@ -71,8 +116,16 @@ bar_plot_fractions <- function(tb, | |||||
| if (flipped) plt + coord_flip() else plt | if (flipped) plt + coord_flip() else plt | ||||
| } | } | ||||
| # Counts how many talks do match a given pattern and summarises by date | |||||
| # | |||||
| #' Counts how many talks do match a given pattern and summarises by date | |||||
| #' | |||||
| #' @param res tibble | |||||
| #' @param patterns char list | |||||
| #' @param name char ? what is name needed for?? | |||||
| #' @param tidy bool, default F | |||||
| #' | |||||
| #' shorter summary if tidy=F | |||||
| #' if tidy is set to T, the resulting tibble is tidy | |||||
| #' | |||||
| #' @export | #' @export | ||||
| word_usage_by_date <- function(res, patterns, name, tidy=F) { | word_usage_by_date <- function(res, patterns, name, tidy=F) { | ||||
| tb <- res$talks | tb <- res$talks | ||||
| @@ -90,7 +90,7 @@ parse_speaker <- function(speaker_xml) { | |||||
| nm <- xml_child(speaker_xml) | nm <- xml_child(speaker_xml) | ||||
| vorname <- xml_get(nm, "vorname") | vorname <- xml_get(nm, "vorname") | ||||
| nachname <- xml_get(nm, "nachname") | nachname <- xml_get(nm, "nachname") | ||||
| fraction <- xml_get(nm, "fraction") | |||||
| fraction <- xml_get(nm, "fraktion") | |||||
| titel <- xml_get(nm, "titel") | titel <- xml_get(nm, "titel") | ||||
| rolle <- xml_find_all(nm, "rolle") | rolle <- xml_find_all(nm, "rolle") | ||||
| if (length(rolle) > 0) { | if (length(rolle) > 0) { | ||||
| @@ -115,7 +115,7 @@ parse_speech <- function(speech_xml, date) { | |||||
| for (node in cs) { | for (node in cs) { | ||||
| if (xml_name(node) == "p" || xml_name(node) == "name") { | if (xml_name(node) == "p" || xml_name(node) == "name") { | ||||
| klasse <- xml_attr(node, "klasse") | klasse <- xml_attr(node, "klasse") | ||||
| if ((!is.na(klasse) && klasse == "speaker") || xml_name(node) == "name") { | |||||
| if ((!is.na(klasse) && klasse == "redner") || xml_name(node) == "name") { | |||||
| if (!is.na(cur_speaker)) { | if (!is.na(cur_speaker)) { | ||||
| speech <- c(speech_id = speech_id, | speech <- c(speech_id = speech_id, | ||||
| speaker = cur_speaker, | speaker = cur_speaker, | ||||
| @@ -194,7 +194,7 @@ parse_speechlist <- function(speechlist_xml, date) { | |||||
| } | } | ||||
| # create a tibble of speaker from a list of xml nodes representing speaker | # create a tibble of speaker from a list of xml nodes representing speaker | ||||
| parse_speakerliste <- function(speakerliste_xml) { | |||||
| parse_speakerlist <- function(speakerliste_xml) { | |||||
| d <- sapply(speakerliste_xml, parse_speaker) | d <- sapply(speakerliste_xml, parse_speaker) | ||||
| tibble(id = d["id",], | tibble(id = d["id",], | ||||
| vorname = d["vorname",], | vorname = d["vorname",], | ||||
| @@ -59,7 +59,7 @@ repair_talks <- function(talks) { | |||||
| #' unique (luckily :D) | #' unique (luckily :D) | ||||
| #' | #' | ||||
| #' @param tb tibble | #' @param tb tibble | ||||
| #' @param redner tibble | |||||
| #' @param speaker tibble | |||||
| #' @param name_variable name | #' @param name_variable name | ||||
| #' | #' | ||||
| #' Tries to match the name_variable column with speaker names | #' Tries to match the name_variable column with speaker names | ||||
| @@ -83,29 +83,31 @@ lookup_speaker <- function(tb, speaker, name_variable) { | |||||
| mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) | mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, ""))) | ||||
| } | } | ||||
| repair_comments <- function(comments, redner) { | |||||
| repair_comments <- function(comments, speaker) { | |||||
| cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", | cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n", | ||||
| "Use repair(, repair_commments = FALSE) to skip this.\n")) | "Use repair(, repair_commments = FALSE) to skip this.\n")) | ||||
| # try to find a redner id for each actual comment | |||||
| # try to find a speaker id for each actual comment | |||||
| comments %>% | comments %>% | ||||
| filter(!is.na(kommentator)) %>% | filter(!is.na(kommentator)) %>% | ||||
| lookup_redner(redner, kommentator) %>% | |||||
| lookup_speaker(speaker, kommentator) %>% | |||||
| left_join(comments, ., by="kommentator") %>% | left_join(comments, ., by="kommentator") %>% | ||||
| select(-kommentator) | select(-kommentator) | ||||
| } | } | ||||
| #' Repair parsed tables | #' Repair parsed tables | ||||
| #' | #' | ||||
| #' TODO: Explain repair_comments argument | |||||
| #' (if TRUE, we try to lookup redner names in redner table) | |||||
| #' @param parse_output tibble | |||||
| #' @param repair_comments bool | |||||
| #' | |||||
| #' If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. | |||||
| #' | #' | ||||
| #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE | #' Possible test: check identical(repair(res), repair(repair(res))) == TRUE | ||||
| #' Since repaired tables should be a fixpoint of repair. | #' Since repaired tables should be a fixpoint of repair. | ||||
| #' @export | #' @export | ||||
| repair <- function(parse_output, repair_comments = FALSE) { | repair <- function(parse_output, repair_comments = FALSE) { | ||||
| list(redner = repair_speaker(parse_output$speaker), | |||||
| reden = repair_speeches(parse_output$speeches), | |||||
| list(speaker = repair_speaker(parse_output$speaker), | |||||
| speeches = repair_speeches(parse_output$speeches), | |||||
| talks = repair_talks(parse_output$talks), | talks = repair_talks(parse_output$talks), | ||||
| comments = if(repair_comments) repair_comments(parse_output$comments, | comments = if(repair_comments) repair_comments(parse_output$comments, | ||||
| parse_output$speaker) | parse_output$speaker) | ||||
| @@ -0,0 +1,54 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/analyze.R | |||||
| \name{bar_plot_fractions} | |||||
| \alias{bar_plot_fractions} | |||||
| \title{plot data depending on fractions in a standardized, configurable way} | |||||
| \usage{ | |||||
| bar_plot_fractions( | |||||
| tb, | |||||
| x_variable = NULL, | |||||
| y_variable = NULL, | |||||
| fill = NULL, | |||||
| title = NULL, | |||||
| xlab = "Fraction", | |||||
| ylab = "n", | |||||
| filllab = "Fraction", | |||||
| flipped = TRUE, | |||||
| position = "dodge", | |||||
| reorder = FALSE | |||||
| ) | |||||
| } | |||||
| \arguments{ | |||||
| \item{tb}{tibble} | |||||
| \item{x_variable}{column in tb} | |||||
| \item{y_variable}{column in tb} | |||||
| \item{fill}{column in tb} | |||||
| \item{title}{char} | |||||
| \item{xlab}{char} | |||||
| \item{ylab}{char} | |||||
| \item{filllab}{char} | |||||
| \item{flipped}{bool} | |||||
| \item{position}{char} | |||||
| \item{reorder}{bool | |||||
| plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. | |||||
| Then color the plot depending on the fill value | |||||
| Give the plot a title, an x-label xlab as well as an y-label ylab | |||||
| Color the legend according to filllab | |||||
| Setting flipped to TRUE makes the bars horizontal | |||||
| Improve positioning details according to position | |||||
| and finally reorder x_variable (default ist to order fractions according to seat order)} | |||||
| } | |||||
| \description{ | |||||
| plot data depending on fractions in a standardized, configurable way | |||||
| } | |||||
| @@ -0,0 +1,18 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/analyze.R | |||||
| \name{find_word} | |||||
| \alias{find_word} | |||||
| \title{Count number of occurences of a given word} | |||||
| \usage{ | |||||
| find_word(res, word) | |||||
| } | |||||
| \arguments{ | |||||
| \item{res}{tibble} | |||||
| \item{word}{character | |||||
| Add number of occurences of word to talks} | |||||
| } | |||||
| \description{ | |||||
| Count number of occurences of a given word | |||||
| } | |||||
| @@ -0,0 +1,21 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/analyze.R | |||||
| \name{join_speaker} | |||||
| \alias{join_speaker} | |||||
| \title{add information from speaker table to a tibble containing speaker id} | |||||
| \usage{ | |||||
| join_speaker(tb, res, fraction_only = F) | |||||
| } | |||||
| \arguments{ | |||||
| \item{tb}{tibble} | |||||
| \item{res}{tibble} | |||||
| \item{fraction_only}{bool | |||||
| left join speaker information from res$speaker into tb. | |||||
| if fraction_only is TRUE, only fraction is selected from the resulting joined tibble} | |||||
| } | |||||
| \description{ | |||||
| add information from speaker table to a tibble containing speaker id | |||||
| } | |||||
| @@ -0,0 +1,24 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/repair.R | |||||
| \name{lookup_speaker} | |||||
| \alias{lookup_speaker} | |||||
| \title{Lookup name in speakers table} | |||||
| \usage{ | |||||
| lookup_speaker(tb, speaker, name_variable) | |||||
| } | |||||
| \arguments{ | |||||
| \item{tb}{tibble} | |||||
| \item{speaker}{tibble} | |||||
| \item{name_variable}{name | |||||
| Tries to match the name_variable column with speaker names | |||||
| returns a lookup table} | |||||
| } | |||||
| \description{ | |||||
| Tries to find the correct speaker id given a name. | |||||
| This is sufficient since every prename lastname combination in the bundestag is | |||||
| unique (luckily :D) | |||||
| } | |||||
| @@ -0,0 +1,16 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/analyze.R | |||||
| \docType{data} | |||||
| \name{party_colors} | |||||
| \alias{party_colors} | |||||
| \title{lookup table for party colors} | |||||
| \format{ | |||||
| An object of class \code{character} of length 8. | |||||
| } | |||||
| \usage{ | |||||
| party_colors | |||||
| } | |||||
| \description{ | |||||
| lookup table for party colors | |||||
| } | |||||
| \keyword{datasets} | |||||
| @@ -4,7 +4,17 @@ | |||||
| \alias{repair} | \alias{repair} | ||||
| \title{Repair parsed tables} | \title{Repair parsed tables} | ||||
| \usage{ | \usage{ | ||||
| repair(parse_output) | |||||
| repair(parse_output, repair_comments = FALSE) | |||||
| } | |||||
| \arguments{ | |||||
| \item{parse_output}{tibble} | |||||
| \item{repair_comments}{bool | |||||
| If repair_comments is TRUE, members of the parliament mentioned in comments are looked up in speaker table. | |||||
| Possible test: check identical(repair(res), repair(repair(res))) == TRUE | |||||
| Since repaired tables should be a fixpoint of repair.} | |||||
| } | } | ||||
| \description{ | \description{ | ||||
| Repair parsed tables | Repair parsed tables | ||||
| @@ -0,0 +1,23 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/analyze.R | |||||
| \name{word_usage_by_date} | |||||
| \alias{word_usage_by_date} | |||||
| \title{Counts how many talks do match a given pattern and summarises by date} | |||||
| \usage{ | |||||
| word_usage_by_date(res, patterns, name, tidy = F) | |||||
| } | |||||
| \arguments{ | |||||
| \item{res}{tibble} | |||||
| \item{patterns}{char list} | |||||
| \item{name}{char ? what is name needed for??} | |||||
| \item{tidy}{bool, default F | |||||
| shorter summary if tidy=F | |||||
| if tidy is set to T, the resulting tibble is tidy} | |||||
| } | |||||
| \description{ | |||||
| Counts how many talks do match a given pattern and summarises by date | |||||
| } | |||||