#' Count number of occurences of a given word #' #' @param res tibble #' @param word character #' #' Add number of occurences of word to talks #' #' @export find_word <- function(res, word) { talks <- res$talks mutate( talks, occurences = sapply( str_match_all(talks$content, regex(word, ignore_case = TRUE)), nrow ) ) } #' add information from speaker table to a tibble containing speaker id #' #' @param tb tibble #' @param res tibble #' @param fraction_only bool #' #' left join speaker information from res$speaker into tb. #' if fraction_only is TRUE, only fraction is selected from the resulting joined tibble #' #' @export join_speaker <- function(tb, res, fraction_only = F) { joined <- left_join(tb, res$speaker, by=c("speaker" = "id")) if (fraction_only) select(joined, "fraction") else joined } #' lookup table for party colors #' #' @export party_colors <- c( AfD="#1A9FDD", FDP="#FEEB34", "CDU/CSU"="#000000", SPD="#DF0B25", "BÜNDNIS 90 / DIE GRÜNEN"="#4A932B", "DIE LINKE"="#BC3475", "AfD&Fraktionslos"="#AAAAFF", Fraktionslos="#AAAAAA" ) party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", "DIE LINKE", "BÜNDNIS 90 / DIE GRÜNEN", "SPD", "CDU/CSU", "FDP", "AfD", NA_character_)) #' plot data depending on fractions in a standardized, configurable way #' #' @param tb tibble #' @param x_variable column in tb #' @param y_variable column in tb #' @param fill column in tb #' @param title char #' @param xlab char #' @param ylab char #' @param filllab char #' @param flipped bool #' @param position char #' @param reorder bool #' #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. #' Then color the plot depending on the fill value #' Give the plot a title, an x-label xlab as well as an y-label ylab #' Color the legend according to filllab #' Setting flipped to TRUE makes the bars horizontal #' Improve positioning details according to position #' and finally reorder x_variable (default ist to order fractions according to seat order) #' #' @export bar_plot_fractions <- function(tb, x_variable = NULL, # default is fraction y_variable = NULL, # default is n fill = NULL, # default is fraction title = NULL, xlab = "Fraction", ylab = "n", filllab = "Fraction", flipped = TRUE, position = "dodge", reorder = FALSE) { # capture expressions in arguments fill <- enexpr(fill) y_variable <- enexpr(y_variable) x_variable <- enexpr(x_variable) # set default values if (is.null(fill)) fill <- expr(fraction) if (is.null(y_variable)) y_variable <- expr(n) if (is.null(x_variable)) x_variable <- expr(fraction) # either reorder fraction factor by variable value if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable), y = !!y_variable, fill = reorder(!!fill, -!!y_variable)) # or reorder fraction factor by party seat order in parliament (default) else maps <- aes(x = factor(!!x_variable, levels = party_order), y = !!y_variable, fill = factor(!!fill, levels = party_order)) # make a bar plot ggplot(tb, maps) + scale_fill_manual(values = party_colors, na.value = "#555555") + xlab(xlab) + ylab(ylab) + labs(fill = filllab) + ggtitle(title) + geom_bar(stat = "identity", position = position) -> plt # if flipped == TRUE, draw bars horizontally (default TRUE) if (flipped) plt + coord_flip() else plt } #' Counts how many talks do match a given pattern and summarises by date #' #' @param res tibble #' @param patterns char list #' @param name char ? what is name needed for?? #' @param tidy bool, default F #' #' shorter summary if tidy=F #' if tidy is set to T, the resulting tibble is tidy #' #' @export word_usage_by_date <- function(res, patterns, name, tidy=F) { tb <- res$talks nms <- names(patterns) for (i in seq_along(patterns)) { if (!is.null(nms)) name <- nms[[i]] else name <- patterns[[i]] tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]])) } left_join(tb, res$speeches, by=c("speech_id" = "id")) %>% group_by(date) %>% summarize(across(where(is.numeric), sum)) %>% arrange(date) -> tb if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count") else tb }