#' Count number of occurences of a given word #' #' @param res tibble #' @param word character #' #' Add number of occurences of word to talks #' #' @export find_word <- function(res, word) { is_valid_res(res) stopifnot("word must be of type character" = is.character(word)) talks <- res$talks mutate( talks, occurences = sapply( str_match_all(talks$content, regex(word, ignore_case = TRUE)), nrow ) ) } #' add information from speaker table to a tibble containing speaker id #' #' @param tb tibble #' @param res list of tibbles #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble #' #' left join speaker information from res$speaker into tb. #' if fraction_only, drop all columns but fraction #' #' @export join_speaker <- function(tb, res, fraction_only = F) { is_valid_res(res) stopifnot("fraction_only must be of type logical" = is.logical(fraction_only)) stopifnot("tb must be a tibble" = inherits(tb, "tbl")) stopifnot("tb must have a speaker column" = "speaker" %in% names(tb)) joined <- left_join(tb, res$speaker, by=c("speaker" = "id")) if (fraction_only) select(joined, "fraction") else joined } #' lookup table for official party colors #' #' @export party_colors <- c( AfD="#1A9FDD", FDP="#FEEB34", "CDU/CSU"="#000000", SPD="#DF0B25", "BÜNDNIS 90/DIE GRÜNEN"="#4A932B", "DIE LINKE"="#BC3475", "AfD&Fraktionslos"="#AAAAFF", Fraktionslos="#AAAAAA" ) party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos", "DIE LINKE", "BÜNDNIS 90/DIE GRÜNEN", "SPD", "CDU/CSU", "FDP", "AfD", NA_character_)) #' Bar chart visualizing fraction based data #' #' Can be configured to also visualize data not related to fractions. #' #' @param tb tibble #' @param x_variable column in tb, default is fraction #' @param y_variable column in tb, default is n #' @param fill column in tb, default is fraction #' @param title plot title #' @param xlab label for x axis, default is fraction #' @param ylab label for y axis, default is n #' @param filllab default is 'Fraction' #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE #' @param position default is 'dodge' #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default). #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis. #' #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable. #' Then color the plot depending on the fill value. #' Give the plot a title and a label for x-axis and y-axis, #' color the legend according to filllab and finally #' improve positioning details according to position #' #' @export bar_plot_fractions <- function(tb, x_variable = NULL, # default is fraction y_variable = NULL, # default is n fill = NULL, # default is fraction title = NULL, xlab = "Fraction", ylab = "n", filllab = "Fraction", flipped = TRUE, position = "dodge", reorder = FALSE, rotatelab = FALSE) { # capture expressions in arguments fill <- enexpr(fill) y_variable <- enexpr(y_variable) x_variable <- enexpr(x_variable) # set default values if (is.null(fill)) fill <- expr(fraction) if (is.null(y_variable)) y_variable <- expr(n) if (is.null(x_variable)) x_variable <- expr(fraction) # check if variables exist if (!rlang::expr_text(x_variable) %in% names(tb)) stop(paste0(rlang::expr_text(x_variable), " is not a column of tb. Did you set x_variable accordingly?"), .call = NULL) if (!rlang::expr_text(y_variable) %in% names(tb)) stop(paste0(rlang::expr_text(y_variable), " is not a column of tb. Did you set y_variable accordingly?"), .call = NULL) if (!rlang::expr_text(fill) %in% names(tb)) stop(paste0(rlang::expr_text(fill), " is not a column of tb. Did you set fill accordingly?"), .call = NULL) # check argument types stopifnot("title has to be of type character or NULL" = is.character(title) || is.null(title)) stopifnot("xlab has to be of type character" = is.character(xlab)) stopifnot("ylab has to be of type character" = is.character(ylab)) stopifnot("filllab has to be of type character" = is.character(filllab)) stopifnot("flipped has to be of type logical" = is.logical(flipped)) stopifnot("rotatelab has to be of type logical" = is.logical(rotatelab)) stopifnot("reorder has to be of type logical" = is.logical(reorder)) # either reorder fraction factor by variable value if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable), y = !!y_variable, fill = reorder(!!fill, -!!y_variable)) # or reorder fraction factor by party seat order in parliament (default) else maps <- aes(x = factor(!!x_variable, levels = party_order), y = !!y_variable, fill = factor(!!fill, levels = party_order)) # make a bar plot ggplot(tb, maps) + scale_fill_manual(values = party_colors, na.value = "#555555") + xlab(xlab) + ylab(ylab) + labs(fill = filllab) + ggtitle(title) + geom_bar(stat = "identity", position = position) -> plt # if rotatelab == TRUE, rotate x labels by 90 degrees if (rotatelab) plt + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -> plt # if flipped == TRUE, draw bars horizontally (default TRUE) if (flipped) plt + coord_flip() else plt } #' Word usage summarised by date #' #' Counts how many talks do match a given pattern and summarises by date. #' #' @param res List of Tibbles to be analysed. #' @param patterns Words to look up. #' @param tidy default is FALSE. #' #' @export word_usage_by_date <- function(res, patterns, tidy=F) { is_valid_res(res) stopifnot("patterns must be of type character" = is.character(patterns)) stopifnot("tidy must be of type logical" = is.logical(tidy)) tb <- res$talks nms <- names(patterns) for (i in seq_along(patterns)) { if (!is.null(nms)) name <- nms[[i]] else name <- patterns[[i]] tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]])) } left_join(tb, res$speeches, by=c("speech_id" = "id")) %>% group_by(date) %>% summarize(across(where(is.numeric), sum)) %>% arrange(date) -> tb if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count") else tb }