|
- #' Count number of occurences of a given word
- #'
- #' @param res tibble
- #' @param word character
- #'
- #' Add number of occurences of word to talks
- #'
- #' @export
- find_word <- function(res, word) {
- is_valid_res(res)
- stopifnot("word must be of type character" = is.character(word))
- talks <- res$talks
- mutate(
- talks,
- occurences = sapply(
- str_match_all(talks$content, regex(word, ignore_case = TRUE)),
- nrow
- )
- )
- }
-
- #' add information from speaker table to a tibble containing speaker id
- #'
- #' @param tb tibble
- #' @param res list of tibbles
- #' @param fraction_only if TRUE, only select fraction from the resulting joined tibble
- #'
- #' left join speaker information from res$speaker into tb.
- #' if fraction_only, drop all columns but fraction
- #'
- #' @export
- join_speaker <- function(tb, res, fraction_only = F) {
- is_valid_res(res)
- stopifnot("fraction_only must be of type logical" = is.logical(fraction_only))
- stopifnot("tb must be a tibble" = inherits(tb, "tbl"))
- stopifnot("tb must have a speaker column" = "speaker" %in% names(tb))
-
- joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
- if (fraction_only) select(joined, "fraction")
- else joined
- }
-
- #' lookup table for official party colors
- #'
- #' @export
- party_colors <- c(
- AfD="#1A9FDD",
- FDP="#FEEB34",
- "CDU/CSU"="#000000",
- SPD="#DF0B25",
- "B\u00DCNDNIS 90/DIE GR\u00DCNEN"="#4A932B",
- "DIE LINKE"="#BC3475",
- "AfD&Fraktionslos"="#AAAAFF",
- Fraktionslos="#AAAAAA"
- )
-
- party_order <- factor(c("Fraktionslos", "AfD&Fraktionslos",
- "DIE LINKE", "B\u00DCNDNIS 90/DIE GR\u00DCNEN", "SPD", "CDU/CSU",
- "FDP", "AfD", NA_character_))
-
- #' Bar chart visualizing fraction based data
- #'
- #' Can be configured to also visualize data not related to fractions.
- #'
- #' @param tb tibble
- #' @param x_variable column in tb, default is fraction
- #' @param y_variable column in tb, default is n
- #' @param fill column in tb, default is fraction
- #' @param title plot title
- #' @param xlab label for x axis, default is fraction
- #' @param ylab label for y axis, default is n
- #' @param filllab default is 'Fraction'
- #' @param flipped if TRUE draw bars horizontally, else vertically. Default is TRUE
- #' @param position default is 'dodge'
- #' @param reorder Either reorder fraction factor by variable value or reorder fraction factor by party seat order in parliament (default).
- #' @param rotatelab Default is FALSE. If true turns the labels 90 degrees to the axis.
- #'
- #' plot data from tb in the following way: for each item in x_variable show the corresponding value in y_variable.
- #' Then color the plot depending on the fill value.
- #' Give the plot a title and a label for x-axis and y-axis,
- #' color the legend according to filllab and finally
- #' improve positioning details according to position
- #'
- #' @export
- bar_plot_fractions <- function(tb,
- x_variable = NULL, # default is fraction
- y_variable = NULL, # default is n
- fill = NULL, # default is fraction
- title = NULL,
- xlab = "Fraction",
- ylab = "n",
- filllab = "Fraction",
- flipped = TRUE,
- position = "dodge",
- reorder = FALSE,
- rotatelab = FALSE) {
- # capture expressions in arguments
- fill <- enexpr(fill)
- y_variable <- enexpr(y_variable)
- x_variable <- enexpr(x_variable)
-
- # set default values
- if (is.null(fill)) fill <- expr(fraction)
- if (is.null(y_variable)) y_variable <- expr(n)
- if (is.null(x_variable)) x_variable <- expr(fraction)
-
- # check if variables exist
- if (!rlang::expr_text(x_variable) %in% names(tb))
- stop(paste0(rlang::expr_text(x_variable),
- " is not a column of tb. Did you set x_variable accordingly?"),
- .call = NULL)
- if (!rlang::expr_text(y_variable) %in% names(tb))
- stop(paste0(rlang::expr_text(y_variable),
- " is not a column of tb. Did you set y_variable accordingly?"),
- .call = NULL)
- if (!rlang::expr_text(fill) %in% names(tb))
- stop(paste0(rlang::expr_text(fill),
- " is not a column of tb. Did you set fill accordingly?"),
- .call = NULL)
-
- # check argument types
- stopifnot("title has to be of type character or NULL" = is.character(title) || is.null(title))
- stopifnot("xlab has to be of type character" = is.character(xlab))
- stopifnot("ylab has to be of type character" = is.character(ylab))
- stopifnot("filllab has to be of type character" = is.character(filllab))
- stopifnot("flipped has to be of type logical" = is.logical(flipped))
- stopifnot("rotatelab has to be of type logical" = is.logical(rotatelab))
- stopifnot("reorder has to be of type logical" = is.logical(reorder))
-
- # either reorder fraction factor by variable value
- if (reorder) maps <- aes(x = reorder(!!x_variable, -!!y_variable),
- y = !!y_variable,
- fill = reorder(!!fill, -!!y_variable))
- # or reorder fraction factor by party seat order in parliament (default)
- else maps <- aes(x = factor(!!x_variable, levels = party_order),
- y = !!y_variable,
- fill = factor(!!fill, levels = party_order))
-
- # make a bar plot
- ggplot(tb, maps) +
- scale_fill_manual(values = party_colors, na.value = "#555555") +
- xlab(xlab) +
- ylab(ylab) +
- labs(fill = filllab) +
- ggtitle(title) +
- geom_bar(stat = "identity", position = position) ->
- plt
-
- # if rotatelab == TRUE, rotate x labels by 90 degrees
- if (rotatelab)
- plt + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) -> plt
-
- # if flipped == TRUE, draw bars horizontally (default TRUE)
- if (flipped) plt + coord_flip() else plt
- }
-
- #' Word usage summarised by date
- #'
- #' Counts how many talks do match a given pattern and summarises by date.
- #'
- #' @param res List of Tibbles to be analysed.
- #' @param patterns Words to look up.
- #' @param tidy default is FALSE.
- #'
- #' @export
- word_usage_by_date <- function(res, patterns, tidy=F) {
- is_valid_res(res)
- stopifnot("patterns must be of type character" = is.character(patterns))
- stopifnot("tidy must be of type logical" = is.logical(tidy))
-
- tb <- res$talks
- nms <- names(patterns)
- for (i in seq_along(patterns)) {
- if (!is.null(nms)) name <- nms[[i]]
- else name <- patterns[[i]]
- tb <- mutate(tb, {{name}} := str_count(content, patterns[[i]]))
- }
- left_join(tb, res$speeches, by=c("speech_id" = "id")) %>%
- group_by(date) %>%
- summarize(across(where(is.numeric), sum)) %>%
- arrange(date) -> tb
- if (!tidy) pivot_longer(tb, where(is.numeric) , names_to = "pattern", values_to="count")
- else tb
- }
|