From e038d86339e35bf5a6b61214c3e6113032209727 Mon Sep 17 00:00:00 2001
From: flavis
Date: Tue, 10 Aug 2021 12:42:41 +0200
Subject: [PATCH] add check for valid structure of tables

---
 DESCRIPTION |  1 +
 R/analyze.R |  3 +++
 R/helpers.R | 43 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index a0170ad..3ef2aa8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -16,6 +16,7 @@ Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.1.1
 Imports:
     dplyr,
+    lubridate,
     pbapply,
     purrr,
     rvest,
diff --git a/R/analyze.R b/R/analyze.R
index 4570a02..984c897 100644
--- a/R/analyze.R
+++ b/R/analyze.R
@@ -7,6 +7,7 @@
 #'
 #' @export
 find_word <- function(res, word) {
+  is_valid_res(res)
   talks <- res$talks
   mutate(
     talks,
@@ -28,6 +29,7 @@ find_word <- function(res, word) {
 #'
 #' @export
 join_speaker <- function(tb, res, fraction_only = F) {
+  is_valid_res(res)
   joined <- left_join(tb, res$speaker, by=c("speaker" = "id"))
   if (fraction_only) select(joined, "fraction")
   else joined
@@ -145,6 +147,7 @@ bar_plot_fractions <- function(tb,
 #'
 #' @export
 word_usage_by_date <- function(res, patterns, tidy=F) {
+  is_valid_res(res)
   tb <- res$talks
   nms <- names(patterns)
   for (i in seq_along(patterns)) {
diff --git a/R/helpers.R b/R/helpers.R
index 863852c..db097dd 100644
--- a/R/helpers.R
+++ b/R/helpers.R
@@ -24,3 +24,46 @@ make_directory_path <- function(path) {
   if (!str_ends(path, .Platform$file.sep)) str_c(path, .Platform$file.sep)
   else path
 }
+
+# Check that `res` has the table structure the analysis functions rely on.
+#
+# `res` must contain the tables "speaker", "speeches", "talks", "comments" and
+# "applause"; every entry of `res` must be a tibble, and each of the five
+# tables must provide the columns (with the types) verified below.
+# Stops with a descriptive error at the first failed check; returns NULL
+# invisibly when all checks pass.
+is_valid_res <- function(res) {
+  stopifnot("Data is missing relevant tables. Is this a return value of read_all or repair?"
+            = all(c("speaker", "speeches", "talks", "comments", "applause") %in% names(res)))
+  # Test each entry separately so one tibble cannot mask non-tibble entries.
+  stopifnot("Some entries of res are no tibbles."
+            = all(vapply(res, function(tb) typeof(tb) == "list" && inherits(tb, "tbl"),
+                         logical(1))))
+  stopifnot("Speaker table is of wrong format."
+            = all(c("id", "prename", "lastname", "fraction", "title", "role_short", "role_long")
+                  %in% names(res$speaker)) &&
+              all(vapply(res$speaker, is.character, logical(1))))
+  stopifnot("Speeches table is of wrong format."
+            = all(c("id", "speaker", "date") %in% names(res$speeches)) &&
+              is.character(res$speeches$id) &&
+              is.character(res$speeches$speaker) &&
+              lubridate::is.Date(res$speeches$date))
+  stopifnot("Talks table is of wrong format."
+            = all(c("speech_id", "speaker", "content") %in% names(res$talks)) &&
+              all(vapply(res$talks, is.character, logical(1))))
+  stopifnot("Comments table is of wrong format."
+            = all(c("speech_id", "on_speaker", "fraction", "commenter", "content")
+                  %in% names(res$comments)) &&
+              all(vapply(res$comments, is.character, logical(1))))
+  stopifnot("Applause table is of wrong format."
+            = all(c("speech_id", "on_speaker", "CDU_CSU", "SPD", "FDP", "DIE_LINKE", "BUENDNIS_90_DIE_GRUENEN", "AfD")
+                  %in% names(res$applause)) &&
+              is.character(res$applause$speech_id) &&
+              is.character(res$applause$on_speaker) &&
+              is.logical(res$applause$`CDU_CSU`) &&
+              is.logical(res$applause$`SPD`) &&
+              is.logical(res$applause$`FDP`) &&
+              is.logical(res$applause$`DIE_LINKE`) &&
+              is.logical(res$applause$`AfD`) &&
+              is.logical(res$applause$`BUENDNIS_90_DIE_GRUENEN`))
+}