From 1152115a0f77a8aebd9a4439eaaa9bc25b67d120 Mon Sep 17 00:00:00 2001 From: flavis Date: Mon, 9 Aug 2021 18:38:56 +0200 Subject: [PATCH] add record existence checks in read_all --- R/fetch.R | 5 ++--- R/helpers.R | 6 ++++++ R/parse.R | 24 ++++++++++++++++++------ 3 files changed, 26 insertions(+), 9 deletions(-) diff --git a/R/fetch.R b/R/fetch.R index a6262f4..eb58921 100644 --- a/R/fetch.R +++ b/R/fetch.R @@ -42,9 +42,8 @@ fetch_batch <- function(offset, download_dir) { #' #' @export fetch_all <- function(download_dir="inst/records/", create=FALSE) { - # check if download_dir path is a directory path - if (str_sub(download_dir, -1) != .Platform$file.sep) - download_dir <- str_c(download_dir, .Platform$file.sep) + # append file separator if needed + download_dir <- make_directory_path(download_dir) check_directory(download_dir, create) cat("Fetching all available records from bundestag.de. This may take a while ...\n") diff --git a/R/helpers.R b/R/helpers.R index 1e0120f..863852c 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -18,3 +18,9 @@ check_directory <- function(path, create=F) { stop("Directory exists, but is not writeable.") } } + +# appends a file seperator at end of path if needed +make_directory_path <- function(path) { + if (!str_ends(path, .Platform$file.sep)) str_c(path, .Platform$file.sep) + else path +} diff --git a/R/parse.R b/R/parse.R index c6c0e17..4d59ffc 100644 --- a/R/parse.R +++ b/R/parse.R @@ -3,16 +3,27 @@ #' Creates a list of tibbles containing relevant information from all records #' stored in the input directory. #' -#' @param path character +#' @param path path to records directory +#' @param pattern search pattern to find records in directory #' #' @export -read_all <- function(path="inst/records/") { +read_all <- function(path="inst/records/", pattern="-data\\.xml") { + # append file separator if needed + path <- make_directory_path(path) + cat("Reading all records from", path, "\n") - available_protocols <- list.files(path) - res <- pblapply(available_protocols, read_one, path=path) + # list all files in directory and filter by search pattern + fs <- list.files(path) + available_protocols <- fs[str_detect(fs, pattern)] if (length(available_protocols) == 0) - stop("The given directory is empty or does not exist.") + stop(paste0("The given directory does not exist or does not contain files matching \"", + pattern, + "\".")) + + # parse records one by one and remove null entries + res <- compact %$% pblapply(available_protocols, read_one, path=path) + if (length(res) == 0) stop("No valid records found. Did you fetch successfully?") lapply(res, `[[`, "speaker") %>% bind_rows() %>% @@ -55,7 +66,8 @@ read_all <- function(path="inst/records/") { # this reads all currently parseable data from one xml read_one <- function(name, path) { x <- tryCatch(read_xml(paste0(path, name)), - error = function(c) NULL) + error = function(c) NULL, + warning = function(c) NULL) if (is.null(x)) return(NULL) # extract date of session date <- xml_attr(x, "sitzung-datum")