Просмотр исходного кода

add record existence checks in read_all

genderequality-alternative
flavis 4 лет назад
Родитель
Сommit
1152115a0f
3 измененных файлов: 26 добавлений и 9 удалений
  1. +2
    -3
      R/fetch.R
  2. +6
    -0
      R/helpers.R
  3. +18
    -6
      R/parse.R

+ 2
- 3
R/fetch.R Просмотреть файл

@@ -42,9 +42,8 @@ fetch_batch <- function(offset, download_dir) {
#'
#' @export
fetch_all <- function(download_dir="inst/records/", create=FALSE) {
# check if download_dir path is a directory path
if (str_sub(download_dir, -1) != .Platform$file.sep)
download_dir <- str_c(download_dir, .Platform$file.sep)
# append file separator if needed
download_dir <- make_directory_path(download_dir)

check_directory(download_dir, create)
cat("Fetching all available records from bundestag.de. This may take a while ...\n")


+ 6
- 0
R/helpers.R Просмотреть файл

@@ -18,3 +18,9 @@ check_directory <- function(path, create=F) {
stop("Directory exists, but is not writeable.")
}
}

# appends a file seperator at end of path if needed
make_directory_path <- function(path) {
if (!str_ends(path, .Platform$file.sep)) str_c(path, .Platform$file.sep)
else path
}

+ 18
- 6
R/parse.R Просмотреть файл

@@ -3,16 +3,27 @@
#' Creates a list of tibbles containing relevant information from all records
#' stored in the input directory.
#'
#' @param path character
#' @param path path to records directory
#' @param pattern search pattern to find records in directory
#'
#' @export
read_all <- function(path="inst/records/") {
read_all <- function(path="inst/records/", pattern="-data\\.xml") {
# append file separator if needed
path <- make_directory_path(path)
cat("Reading all records from", path, "\n")
available_protocols <- list.files(path)
res <- pblapply(available_protocols, read_one, path=path)

# list all files in directory and filter by search pattern
fs <- list.files(path)
available_protocols <- fs[str_detect(fs, pattern)]
if (length(available_protocols) == 0)
stop("The given directory is empty or does not exist.")
stop(paste0("The given directory does not exist or does not contain files matching \"",
pattern,
"\"."))

# parse records one by one and remove null entries
res <- compact %$% pblapply(available_protocols, read_one, path=path)
if (length(res) == 0) stop("No valid records found. Did you fetch successfully?")

lapply(res, `[[`, "speaker") %>%
bind_rows() %>%
@@ -55,7 +66,8 @@ read_all <- function(path="inst/records/") {
# this reads all currently parseable data from one xml
read_one <- function(name, path) {
x <- tryCatch(read_xml(paste0(path, name)),
error = function(c) NULL)
error = function(c) NULL,
warning = function(c) NULL)
if (is.null(x)) return(NULL)
# extract date of session
date <- xml_attr(x, "sitzung-datum")


Загрузка…
Отмена
Сохранить