# Unified git diff for scraping/fetch.R (index 9faefe8..dd3bb9b), stored here
# with its line breaks collapsed onto one line. The patch text below is left
# untouched; this header only summarizes what the three hunks show.
#
# Hunk 1 (-2,6 +2,7): adds `library(pbapply)` after `library(stringr)`.
#
# Hunk 2 (-13,6 +14,8): `download_protocol(path, name)` gains two statements
#   after the `try %$% download.file(...)` call: it increments `progress`
#   with `<<-` and passes the new value to `setTimerProgressBar(pb, progress)`.
#   Neither `pb` nor `progress` is a parameter or local of that function;
#   both are assigned with `<<-` inside `fetch_all()` (hunk 3).
#
# Hunk 3 (-31,6 +34,16): `fetch_all()` now
#   - prints a start message with `cat()`,
#   - creates `pb` via `timerProgressBar(min=0, max=250, width=40, char="+")`
#     and sets `progress` to 0, both with `<<-`,
#   - registers `on.exit({close(pb); cat("Done.\n")})` so the bar is closed
#     and "Done." is printed when the function exits,
#   - runs `while(fetch_batch(offset)) offset <- offset + 10` from offset 0,
#   - calls `setTimerProgressBar(pb, 250)` once the loop has ended.
#
# NOTE(review): the bar maximum 250 is hard-coded in two places; the diff does
#   not show where the number comes from or what happens when `progress`
#   goes past it -- confirm against the number of protocols actually listed.
# NOTE(review): `download_protocol()` relies on `pb`/`progress` having been
#   created by `fetch_all()`; calling it on its own presumably fails on the
#   missing globals -- TODO confirm, or pass them in explicitly.
# NOTE(review): `%$%` is not defined in the visible lines (presumably it comes
#   from ../utils/helpers.R). If `try` swallows download errors, `progress` is
#   incremented for failed downloads as well -- verify that this is intended.
diff --git a/scraping/fetch.R b/scraping/fetch.R index 9faefe8..dd3bb9b 100644 --- a/scraping/fetch.R +++ b/scraping/fetch.R @@ -2,6 +2,7 @@ source("../utils/helpers.R") source("config.R") library(rvest) library(stringr) +library(pbapply) mk_absolute_url <- function(path) paste0("https://www.bundestag.de", path) @@ -13,6 +14,8 @@ mk_url <- function(offset) { download_protocol <- function(path, name) { fp <- paste0(DOWNLOAD_DIR, name) try %$% download.file(mk_absolute_url(path), fp, quiet=T) + progress <<- progress + 1 + setTimerProgressBar(pb, progress) } fetch_batch <- function(offset) { @@ -31,6 +34,16 @@ fetch_batch <- function(offset) { # - what if: page not reachable # - wrong format, etc. fetch_all <- function() { + cat("Fetching all available protocols from bundestag.de. This may take a while ...\n") + + # create progress bar + pb <<- timerProgressBar(min=0, max=250, width=40, char="+") + progress <<- 0 + # close progress bar on exit (also on error) + on.exit({close(pb); cat("Done.\n")}) + # fetch batch by batch offset <- 0 while(fetch_batch(offset)) offset <- offset + 10 + # if successful, set progressbar to 100% + setTimerProgressBar(pb, 250) }