|
|
|
@@ -2,6 +2,7 @@ source("../utils/helpers.R") |
|
|
|
source("config.R") |
|
|
|
library(rvest) |
|
|
|
library(stringr) |
|
|
|
library(pbapply) |
|
|
|
|
|
|
|
mk_absolute_url <- function(path) paste0("https://www.bundestag.de", path) |
|
|
|
|
|
|
|
@@ -13,6 +14,8 @@ mk_url <- function(offset) { |
|
|
|
download_protocol <- function(path, name) { |
|
|
|
fp <- paste0(DOWNLOAD_DIR, name) |
|
|
|
try %$% download.file(mk_absolute_url(path), fp, quiet=T) |
|
|
|
progress <<- progress + 1 |
|
|
|
setTimerProgressBar(pb, progress) |
|
|
|
} |
|
|
|
|
|
|
|
fetch_batch <- function(offset) { |
|
|
|
@@ -31,6 +34,16 @@ fetch_batch <- function(offset) { |
|
|
|
# - what if: page not reachable |
|
|
|
# - wrong format, etc. |
|
|
|
fetch_all <- function() { |
|
|
|
cat("Fetching all available protocols from bundestag.de. This may take a while ...\n") |
|
|
|
|
|
|
|
# create progress bar |
|
|
|
pb <<- timerProgressBar(min=0, max=250, width=40, char="+") |
|
|
|
progress <<- 0 |
|
|
|
# close progress bar on exit (also on error) |
|
|
|
on.exit({close(pb); cat("Done.\n")}) |
|
|
|
# fetch batch by batch |
|
|
|
offset <- 0 |
|
|
|
while(fetch_batch(offset)) offset <- offset + 10 |
|
|
|
# if successful, set progressbar to 100% |
|
|
|
setTimerProgressBar(pb, 250) |
|
|
|
} |