| @@ -2,6 +2,7 @@ source("../utils/helpers.R") | |||||
| source("config.R") | source("config.R") | ||||
| library(rvest) | library(rvest) | ||||
| library(stringr) | library(stringr) | ||||
| library(pbapply) | |||||
# Build a full bundestag.de URL from a site-relative path.
#
# path: character vector of paths; each is appended verbatim to the site root
#       (no slash is inserted or removed).
# Returns a character vector of absolute URLs.
mk_absolute_url <- function(path) {
  site_root <- "https://www.bundestag.de"
  paste0(site_root, path)
}
| @@ -13,6 +14,8 @@ mk_url <- function(offset) { | |||||
# Download a single protocol file and advance the shared progress bar.
#
# path: site-relative path of the protocol; turned into a full URL with
#       mk_absolute_url().
# name: file name for the download; appended verbatim to DOWNLOAD_DIR.
#
# The download is wrapped in try, so a failed download does not abort the
# surrounding run. The progress counter is advanced whether or not the
# download succeeded.
download_protocol <- function(path, name) {
  fp <- paste0(DOWNLOAD_DIR, name)
  # NOTE(review): `%$%` is not defined in this chunk; presumably it comes from
  # the sourced helpers and applies `try` to the call on its right - confirm.
  try %$% download.file(mk_absolute_url(path), fp, quiet = TRUE)
  # `pb` and `progress` are created by fetch_all(). Skip the update when this
  # function is called on its own, instead of failing after the download.
  if (exists("pb") && exists("progress")) {
    progress <<- progress + 1
    setTimerProgressBar(pb, progress)
  }
}
| fetch_batch <- function(offset) { | fetch_batch <- function(offset) { | ||||
| @@ -31,6 +34,16 @@ fetch_batch <- function(offset) { | |||||
| # - what if: page not reachable | # - what if: page not reachable | ||||
| # - wrong format, etc. | # - wrong format, etc. | ||||
# Fetch every available protocol, batch by batch, while showing a progress bar.
#
# expected_total: value used as the progress bar's maximum (defaults to the
#                 previously hard-coded 250). It only scales the bar; the loop
#                 itself runs until fetch_batch() returns a false value.
#
# Side effects: assigns `pb` and `progress` with `<<-` so that
# download_protocol() can advance the bar, and prints status text with cat().
fetch_all <- function(expected_total = 250) {
  cat("Fetching all available protocols from bundestag.de. This may take a while ...\n")
  # create the progress bar and reset the shared counter
  pb <<- timerProgressBar(min = 0, max = expected_total, width = 40, char = "+")
  progress <<- 0
  # close the progress bar on exit; this also runs (and prints "Done.") when
  # an error interrupts the loop
  on.exit({
    close(pb)
    cat("Done.\n")
  }, add = TRUE)
  # fetch batch by batch; the offset advances in steps of 10
  offset <- 0
  while (fetch_batch(offset)) {
    offset <- offset + 10
  }
  # if successful, set the progress bar to 100%
  setTimerProgressBar(pb, expected_total)
}