Explorar el Código

add progress bar for fetching

package
flavis hace 4 años
padre
commit
72a32c5cc7
Se han modificado 1 ficheros con 13 adiciones y 0 borrados
  1. +13
    -0
      scraping/fetch.R

+ 13
- 0
scraping/fetch.R Ver fichero

@@ -2,6 +2,7 @@ source("../utils/helpers.R")
source("config.R")
library(rvest)
library(stringr)
library(pbapply)

mk_absolute_url <- function(path) paste0("https://www.bundestag.de", path)

@@ -13,6 +14,8 @@ mk_url <- function(offset) {
download_protocol <- function(path, name) {
fp <- paste0(DOWNLOAD_DIR, name)
try %$% download.file(mk_absolute_url(path), fp, quiet=T)
progress <<- progress + 1
setTimerProgressBar(pb, progress)
}

fetch_batch <- function(offset) {
@@ -31,6 +34,16 @@ fetch_batch <- function(offset) {
# - what if: page not reachable
# - wrong format, etc.
fetch_all <- function() {
cat("Fetching all available protocols from bundestag.de. This may take a while ...\n")
# create progress bar
pb <<- timerProgressBar(min=0, max=250, width=40, char="+")
progress <<- 0
# close progress bar on exit (also on error)
on.exit({close(pb); cat("Done.\n")})
# fetch batch by batch
offset <- 0
while(fetch_batch(offset)) offset <- offset + 10
# if successful, set progressbar to 100%
setTimerProgressBar(pb, 250)
}

Cargando…
Cancelar
Guardar