From 574480a9bb4f708dbdc66f25ca6d351123c373c7 Mon Sep 17 00:00:00 2001 From: flavis Date: Tue, 29 Jun 2021 17:29:43 +0200 Subject: [PATCH] fix vignettes --- .Rbuildignore | 2 ++ .gitignore | 2 ++ DESCRIPTION | 4 +++- NAMESPACE | 3 +++ R/hateimparlament-package.R | 1 + R/parse.R | 8 ++++++++ R/repair.R | 4 +++- man/read_all.Rd | 15 +++++++++++++++ man/repair.Rd | 11 +++++++++++ vignettes/funwithdata.Rmd | 27 ++++++++++++++++++++------- 10 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 .Rbuildignore create mode 100644 man/read_all.Rd create mode 100644 man/repair.Rd diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..72278f5 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,2 @@ +^doc$ +^Meta$ diff --git a/.gitignore b/.gitignore index 6722cd9..44e9580 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *.xml +/doc/ +/Meta/ diff --git a/DESCRIPTION b/DESCRIPTION index cee53fb..0ec52b1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,8 +19,10 @@ Imports: pbapply, rvest, stringr, + tibble, xml2 Suggests: rmarkdown, - knitr + knitr, + ggplot2 VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index d2b8265..2b904ce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,7 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(read_all) +export(repair) import(dplyr) import(pbapply) import(stringr) import(tibble) +import(utils) import(xml2) diff --git a/R/hateimparlament-package.R b/R/hateimparlament-package.R index b2cb3a3..b2006dd 100644 --- a/R/hateimparlament-package.R +++ b/R/hateimparlament-package.R @@ -5,6 +5,7 @@ #' @import pbapply #' @import stringr #' @import xml2 +#' @import utils #' @keywords internal "_PACKAGE" diff --git a/R/parse.R b/R/parse.R index ebe97c3..bca02bc 100644 --- a/R/parse.R +++ b/R/parse.R @@ -1,5 +1,13 @@ # for usage see the example at the end +#' Parse xml records +#' +#' Creates a list of tibbles containing relevant information from all records +#' stored in the input directory. 
+#' +#' @param path character +#' +#' @export read_all <- function(path="records/") { cat("Reading all records from", path, "\n") available_protocols <- list.files(path) diff --git a/R/repair.R b/R/repair.R index bffc2ed..44693f6 100644 --- a/R/repair.R +++ b/R/repair.R @@ -41,7 +41,9 @@ repair_talks <- function(talks) { talks } -# repairs all tables +#' Repairs parsed tables +#' +#' @export repair <- function(parse_output) { list(redner = repair_redner(parse_output$redner), reden = repair_reden(parse_output$reden), diff --git a/man/read_all.Rd b/man/read_all.Rd new file mode 100644 index 0000000..65ad22c --- /dev/null +++ b/man/read_all.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/parse.R +\name{read_all} +\alias{read_all} +\title{Parse xml records} +\usage{ +read_all(path = "records/") +} +\arguments{ +\item{path}{character} +} +\description{ +Creates a list of tibbles containing relevant information from all records +stored in the input directory. +} diff --git a/man/repair.Rd b/man/repair.Rd new file mode 100644 index 0000000..8f3dacb --- /dev/null +++ b/man/repair.Rd @@ -0,0 +1,11 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/repair.R +\name{repair} +\alias{repair} +\title{Repairs parsed tables} +\usage{ +repair(parse_output) +} +\description{ +Repairs parsed tables +} diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd index 1658939..eaec29b 100644 --- a/vignettes/funwithdata.Rmd +++ b/vignettes/funwithdata.Rmd @@ -14,23 +14,36 @@ knitr::opts_chunk$set( ) ``` +```{r setup} +library(hateimparlament) +library(dplyr) +library(ggplot2) +``` + +## Preparation of data + +First, you need to download all records of the current legislative period. ```r -read_all() %>% repair() -> res +fetch_all("../records/") # path to directory where records should be stored +``` +Second, those `.xml` files need to be parsed into `R` `tibbles`. 
This is accomplished by: +```{r} +read_all("../records/") %>% repair() -> res reden <- res$reden redner <- res$redner talks <- res$talks +``` +We also used `repair` to fix a bunch of formatting issues in the records and unpacked +the result into more descriptive variables. -# first tries +## Analysis +Now we can start analysing our parsed dataset, e.g. find out which party gives the most talks: +```{r} left_join(reden, redner, by=c("redner" = "id")) %>% group_by(fraktion) %>% summarize(n = n()) %>% ggplot(aes(x = fraktion, y = n)) + geom_bar(stat = "identity") ``` - - -```{r setup} -library(hateimparlament) -```