From 574480a9bb4f708dbdc66f25ca6d351123c373c7 Mon Sep 17 00:00:00 2001
From: flavis <christian@flavigny.de>
Date: Tue, 29 Jun 2021 17:29:43 +0200
Subject: [PATCH] fix vignettes

---
 .Rbuildignore               |  2 ++
 .gitignore                  |  2 ++
 DESCRIPTION                 |  4 +++-
 NAMESPACE                   |  3 +++
 R/hateimparlament-package.R |  1 +
 R/parse.R                   |  8 ++++++++
 R/repair.R                  |  4 +++-
 man/read_all.Rd             | 15 +++++++++++++++
 man/repair.Rd               | 11 +++++++++++
 vignettes/funwithdata.Rmd   | 27 ++++++++++++++++++++-------
 10 files changed, 68 insertions(+), 9 deletions(-)
 create mode 100644 .Rbuildignore
 create mode 100644 man/read_all.Rd
 create mode 100644 man/repair.Rd

diff --git a/.Rbuildignore b/.Rbuildignore
new file mode 100644
index 0000000..72278f5
--- /dev/null
+++ b/.Rbuildignore
@@ -0,0 +1,2 @@
+^doc$
+^Meta$
diff --git a/.gitignore b/.gitignore
index 6722cd9..44e9580 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 *.xml
+/doc/
+/Meta/
diff --git a/DESCRIPTION b/DESCRIPTION
index cee53fb..0ec52b1 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -19,8 +19,10 @@ Imports:
     pbapply,
     rvest,
     stringr,
+    tibble,
     xml2
 Suggests: 
     rmarkdown,
-    knitr
+    knitr,
+    ggplot2
 VignetteBuilder: knitr
diff --git a/NAMESPACE b/NAMESPACE
index d2b8265..2b904ce 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,7 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
+export(read_all)
+export(repair)
 import(dplyr)
 import(pbapply)
 import(stringr)
 import(tibble)
+import(utils)
 import(xml2)
diff --git a/R/hateimparlament-package.R b/R/hateimparlament-package.R
index b2cb3a3..b2006dd 100644
--- a/R/hateimparlament-package.R
+++ b/R/hateimparlament-package.R
@@ -5,6 +5,7 @@
 #' @import pbapply
 #' @import stringr
 #' @import xml2
+#' @import utils
 #' @keywords internal
 "_PACKAGE"
 
diff --git a/R/parse.R b/R/parse.R
index ebe97c3..bca02bc 100644
--- a/R/parse.R
+++ b/R/parse.R
@@ -1,5 +1,13 @@
 # for usage see the example at the end
 
+#' Parse xml records
+#'
+#' Creates a list of tibbles containing relevant information from all records
+#' stored in the input directory.
+#'
+#' @param path character
+#'
+#' @export
 read_all <- function(path="records/") {
     cat("Reading all records from", path, "\n")
     available_protocols <- list.files(path)
diff --git a/R/repair.R b/R/repair.R
index bffc2ed..44693f6 100644
--- a/R/repair.R
+++ b/R/repair.R
@@ -41,7 +41,9 @@ repair_talks <- function(talks) {
     talks
 }
 
-# repairs all tables
+#' Repairs parsed tables
+#'
+#' @export
 repair <- function(parse_output) {
     list(redner = repair_redner(parse_output$redner),
          reden = repair_reden(parse_output$reden),
diff --git a/man/read_all.Rd b/man/read_all.Rd
new file mode 100644
index 0000000..65ad22c
--- /dev/null
+++ b/man/read_all.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/parse.R
+\name{read_all}
+\alias{read_all}
+\title{Parse xml records}
+\usage{
+read_all(path = "records/")
+}
+\arguments{
+\item{path}{character}
+}
+\description{
+Creates a list of tibbles containing relevant information from all records
+stored in the input directory.
+}
diff --git a/man/repair.Rd b/man/repair.Rd
new file mode 100644
index 0000000..8f3dacb
--- /dev/null
+++ b/man/repair.Rd
@@ -0,0 +1,11 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/repair.R
+\name{repair}
+\alias{repair}
+\title{Repairs parsed tables}
+\usage{
+repair(parse_output)
+}
+\description{
+Repairs parsed tables
+}
diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd
index 1658939..eaec29b 100644
--- a/vignettes/funwithdata.Rmd
+++ b/vignettes/funwithdata.Rmd
@@ -14,23 +14,36 @@ knitr::opts_chunk$set(
 )
 ```
 
+```{r setup}
+library(hateimparlament)
+library(dplyr)
+library(ggplot2)
+```
+
+## Preparation of data
+
+First, you need to download all records of the current legislative period.
 ```r
-read_all() %>% repair() -> res
+fetch_all("../records/") # path to directory where records should be stored
+```
+Second, those `.xml` files need to be parsed into `R` tibbles. This is accomplished by:
+```{r}
+read_all("../records/") %>% repair() -> res
 
 reden <- res$reden
 redner <- res$redner
 talks <- res$talks
+```
+We also used `repair` to fix a bunch of formatting issues in the records and unpacked
+the result into more descriptive variables.
 
-# first tries
+## Analysis
 
+Now we can start analysing our parsed dataset, e.g. to find out which party gives the most talks:
+```{r}
 left_join(reden, redner, by=c("redner" = "id")) %>%
     group_by(fraktion) %>%
     summarize(n = n()) %>%
     ggplot(aes(x = fraktion, y = n)) +
     geom_bar(stat = "identity")
 ```
-
-
-```{r setup}
-library(hateimparlament)
-```