| @@ -0,0 +1,2 @@ | |||||
| ^doc$ | |||||
| ^Meta$ | |||||
| @@ -1 +1,3 @@ | |||||
| *.xml | *.xml | ||||
| /doc/ | |||||
| /Meta/ | |||||
| @@ -19,8 +19,10 @@ Imports: | |||||
| pbapply, | pbapply, | ||||
| rvest, | rvest, | ||||
| stringr, | stringr, | ||||
| tibble, | |||||
| xml2 | xml2 | ||||
| Suggests: | Suggests: | ||||
| rmarkdown, | rmarkdown, | ||||
| knitr | |||||
| knitr, | |||||
| ggplot2 | |||||
| VignetteBuilder: knitr | VignetteBuilder: knitr | ||||
| @@ -1,7 +1,10 @@ | |||||
| # Generated by roxygen2: do not edit by hand | # Generated by roxygen2: do not edit by hand | ||||
| export(read_all) | |||||
| export(repair) | |||||
| import(dplyr) | import(dplyr) | ||||
| import(pbapply) | import(pbapply) | ||||
| import(stringr) | import(stringr) | ||||
| import(tibble) | import(tibble) | ||||
| import(utils) | |||||
| import(xml2) | import(xml2) | ||||
| @@ -5,6 +5,7 @@ | |||||
| #' @import pbapply | #' @import pbapply | ||||
| #' @import stringr | #' @import stringr | ||||
| #' @import xml2 | #' @import xml2 | ||||
| #' @import utils | |||||
| #' @keywords internal | #' @keywords internal | ||||
| "_PACKAGE" | "_PACKAGE" | ||||
| @@ -1,5 +1,13 @@ | |||||
| # for usage see the example at the end | # for usage see the example at the end | ||||
| #' Parse xml records | |||||
| #' | |||||
| #' Creates a list of tibbles containing relevant information from all records | |||||
| #' stored in the input directory. | |||||
| #' | |||||
| #' @param path character | |||||
| #' | |||||
| #' @export | |||||
| read_all <- function(path="records/") { | read_all <- function(path="records/") { | ||||
| cat("Reading all records from", path, "\n") | cat("Reading all records from", path, "\n") | ||||
| available_protocols <- list.files(path) | available_protocols <- list.files(path) | ||||
| @@ -41,7 +41,9 @@ repair_talks <- function(talks) { | |||||
| talks | talks | ||||
| } | } | ||||
| # repairs all tables | |||||
| #' Repairs parsed tables | |||||
| #' | |||||
| #' @export | |||||
| repair <- function(parse_output) { | repair <- function(parse_output) { | ||||
| list(redner = repair_redner(parse_output$redner), | list(redner = repair_redner(parse_output$redner), | ||||
| reden = repair_reden(parse_output$reden), | reden = repair_reden(parse_output$reden), | ||||
| @@ -0,0 +1,15 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/parse.R | |||||
| \name{read_all} | |||||
| \alias{read_all} | |||||
| \title{Parse xml records} | |||||
| \usage{ | |||||
| read_all(path = "records/") | |||||
| } | |||||
| \arguments{ | |||||
| \item{path}{character} | |||||
| } | |||||
| \description{ | |||||
| Creates a list of tibbles containing relevant information from all records | |||||
| stored in the input directory. | |||||
| } | |||||
| @@ -0,0 +1,11 @@ | |||||
| % Generated by roxygen2: do not edit by hand | |||||
| % Please edit documentation in R/repair.R | |||||
| \name{repair} | |||||
| \alias{repair} | |||||
| \title{Repairs parsed tables} | |||||
| \usage{ | |||||
| repair(parse_output) | |||||
| } | |||||
| \description{ | |||||
| Repairs parsed tables | |||||
| } | |||||
| @@ -14,23 +14,36 @@ knitr::opts_chunk$set( | |||||
| ) | ) | ||||
| ``` | ``` | ||||
| ```{r setup} | |||||
| library(hateimparlament) | |||||
| library(dplyr) | |||||
| library(ggplot2) | |||||
| ``` | |||||
| ## Preparation of data | |||||
| First, you need to download all records of the current legislative period. | |||||
| ```r | ```r | ||||
| read_all() %>% repair() -> res | |||||
| fetch_all("../records/") # path to directory where records should be stored | |||||
| ``` | |||||
| Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | |||||
| ```{r} | |||||
| read_all("../records/") %>% repair() -> res | |||||
| reden <- res$reden | reden <- res$reden | ||||
| redner <- res$redner | redner <- res$redner | ||||
| talks <- res$talks | talks <- res$talks | ||||
| ``` | |||||
| We also used `repair` to fix a bunch of formatting issues in the records and unpacked | |||||
| the result into more descriptive variables. | |||||
| # first tries | |||||
| ## Analysis | |||||
| Now we can start analysing our parsed dataset, e.g. to find out which party gives the most talks: | |||||
| ```{r} | |||||
| left_join(reden, redner, by=c("redner" = "id")) %>% | left_join(reden, redner, by=c("redner" = "id")) %>% | ||||
| group_by(fraktion) %>% | group_by(fraktion) %>% | ||||
| summarize(n = n()) %>% | summarize(n = n()) %>% | ||||
| ggplot(aes(x = fraktion, y = n)) + | ggplot(aes(x = fraktion, y = n)) + | ||||
| geom_bar(stat = "identity") | geom_bar(stat = "identity") | ||||
| ``` | ``` | ||||
| ```{r setup} | |||||
| library(hateimparlament) | |||||
| ``` | |||||