| @@ -1,10 +1,10 @@ | |||||
| *.xml | *.xml | ||||
| /doc/ | /doc/ | ||||
| /Meta/ | /Meta/ | ||||
| /reports/ | |||||
| !/reports/*.pdf | |||||
| !/reports/*.tex | |||||
| /csv/* | |||||
| /inst/reports/ | |||||
| !/inst/reports/*.pdf | |||||
| !/inst/reports/*.tex | |||||
| /data/csv/* | |||||
| /parlament_49_53_texts/ | /parlament_49_53_texts/ | ||||
| .Rproj.user | .Rproj.user | ||||
| *.Rproj | *.Rproj | ||||
| @@ -36,9 +36,12 @@ fetch_batch <- function(offset, download_dir) { | |||||
| #' This fetches all available records of the 19th legislative period of the German Bundestag. | #' This fetches all available records of the 19th legislative period of the German Bundestag. | ||||
| #' | #' | ||||
| #' @param download_dir character | #' @param download_dir character | ||||
| #' @param create bool | |||||
| #' | |||||
| #' if create is TRUE, the directory given in download_dir is created | |||||
| #' | #' | ||||
| #' @export | #' @export | ||||
| fetch_all <- function(download_dir="records/", create=FALSE) { | |||||
| fetch_all <- function(download_dir="data/records/", create=FALSE) { | |||||
| # check if download_dir path is a directory path | # check if download_dir path is a directory path | ||||
| if (str_sub(download_dir, -1) != .Platform$file.sep) | if (str_sub(download_dir, -1) != .Platform$file.sep) | ||||
| download_dir <- str_c(download_dir, .Platform$file.sep) | download_dir <- str_c(download_dir, .Platform$file.sep) | ||||
| @@ -8,7 +8,7 @@ | |||||
| #' @param path character | #' @param path character | ||||
| #' | #' | ||||
| #' @export | #' @export | ||||
| read_all <- function(path="records/") { | |||||
| read_all <- function(path="data/records/") { | |||||
| cat("Reading all records from", path, "\n") | cat("Reading all records from", path, "\n") | ||||
| available_protocols <- list.files(path) | available_protocols <- list.files(path) | ||||
| res <- pblapply(available_protocols, read_one, path=path) | res <- pblapply(available_protocols, read_one, path=path) | ||||
| @@ -214,7 +214,7 @@ parse_speakerlist <- function(speakerliste_xml) { | |||||
| #' if create is set to TRUE, the directory given in path is created | #' if create is set to TRUE, the directory given in path is created | ||||
| #' | #' | ||||
| #' @export | #' @export | ||||
| write_to_csv <- function(tables, path="csv/", create=F) { | |||||
| write_to_csv <- function(tables, path="data/csv/", create=F) { | |||||
| check_directory(path, create) | check_directory(path, create) | ||||
| write.table(tables$speaker, str_c(path, "speaker.csv")) | write.table(tables$speaker, str_c(path, "speaker.csv")) | ||||
| write.table(tables$speeches, str_c(path, "speeches.csv")) | write.table(tables$speeches, str_c(path, "speeches.csv")) | ||||
| @@ -230,7 +230,7 @@ write_to_csv <- function(tables, path="csv/", create=F) { | |||||
| #' Reading the tables from a csv is way faster than reading and repairing the data every single time | #' Reading the tables from a csv is way faster than reading and repairing the data every single time | ||||
| #' | #' | ||||
| #' @export | #' @export | ||||
| read_from_csv <- function(path="csv/") { | |||||
| read_from_csv <- function(path="data/csv/") { | |||||
| list(speaker = read.table(str_c(path, "speaker.csv")) %>% | list(speaker = read.table(str_c(path, "speaker.csv")) %>% | ||||
| tibble() %>% | tibble() %>% | ||||
| mutate(id = as.character(id)), | mutate(id = as.character(id)), | ||||
| @@ -4,7 +4,7 @@ | |||||
| \alias{fetch_all} | \alias{fetch_all} | ||||
| \title{Download available records} | \title{Download available records} | ||||
| \usage{ | \usage{ | ||||
| fetch_all(download_dir = "records/", create = FALSE) | |||||
| fetch_all(download_dir = "data/records/", create = FALSE) | |||||
| } | } | ||||
| \arguments{ | \arguments{ | ||||
| \item{download_dir}{character} | \item{download_dir}{character} | ||||
| @@ -4,7 +4,7 @@ | |||||
| \alias{read_all} | \alias{read_all} | ||||
| \title{Parse xml records} | \title{Parse xml records} | ||||
| \usage{ | \usage{ | ||||
| read_all(path = "records/") | |||||
| read_all(path = "data/records/") | |||||
| } | } | ||||
| \arguments{ | \arguments{ | ||||
| \item{path}{character} | \item{path}{character} | ||||
| @@ -4,7 +4,7 @@ | |||||
| \alias{read_from_csv} | \alias{read_from_csv} | ||||
| \title{Read the needed tables for developing from a csv file.} | \title{Read the needed tables for developing from a csv file.} | ||||
| \usage{ | \usage{ | ||||
| read_from_csv(path = "csv/") | |||||
| read_from_csv(path = "data/csv/") | |||||
| } | } | ||||
| \arguments{ | \arguments{ | ||||
| \item{path}{char | \item{path}{char | ||||
| @@ -4,7 +4,7 @@ | |||||
| \alias{write_to_csv} | \alias{write_to_csv} | ||||
| \title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier} | \title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier} | ||||
| \usage{ | \usage{ | ||||
| write_to_csv(tables, path = "csv/", create = F) | |||||
| write_to_csv(tables, path = "data/csv/", create = F) | |||||
| } | } | ||||
| \arguments{ | \arguments{ | ||||
| \item{tables}{tibble list} | \item{tables}{tibble list} | ||||
| @@ -26,18 +26,18 @@ library(tidyr) | |||||
| First, you need to download all records of the current legislative period. | First, you need to download all records of the current legislative period. | ||||
| ```r | ```r | ||||
| fetch_all("../records/") # path to directory where records should be stored | |||||
| fetch_all("../data/records/") # path to directory where records should be stored | |||||
| ``` | ``` | ||||
| Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | ||||
| ```r | ```r | ||||
| read_all("../records/") %>% repair() -> res | |||||
| read_all("../data/records/") %>% repair() -> res | |||||
| ``` | ``` | ||||
| We also used `repair` to fix a bunch of formatting issues in the records and unpacked | We also used `repair` to fix a bunch of formatting issues in the records and unpacked | ||||
| the result into more descriptive variables. | the result into more descriptive variables. | ||||
| For development purposes, we load the tables from csv files. | For development purposes, we load the tables from csv files. | ||||
| ```{r} | ```{r} | ||||
| res <- read_from_csv('../csv/') | |||||
| res <- read_from_csv('../data/csv/') | |||||
| ``` | ``` | ||||
| and unpack our tibbles | and unpack our tibbles | ||||
| ```{r} | ```{r} | ||||
| @@ -25,11 +25,11 @@ library(ggplot2) | |||||
| First, you need to download all records of the current legislative period. | First, you need to download all records of the current legislative period. | ||||
| ```r | ```r | ||||
| fetch_all("../records/") # path to directory where records should be stored | |||||
| fetch_all("../data/records/") # path to directory where records should be stored | |||||
| ``` | ``` | ||||
| Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: | ||||
| ```r | ```r | ||||
| read_all("../records/") %>% repair() -> res | |||||
| read_all("../data/records/") %>% repair() -> res | |||||
| speeches <- res$speeches | speeches <- res$speeches | ||||
| speaker <- res$speaker | speaker <- res$speaker | ||||
| @@ -40,7 +40,7 @@ the result into more descriptive variables. | |||||
| For development purposes, we load the tables from csv files. | For development purposes, we load the tables from csv files. | ||||
| ```{r} | ```{r} | ||||
| tables <- read_from_csv('../csv/') | |||||
| tables <- read_from_csv('../data/csv/') | |||||
| comments <- tables$comments | comments <- tables$comments | ||||
| speeches <- tables$speeches | speeches <- tables$speeches | ||||
| @@ -50,7 +50,7 @@ talks <- tables$talks | |||||
| Further, we need to load a list of words that were used by Hitler but not in standard German texts. | Further, we need to load a list of words that were used by Hitler but not in standard German texts. | ||||
| ```{r} | ```{r} | ||||
| fil <- file('../hitler_texts/hitler_words') | |||||
| fil <- file('../data/hitler_texts/hitler_words') | |||||
| Worte <- readLines(fil) | Worte <- readLines(fil) | ||||
| hitlerwords <- tibble(Worte) | hitlerwords <- tibble(Worte) | ||||
| ``` | ``` | ||||