diff --git a/.gitignore b/.gitignore index e5ec99b..c77aadd 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,10 @@ *.xml /doc/ /Meta/ -/reports/ -!/reports/*.pdf -!/reports/*.tex -/csv/* +/inst/reports/ +!/inst/reports/*.pdf +!/inst/reports/*.tex +/data/csv/* /parlament_49_53_texts/ .Rproj.user *.Rproj diff --git a/R/fetch.R b/R/fetch.R index 09f9cfe..9482eb0 100644 --- a/R/fetch.R +++ b/R/fetch.R @@ -36,9 +36,12 @@ fetch_batch <- function(offset, download_dir) { #' This fetches all available records of the 19th legislative period of the german Bundestag. #' #' @param download_dir character +#' @param create bool +#' +#' if create is TRUE, the directory given in download_dir is created #' #' @export -fetch_all <- function(download_dir="records/", create=FALSE) { +fetch_all <- function(download_dir="data/records/", create=FALSE) { # check if download_dir path is a directory path if (str_sub(download_dir, -1) != .Platform$file.sep) download_dir <- str_c(download_dir, .Platform$file.sep) diff --git a/R/parse.R b/R/parse.R index 92fd215..e9c75e9 100644 --- a/R/parse.R +++ b/R/parse.R @@ -8,7 +8,7 @@ #' @param path character #' #' @export -read_all <- function(path="records/") { +read_all <- function(path="data/records/") { cat("Reading all records from", path, "\n") available_protocols <- list.files(path) res <- pblapply(available_protocols, read_one, path=path) @@ -214,7 +214,7 @@ parse_speakerlist <- function(speakerliste_xml) { #' if create is set to TRUE, the directory given in path is created #' #' @export -write_to_csv <- function(tables, path="csv/", create=F) { +write_to_csv <- function(tables, path="data/csv/", create=F) { check_directory(path, create) write.table(tables$speaker, str_c(path, "speaker.csv")) write.table(tables$speeches, str_c(path, "speeches.csv")) @@ -230,7 +230,7 @@ write_to_csv <- function(tables, path="csv/", create=F) { #' Reading the tables from a csv is way faster than reading and repairing the data every single time #' #' @export -read_from_csv <- function(path="csv/") { +read_from_csv <- function(path="data/csv/") { list(speaker = read.table(str_c(path, "speaker.csv")) %>% tibble() %>% mutate(id = as.character(id)), diff --git a/hitler_texts/german_words b/data/hitler_texts/german_words similarity index 100% rename from hitler_texts/german_words rename to data/hitler_texts/german_words diff --git a/hitler_texts/goebbels_sportpalast b/data/hitler_texts/goebbels_sportpalast similarity index 100% rename from hitler_texts/goebbels_sportpalast rename to data/hitler_texts/goebbels_sportpalast diff --git a/hitler_texts/hitler_rede_1 b/data/hitler_texts/hitler_rede_1 similarity index 100% rename from hitler_texts/hitler_rede_1 rename to data/hitler_texts/hitler_rede_1 diff --git a/hitler_texts/hitler_rede_2 b/data/hitler_texts/hitler_rede_2 similarity index 100% rename from hitler_texts/hitler_rede_2 rename to data/hitler_texts/hitler_rede_2 diff --git a/hitler_texts/hitler_rede_3 b/data/hitler_texts/hitler_rede_3 similarity index 100% rename from hitler_texts/hitler_rede_3 rename to data/hitler_texts/hitler_rede_3 diff --git a/hitler_texts/hitler_rede_4 b/data/hitler_texts/hitler_rede_4 similarity index 100% rename from hitler_texts/hitler_rede_4 rename to data/hitler_texts/hitler_rede_4 diff --git a/hitler_texts/hitler_rede_5 b/data/hitler_texts/hitler_rede_5 similarity index 100% rename from hitler_texts/hitler_rede_5 rename to data/hitler_texts/hitler_rede_5 diff --git a/hitler_texts/hitler_rede_6 b/data/hitler_texts/hitler_rede_6 similarity index 100% rename from hitler_texts/hitler_rede_6 rename to data/hitler_texts/hitler_rede_6 diff --git a/hitler_texts/hitler_rede_7 b/data/hitler_texts/hitler_rede_7 similarity index 100% rename from hitler_texts/hitler_rede_7 rename to data/hitler_texts/hitler_rede_7 diff --git a/hitler_texts/hitler_words b/data/hitler_texts/hitler_words similarity index 100% rename from hitler_texts/hitler_words rename to data/hitler_texts/hitler_words diff --git a/hitler_texts/mein_kampf b/data/hitler_texts/mein_kampf similarity index 100% rename from hitler_texts/mein_kampf rename to data/hitler_texts/mein_kampf diff --git a/hitler_texts/parse.py b/data/hitler_texts/parse.py similarity index 100% rename from hitler_texts/parse.py rename to data/hitler_texts/parse.py diff --git a/reports/zwischenbericht.pdf b/inst/reports/zwischenbericht.pdf similarity index 100% rename from reports/zwischenbericht.pdf rename to inst/reports/zwischenbericht.pdf diff --git a/reports/zwischenbericht.tex b/inst/reports/zwischenbericht.tex similarity index 100% rename from reports/zwischenbericht.tex rename to inst/reports/zwischenbericht.tex diff --git a/man/fetch_all.Rd b/man/fetch_all.Rd index 344bddc..694e97d 100644 --- a/man/fetch_all.Rd +++ b/man/fetch_all.Rd @@ -4,7 +4,7 @@ \alias{fetch_all} \title{Download available records} \usage{ -fetch_all(download_dir = "records/", create = FALSE) +fetch_all(download_dir = "data/records/", create = FALSE) } \arguments{ \item{download_dir}{character} diff --git a/man/read_all.Rd b/man/read_all.Rd index 65ad22c..b29d11d 100644 --- a/man/read_all.Rd +++ b/man/read_all.Rd @@ -4,7 +4,7 @@ \alias{read_all} \title{Parse xml records} \usage{ -read_all(path = "records/") +read_all(path = "data/records/") } \arguments{ \item{path}{character} diff --git a/man/read_from_csv.Rd b/man/read_from_csv.Rd index 4bf7e55..3244c82 100644 --- a/man/read_from_csv.Rd +++ b/man/read_from_csv.Rd @@ -4,7 +4,7 @@ \alias{read_from_csv} \title{Read the needed tables for developing from a csv file.} \usage{ -read_from_csv(path = "csv/") +read_from_csv(path = "data/csv/") } \arguments{ \item{path}{char diff --git a/man/write_to_csv.Rd b/man/write_to_csv.Rd index 649bb29..cd7f200 100644 --- a/man/write_to_csv.Rd +++ b/man/write_to_csv.Rd @@ -4,7 +4,7 @@ \alias{write_to_csv} \title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier} \usage{ -write_to_csv(tables, path = "csv/", create = F) +write_to_csv(tables, path = "data/csv/", create = F) } \arguments{ \item{tables}{tibble list} diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd index b9a2e69..5bf5fee 100644 --- a/vignettes/funwithdata.Rmd +++ b/vignettes/funwithdata.Rmd @@ -26,18 +26,18 @@ library(tidyr) First, you need to download all records of the current legislative period. ```r -fetch_all("../records/") # path to directory where records should be stored +fetch_all("../data/records/") # path to directory where records should be stored ``` Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by: ```r -read_all("../records/") %>% repair() -> res +read_all("../data/records/") %>% repair() -> res ``` We also used `repair` to fix a bunch of formatting issues in the records and unpacked the result into more descriptive variables. For development purposes, we load the tables from csv files. ```{r} -res <- read_from_csv('../csv/') +res <- read_from_csv('../data/csv/') ``` and unpack our tibbles ```{r} diff --git a/vignettes/hitlercomparison.Rmd b/vignettes/hitlercomparison.Rmd index 13d5b01..9dde553 100644 --- a/vignettes/hitlercomparison.Rmd +++ b/vignettes/hitlercomparison.Rmd @@ -25,11 +25,11 @@ library(ggplot2) First, you need to download all records of the current legislative period. ```r -fetch_all("../records/") # path to directory where records should be stored +fetch_all("../data/records/") # path to directory where records should be stored ``` Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by: ```r -read_all("../records/") %>% repair() -> res +read_all("../data/records/") %>% repair() -> res speeches <- res$speeches speaker <- res$speaker @@ -40,7 +40,7 @@ the result into more descriptive variables. For development purposes, we load the tables from csv files. ```{r} -tables <- read_from_csv('../csv/') +tables <- read_from_csv('../data/csv/') comments <- tables$comments speeches <- tables$speeches @@ -50,7 +50,7 @@ talks <- tables$talks Further, we need to load a list of words that were used by Hitler but not by standard German texts. ```{r} -fil <- file('../hitler_texts/hitler_words') +fil <- file('../data/hitler_texts/hitler_words') Worte <- readLines(fil) hitlerwords <- tibble(Worte) ```