refactor project structure

4 年前 · 53fdb7530b
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,10 @@
 *.xml
 /doc/
 /Meta/
 /reports/
 !/reports/*.pdf
 !/reports/*.tex
 /csv/*
 /inst/reports/
 !/inst/reports/*.pdf
 !/inst/reports/*.tex
 /data/csv/*
 /parlament_49_53_texts/
 .Rproj.user
 *.Rproj
--- a/R/fetch.R
+++ b/R/fetch.R
@@ -36,9 +36,12 @@ fetch_batch <- function(offset, download_dir) {
 #' This fetches all available records of the 19th legislative period of the German Bundestag.
 #'
 #' @param download_dir character
 #' @param create bool
 #' 
 #' if create is TRUE, the directory given in download_dir is created
 #'
 #' @export
 fetch_all <- function(download_dir="records/", create=FALSE) {
 fetch_all <- function(download_dir="data/records/", create=FALSE) {
    # check if download_dir path is a directory path
    if (str_sub(download_dir, -1) != .Platform$file.sep)
        download_dir <- str_c(download_dir, .Platform$file.sep)
--- a/R/parse.R
+++ b/R/parse.R
@@ -8,7 +8,7 @@
 #' @param path character
 #'
 #' @export
 read_all <- function(path="records/") {
 read_all <- function(path="data/records/") {
    cat("Reading all records from", path, "\n")
    available_protocols <- list.files(path)
    res <- pblapply(available_protocols, read_one, path=path)
@@ -214,7 +214,7 @@ parse_speakerlist <- function(speakerliste_xml) {
 #' if create is set to TRUE, the directory given in path is created
 #' 
 #' @export
 write_to_csv <- function(tables, path="csv/", create=F) {
 write_to_csv <- function(tables, path="data/csv/", create=F) {
    check_directory(path, create)
    write.table(tables$speaker, str_c(path, "speaker.csv"))
    write.table(tables$speeches, str_c(path, "speeches.csv"))
@@ -230,7 +230,7 @@ write_to_csv <- function(tables, path="csv/", create=F) {
 #' Reading the tables from a csv is way faster than reading and repairing the data every single time
 #' 
 #' @export
 read_from_csv <- function(path="csv/") {
 read_from_csv <- function(path="data/csv/") {
    list(speaker = read.table(str_c(path, "speaker.csv")) %>%
             tibble() %>%
             mutate(id = as.character(id)),
--- a/data/hitler_texts/german_words
+++ b/data/hitler_texts/german_words
--- a/data/hitler_texts/goebbels_sportpalast
+++ b/data/hitler_texts/goebbels_sportpalast
--- a/data/hitler_texts/hitler_rede_1
+++ b/data/hitler_texts/hitler_rede_1
--- a/data/hitler_texts/hitler_rede_2
+++ b/data/hitler_texts/hitler_rede_2
--- a/data/hitler_texts/hitler_rede_3
+++ b/data/hitler_texts/hitler_rede_3
--- a/data/hitler_texts/hitler_rede_4
+++ b/data/hitler_texts/hitler_rede_4
--- a/data/hitler_texts/hitler_rede_5
+++ b/data/hitler_texts/hitler_rede_5
--- a/data/hitler_texts/hitler_rede_6
+++ b/data/hitler_texts/hitler_rede_6
--- a/data/hitler_texts/hitler_rede_7
+++ b/data/hitler_texts/hitler_rede_7
--- a/data/hitler_texts/hitler_words
+++ b/data/hitler_texts/hitler_words
--- a/data/hitler_texts/mein_kampf
+++ b/data/hitler_texts/mein_kampf
--- a/data/hitler_texts/parse.py
+++ b/data/hitler_texts/parse.py
--- a/inst/reports/zwischenbericht.pdf
+++ b/inst/reports/zwischenbericht.pdf
--- a/inst/reports/zwischenbericht.tex
+++ b/inst/reports/zwischenbericht.tex
--- a/man/fetch_all.Rd
+++ b/man/fetch_all.Rd
@@ -4,7 +4,7 @@
 \alias{fetch_all}
 \title{Download available records}
 \usage{
 fetch_all(download_dir = "records/", create = FALSE)
 fetch_all(download_dir = "data/records/", create = FALSE)
 }
 \arguments{
 \item{download_dir}{character}
--- a/man/read_all.Rd
+++ b/man/read_all.Rd
@@ -4,7 +4,7 @@
 \alias{read_all}
 \title{Parse xml records}
 \usage{
 read_all(path = "records/")
 read_all(path = "data/records/")
 }
 \arguments{
 \item{path}{character}
--- a/man/read_from_csv.Rd
+++ b/man/read_from_csv.Rd
@@ -4,7 +4,7 @@
 \alias{read_from_csv}
 \title{Read the needed tables for developing from a csv file.}
 \usage{
 read_from_csv(path = "csv/")
 read_from_csv(path = "data/csv/")
 }
 \arguments{
 \item{path}{char
--- a/man/write_to_csv.Rd
+++ b/man/write_to_csv.Rd
@@ -4,7 +4,7 @@
 \alias{write_to_csv}
 \title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier}
 \usage{
 write_to_csv(tables, path = "csv/", create = F)
 write_to_csv(tables, path = "data/csv/", create = F)
 }
 \arguments{
 \item{tables}{tibble list}
--- a/vignettes/funwithdata.Rmd
+++ b/vignettes/funwithdata.Rmd
@@ -26,18 +26,18 @@ library(tidyr)

 First, you need to download all records of the current legislative period.
 ```r
 fetch_all("../records/") # path to directory where records should be stored
 fetch_all("../data/records/") # path to directory where records should be stored
 ```
 Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
 ```r
 read_all("../records/") %>% repair() -> res
 read_all("../data/records/") %>% repair() -> res
 ```
 We also used `repair` to fix a bunch of formatting issues in the records and unpacked
 the result into more descriptive variables.

 For development purposes, we load the tables from csv files.
 ```{r}
 res <- read_from_csv('../csv/')
 res <- read_from_csv('../data/csv/')
 ```
 and unpack our tibbles
 ```{r}
--- a/vignettes/hitlercomparison.Rmd
+++ b/vignettes/hitlercomparison.Rmd
@@ -25,11 +25,11 @@ library(ggplot2)

 First, you need to download all records of the current legislative period.
 ```r
 fetch_all("../records/") # path to directory where records should be stored
 fetch_all("../data/records/") # path to directory where records should be stored
 ```
 Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
 ```r
 read_all("../records/") %>% repair() -> res
 read_all("../data/records/") %>% repair() -> res

 speeches <- res$speeches
 speaker <- res$speaker
@@ -40,7 +40,7 @@ the result into more descriptive variables.

 For development purposes, we load the tables from csv files.
 ```{r}
 tables <- read_from_csv('../csv/')
 tables <- read_from_csv('../data/csv/')

 comments <- tables$comments
 speeches <- tables$speeches
@@ -50,7 +50,7 @@ talks <- tables$talks

 Further, we need to load a list of words that were used by Hitler but do not appear in standard German texts.
 ```{r}
 fil <- file('../hitler_texts/hitler_words')
 fil <- file('../data/hitler_texts/hitler_words')
 Worte <- readLines(fil)
 hitlerwords <- tibble(Worte)
 ```