Просмотр исходного кода

refactor project structure

genderequality-alternative
JosuaKugler 4 лет назад
Родитель
Сommit
53fdb7530b
23 измененных файлов: 22 добавлений и 19 удалений
  1. +4
    -4
      .gitignore
  2. +4
    -1
      R/fetch.R
  3. +3
    -3
      R/parse.R
  4. +0
    -0
      data/hitler_texts/german_words
  5. +0
    -0
      data/hitler_texts/goebbels_sportpalast
  6. +0
    -0
      data/hitler_texts/hitler_rede_1
  7. +0
    -0
      data/hitler_texts/hitler_rede_2
  8. +0
    -0
      data/hitler_texts/hitler_rede_3
  9. +0
    -0
      data/hitler_texts/hitler_rede_4
  10. +0
    -0
      data/hitler_texts/hitler_rede_5
  11. +0
    -0
      data/hitler_texts/hitler_rede_6
  12. +0
    -0
      data/hitler_texts/hitler_rede_7
  13. +0
    -0
      data/hitler_texts/hitler_words
  14. +0
    -0
      data/hitler_texts/mein_kampf
  15. +0
    -0
      data/hitler_texts/parse.py
  16. +0
    -0
      inst/reports/zwischenbericht.pdf
  17. +0
    -0
      inst/reports/zwischenbericht.tex
  18. +1
    -1
      man/fetch_all.Rd
  19. +1
    -1
      man/read_all.Rd
  20. +1
    -1
      man/read_from_csv.Rd
  21. +1
    -1
      man/write_to_csv.Rd
  22. +3
    -3
      vignettes/funwithdata.Rmd
  23. +4
    -4
      vignettes/hitlercomparison.Rmd

+ 4
- 4
.gitignore Просмотреть файл

@@ -1,10 +1,10 @@
*.xml *.xml
/doc/ /doc/
/Meta/ /Meta/
/reports/
!/reports/*.pdf
!/reports/*.tex
/csv/*
/inst/reports/
!/inst/reports/*.pdf
!/inst/reports/*.tex
/data/csv/*
/parlament_49_53_texts/ /parlament_49_53_texts/
.Rproj.user .Rproj.user
*.Rproj *.Rproj


+ 4
- 1
R/fetch.R Просмотреть файл

@@ -36,9 +36,12 @@ fetch_batch <- function(offset, download_dir) {
#' This fetches all available records of the 19th legislative period of the German Bundestag. #' This fetches all available records of the 19th legislative period of the German Bundestag.
#' #'
#' @param download_dir character #' @param download_dir character
#' @param create bool
#'
#' if create is TRUE, the directory given in download_dir is created
#' #'
#' @export #' @export
fetch_all <- function(download_dir="records/", create=FALSE) {
fetch_all <- function(download_dir="data/records/", create=FALSE) {
# check if download_dir path is a directory path # check if download_dir path is a directory path
if (str_sub(download_dir, -1) != .Platform$file.sep) if (str_sub(download_dir, -1) != .Platform$file.sep)
download_dir <- str_c(download_dir, .Platform$file.sep) download_dir <- str_c(download_dir, .Platform$file.sep)


+ 3
- 3
R/parse.R Просмотреть файл

@@ -8,7 +8,7 @@
#' @param path character #' @param path character
#' #'
#' @export #' @export
read_all <- function(path="records/") {
read_all <- function(path="data/records/") {
cat("Reading all records from", path, "\n") cat("Reading all records from", path, "\n")
available_protocols <- list.files(path) available_protocols <- list.files(path)
res <- pblapply(available_protocols, read_one, path=path) res <- pblapply(available_protocols, read_one, path=path)
@@ -214,7 +214,7 @@ parse_speakerlist <- function(speakerliste_xml) {
#' if create is set to TRUE, the directory given in path is created #' if create is set to TRUE, the directory given in path is created
#' #'
#' @export #' @export
write_to_csv <- function(tables, path="csv/", create=F) {
write_to_csv <- function(tables, path="data/csv/", create=F) {
check_directory(path, create) check_directory(path, create)
write.table(tables$speaker, str_c(path, "speaker.csv")) write.table(tables$speaker, str_c(path, "speaker.csv"))
write.table(tables$speeches, str_c(path, "speeches.csv")) write.table(tables$speeches, str_c(path, "speeches.csv"))
@@ -230,7 +230,7 @@ write_to_csv <- function(tables, path="csv/", create=F) {
#' Reading the tables from a csv is way faster than reading and repairing the data every single time #' Reading the tables from a csv is way faster than reading and repairing the data every single time
#' #'
#' @export #' @export
read_from_csv <- function(path="csv/") {
read_from_csv <- function(path="data/csv/") {
list(speaker = read.table(str_c(path, "speaker.csv")) %>% list(speaker = read.table(str_c(path, "speaker.csv")) %>%
tibble() %>% tibble() %>%
mutate(id = as.character(id)), mutate(id = as.character(id)),


hitler_texts/german_words → data/hitler_texts/german_words Просмотреть файл


hitler_texts/goebbels_sportpalast → data/hitler_texts/goebbels_sportpalast Просмотреть файл


hitler_texts/hitler_rede_1 → data/hitler_texts/hitler_rede_1 Просмотреть файл


hitler_texts/hitler_rede_2 → data/hitler_texts/hitler_rede_2 Просмотреть файл


hitler_texts/hitler_rede_3 → data/hitler_texts/hitler_rede_3 Просмотреть файл


hitler_texts/hitler_rede_4 → data/hitler_texts/hitler_rede_4 Просмотреть файл


hitler_texts/hitler_rede_5 → data/hitler_texts/hitler_rede_5 Просмотреть файл


hitler_texts/hitler_rede_6 → data/hitler_texts/hitler_rede_6 Просмотреть файл


hitler_texts/hitler_rede_7 → data/hitler_texts/hitler_rede_7 Просмотреть файл


hitler_texts/hitler_words → data/hitler_texts/hitler_words Просмотреть файл


hitler_texts/mein_kampf → data/hitler_texts/mein_kampf Просмотреть файл


hitler_texts/parse.py → data/hitler_texts/parse.py Просмотреть файл


reports/zwischenbericht.pdf → inst/reports/zwischenbericht.pdf Просмотреть файл


reports/zwischenbericht.tex → inst/reports/zwischenbericht.tex Просмотреть файл


+ 1
- 1
man/fetch_all.Rd Просмотреть файл

@@ -4,7 +4,7 @@
\alias{fetch_all} \alias{fetch_all}
\title{Download available records} \title{Download available records}
\usage{ \usage{
fetch_all(download_dir = "records/", create = FALSE)
fetch_all(download_dir = "data/records/", create = FALSE)
} }
\arguments{ \arguments{
\item{download_dir}{character} \item{download_dir}{character}


+ 1
- 1
man/read_all.Rd Просмотреть файл

@@ -4,7 +4,7 @@
\alias{read_all} \alias{read_all}
\title{Parse xml records} \title{Parse xml records}
\usage{ \usage{
read_all(path = "records/")
read_all(path = "data/records/")
} }
\arguments{ \arguments{
\item{path}{character} \item{path}{character}


+ 1
- 1
man/read_from_csv.Rd Просмотреть файл

@@ -4,7 +4,7 @@
\alias{read_from_csv} \alias{read_from_csv}
\title{Read the needed tables for developing from a csv file.} \title{Read the needed tables for developing from a csv file.}
\usage{ \usage{
read_from_csv(path = "csv/")
read_from_csv(path = "data/csv/")
} }
\arguments{ \arguments{
\item{path}{char \item{path}{char


+ 1
- 1
man/write_to_csv.Rd Просмотреть файл

@@ -4,7 +4,7 @@
\alias{write_to_csv} \alias{write_to_csv}
\title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier} \title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier}
\usage{ \usage{
write_to_csv(tables, path = "csv/", create = F)
write_to_csv(tables, path = "data/csv/", create = F)
} }
\arguments{ \arguments{
\item{tables}{tibble list} \item{tables}{tibble list}


+ 3
- 3
vignettes/funwithdata.Rmd Просмотреть файл

@@ -26,18 +26,18 @@ library(tidyr)


First, you need to download all records of the current legislative period. First, you need to download all records of the current legislative period.
```r ```r
fetch_all("../records/") # path to directory where records should be stored
fetch_all("../data/records/") # path to directory where records should be stored
``` ```
Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
```r ```r
read_all("../records/") %>% repair() -> res
read_all("../data/records/") %>% repair() -> res
``` ```
We also used `repair` to fix a bunch of formatting issues in the records and unpacked We also used `repair` to fix a bunch of formatting issues in the records and unpacked
the result into more descriptive variables. the result into more descriptive variables.


For development purposes, we load the tables from csv files. For development purposes, we load the tables from csv files.
```{r} ```{r}
res <- read_from_csv('../csv/')
res <- read_from_csv('../data/csv/')
``` ```
and unpack our tibbles and unpack our tibbles
```{r} ```{r}


+ 4
- 4
vignettes/hitlercomparison.Rmd Просмотреть файл

@@ -25,11 +25,11 @@ library(ggplot2)


First, you need to download all records of the current legislative period. First, you need to download all records of the current legislative period.
```r ```r
fetch_all("../records/") # path to directory where records should be stored
fetch_all("../data/records/") # path to directory where records should be stored
``` ```
Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by: Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
```r ```r
read_all("../records/") %>% repair() -> res
read_all("../data/records/") %>% repair() -> res


speeches <- res$speeches speeches <- res$speeches
speaker <- res$speaker speaker <- res$speaker
@@ -40,7 +40,7 @@ the result into more descriptive variables.


For development purposes, we load the tables from csv files. For development purposes, we load the tables from csv files.
```{r} ```{r}
tables <- read_from_csv('../csv/')
tables <- read_from_csv('../data/csv/')


comments <- tables$comments comments <- tables$comments
speeches <- tables$speeches speeches <- tables$speeches
@@ -50,7 +50,7 @@ talks <- tables$talks


Further, we need to load a list of words that were used by Hitler but not by standard German texts. Further, we need to load a list of words that were used by Hitler but not by standard German texts.
```{r} ```{r}
fil <- file('../hitler_texts/hitler_words')
fil <- file('../data/hitler_texts/hitler_words')
Worte <- readLines(fil) Worte <- readLines(fil)
hitlerwords <- tibble(Worte) hitlerwords <- tibble(Worte)
``` ```


Загрузка…
Отмена
Сохранить