浏览代码

refactor project structure

genderequality-alternative
JosuaKugler 4 年前
父节点
当前提交
53fdb7530b
共有 23 个文件被更改,包括 22 次插入19 次删除
  1. +4
    -4
      .gitignore
  2. +4
    -1
      R/fetch.R
  3. +3
    -3
      R/parse.R
  4. +0
    -0
      data/hitler_texts/german_words
  5. +0
    -0
      data/hitler_texts/goebbels_sportpalast
  6. +0
    -0
      data/hitler_texts/hitler_rede_1
  7. +0
    -0
      data/hitler_texts/hitler_rede_2
  8. +0
    -0
      data/hitler_texts/hitler_rede_3
  9. +0
    -0
      data/hitler_texts/hitler_rede_4
  10. +0
    -0
      data/hitler_texts/hitler_rede_5
  11. +0
    -0
      data/hitler_texts/hitler_rede_6
  12. +0
    -0
      data/hitler_texts/hitler_rede_7
  13. +0
    -0
      data/hitler_texts/hitler_words
  14. +0
    -0
      data/hitler_texts/mein_kampf
  15. +0
    -0
      data/hitler_texts/parse.py
  16. +0
    -0
      inst/reports/zwischenbericht.pdf
  17. +0
    -0
      inst/reports/zwischenbericht.tex
  18. +1
    -1
      man/fetch_all.Rd
  19. +1
    -1
      man/read_all.Rd
  20. +1
    -1
      man/read_from_csv.Rd
  21. +1
    -1
      man/write_to_csv.Rd
  22. +3
    -3
      vignettes/funwithdata.Rmd
  23. +4
    -4
      vignettes/hitlercomparison.Rmd

+ 4
- 4
.gitignore 查看文件

@@ -1,10 +1,10 @@
*.xml
/doc/
/Meta/
/reports/
!/reports/*.pdf
!/reports/*.tex
/csv/*
/inst/reports/
!/inst/reports/*.pdf
!/inst/reports/*.tex
/data/csv/*
/parlament_49_53_texts/
.Rproj.user
*.Rproj


+ 4
- 1
R/fetch.R 查看文件

@@ -36,9 +36,12 @@ fetch_batch <- function(offset, download_dir) {
#' This fetches all available records of the 19th legislative period of the German Bundestag.
#'
#' @param download_dir character
#' @param create bool
#'
#' if create is TRUE, the directory given in download_dir is created
#'
#' @export
fetch_all <- function(download_dir="records/", create=FALSE) {
fetch_all <- function(download_dir="data/records/", create=FALSE) {
# check if download_dir path is a directory path
if (str_sub(download_dir, -1) != .Platform$file.sep)
download_dir <- str_c(download_dir, .Platform$file.sep)


+ 3
- 3
R/parse.R 查看文件

@@ -8,7 +8,7 @@
#' @param path character
#'
#' @export
read_all <- function(path="records/") {
read_all <- function(path="data/records/") {
cat("Reading all records from", path, "\n")
available_protocols <- list.files(path)
res <- pblapply(available_protocols, read_one, path=path)
@@ -214,7 +214,7 @@ parse_speakerlist <- function(speakerliste_xml) {
#' if create is set to TRUE, the directory given in path is created
#'
#' @export
write_to_csv <- function(tables, path="csv/", create=F) {
write_to_csv <- function(tables, path="data/csv/", create=F) {
check_directory(path, create)
write.table(tables$speaker, str_c(path, "speaker.csv"))
write.table(tables$speeches, str_c(path, "speeches.csv"))
@@ -230,7 +230,7 @@ write_to_csv <- function(tables, path="csv/", create=F) {
#' Reading the tables from a csv is way faster than reading and repairing the data every single time
#'
#' @export
read_from_csv <- function(path="csv/") {
read_from_csv <- function(path="data/csv/") {
list(speaker = read.table(str_c(path, "speaker.csv")) %>%
tibble() %>%
mutate(id = as.character(id)),


hitler_texts/german_words → data/hitler_texts/german_words 查看文件


hitler_texts/goebbels_sportpalast → data/hitler_texts/goebbels_sportpalast 查看文件


hitler_texts/hitler_rede_1 → data/hitler_texts/hitler_rede_1 查看文件


hitler_texts/hitler_rede_2 → data/hitler_texts/hitler_rede_2 查看文件


hitler_texts/hitler_rede_3 → data/hitler_texts/hitler_rede_3 查看文件


hitler_texts/hitler_rede_4 → data/hitler_texts/hitler_rede_4 查看文件


hitler_texts/hitler_rede_5 → data/hitler_texts/hitler_rede_5 查看文件


hitler_texts/hitler_rede_6 → data/hitler_texts/hitler_rede_6 查看文件


hitler_texts/hitler_rede_7 → data/hitler_texts/hitler_rede_7 查看文件


hitler_texts/hitler_words → data/hitler_texts/hitler_words 查看文件


hitler_texts/mein_kampf → data/hitler_texts/mein_kampf 查看文件


hitler_texts/parse.py → data/hitler_texts/parse.py 查看文件


reports/zwischenbericht.pdf → inst/reports/zwischenbericht.pdf 查看文件


reports/zwischenbericht.tex → inst/reports/zwischenbericht.tex 查看文件


+ 1
- 1
man/fetch_all.Rd 查看文件

@@ -4,7 +4,7 @@
\alias{fetch_all}
\title{Download available records}
\usage{
fetch_all(download_dir = "records/", create = FALSE)
fetch_all(download_dir = "data/records/", create = FALSE)
}
\arguments{
\item{download_dir}{character}


+ 1
- 1
man/read_all.Rd 查看文件

@@ -4,7 +4,7 @@
\alias{read_all}
\title{Parse xml records}
\usage{
read_all(path = "records/")
read_all(path = "data/records/")
}
\arguments{
\item{path}{character}


+ 1
- 1
man/read_from_csv.Rd 查看文件

@@ -4,7 +4,7 @@
\alias{read_from_csv}
\title{Read the needed tables for developing from a csv file.}
\usage{
read_from_csv(path = "csv/")
read_from_csv(path = "data/csv/")
}
\arguments{
\item{path}{char


+ 1
- 1
man/write_to_csv.Rd 查看文件

@@ -4,7 +4,7 @@
\alias{write_to_csv}
\title{Write the parsed and repaired results into a csv file to make loading and developing faster and easier}
\usage{
write_to_csv(tables, path = "csv/", create = F)
write_to_csv(tables, path = "data/csv/", create = F)
}
\arguments{
\item{tables}{tibble list}


+ 3
- 3
vignettes/funwithdata.Rmd 查看文件

@@ -26,18 +26,18 @@ library(tidyr)

First, you need to download all records of the current legislative period.
```r
fetch_all("../records/") # path to directory where records should be stored
fetch_all("../data/records/") # path to directory where records should be stored
```
Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
```r
read_all("../records/") %>% repair() -> res
read_all("../data/records/") %>% repair() -> res
```
We also used `repair` to fix a bunch of formatting issues in the records and unpacked
the result into more descriptive variables.

For development purposes, we load the tables from csv files.
```{r}
res <- read_from_csv('../csv/')
res <- read_from_csv('../data/csv/')
```
and unpack our tibbles
```{r}


+ 4
- 4
vignettes/hitlercomparison.Rmd 查看文件

@@ -25,11 +25,11 @@ library(ggplot2)

First, you need to download all records of the current legislative period.
```r
fetch_all("../records/") # path to directory where records should be stored
fetch_all("../data/records/") # path to directory where records should be stored
```
Second, those `.xml` files need to be parsed into `R` `tibbles`. This is accomplished by:
```r
read_all("../records/") %>% repair() -> res
read_all("../data/records/") %>% repair() -> res

speeches <- res$speeches
speaker <- res$speaker
@@ -40,7 +40,7 @@ the result into more descriptive variables.

For development purposes, we load the tables from csv files.
```{r}
tables <- read_from_csv('../csv/')
tables <- read_from_csv('../data/csv/')

comments <- tables$comments
speeches <- tables$speeches
@@ -50,7 +50,7 @@ talks <- tables$talks

Further, we need to load a list of words that were used by Hitler but are not used in standard German texts.
```{r}
fil <- file('../hitler_texts/hitler_words')
fil <- file('../data/hitler_texts/hitler_words')
Worte <- readLines(fil)
hitlerwords <- tibble(Worte)
```


正在加载...
取消
保存