diff --git a/R/analyze.R b/R/analyze.R
index b818005..dfb12b7 100644
--- a/R/analyze.R
+++ b/R/analyze.R
@@ -11,3 +11,31 @@ join_redner <- function(tb, res, fraktion_only = F) {
   if (fraktion_only) select(joined, "fraktion")
   else joined
 }
+
+# Fill colours for each parliamentary group; scale_fill_manual() matches the
+# names of this vector against the values of the `fraktion` column.
+party_colors <- c(
+  SPD = "#DF0B25",
+  "CDU/CSU" = "#000000",
+  AfD = "#1A9FDD",
+  "AfD&Fraktionslos" = "#1A9FDD",
+  "DIE LINKE" = "#BC3475",
+  "BÜNDNIS 90 / DIE GRÜNEN" = "#4A932B",
+  FDP = "#FEEB34",
+  # NOTE(review): identical to the FDP colour -- confirm this is intended.
+  Fraktionslos = "#FEEB34"
+)
+
+#' Bar chart of counts per Fraktion
+#'
+#' @param tb A data frame with a `fraktion` column and a numeric `n` column.
+#' @return A ggplot object with one bar per `fraktion`, ordered by decreasing
+#'   `n` and filled according to `party_colors`.
+#' @export
+bar_plot_fraktionen <- function(tb) {
+  ggplot(tb, aes(x = reorder(fraktion, -n), y = n, fill = fraktion)) +
+    scale_fill_manual(values = party_colors) +
+    geom_col() +
+    # Without an explicit label the axis title is the raw reorder() call.
+    labs(x = "fraktion")
+}
diff --git a/README.md b/README.md
index 5be96b0..4e5ed98 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,11 @@ Um dokumentationen neu zu laden / zu erstellen (ruft roxgen auf)
 document()
 ```
 
+Um die Vignette zu bauen
+```r
+rmarkdown::render("vignettes/funwithdata.Rmd")
+```
+
 # Herunterladen
 
 Bevor analysiert werden kann, muss fetch.R ausgeführt werden, um alle Protokolle herunterzuladen.
diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd
index 17c6ff4..2e6bc28 100644
--- a/vignettes/funwithdata.Rmd
+++ b/vignettes/funwithdata.Rmd
@@ -29,43 +29,42 @@ fetch_all("../records/") # path to directory where records should be stored
 Second, those `.xml` files, need to be parsed into `R` `tibbles`. This is accomplished by:
 ```r
 read_all("../records/") %>% repair() -> res
-
-reden <- res$reden
-redner <- res$redner
-talks <- res$talks
 ```
 
 We also used `repair` to fix a bunch of formatting issues in the records and unpacked the result into more descriptive variables.
 
 For development purposes, we load the tables from csv files.
 ```{r}
-tables <- read_from_csv('../csv/')
-
-comments <- tables$comments
-reden <- tables$reden
-redner <- tables$redner
-talks <- tables$talks
+res <- read_from_csv('../csv/')
+```
+and unpack our tibbles:
+```{r}
+comments <- res$comments
+reden <- res$reden
+redner <- res$redner
+talks <- res$talks
 ```
 
 ## Analysis
 
 Now we can start analysing our parsed dataset, e.g. find out which party gives the most talks:
-```{r}
-left_join(reden, redner, by=c("redner" = "id")) %>%
+```{r, fig.width=10}
+join_redner(reden, res) %>%
   group_by(fraktion) %>%
   summarize(n = n()) %>%
-  ggplot(aes(x = fraktion, y = n)) +
-    geom_bar(stat = "identity")
+  arrange(n) %>%
+  bar_plot_fraktionen()
 ```
 
 ### Count a word occurence
 
-```{r}
+```{r, fig.width=10}
 find_word(res, "hitler") %>%
   filter(occurences > 0) %>%
   join_redner(res) %>%
   select(content, fraktion) %>%
   group_by(fraktion) %>%
   summarize(n = n()) %>%
-  arrange(desc(n))
+  arrange(desc(n)) %>%
+  bar_plot_fraktionen()
 ```