From 1463f6092e131bc7c29d3113d9819a7cac24271c Mon Sep 17 00:00:00 2001
From: flavis
Date: Wed, 28 Jul 2021 21:23:06 +0200
Subject: [PATCH] add basic analysing utils

---
 NAMESPACE                 |  2 ++
 R/analyze.R               | 34 ++++++++++++++++++++++++++++++++++
 vignettes/funwithdata.Rmd | 12 ++++++++++++
 3 files changed, 48 insertions(+)
 create mode 100644 R/analyze.R

diff --git a/NAMESPACE b/NAMESPACE
index b52ac75..9b21836 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
 export(fetch_all)
+export(find_word)
+export(join_redner)
 export(read_all)
 export(read_from_csv)
 export(repair)
diff --git a/R/analyze.R b/R/analyze.R
new file mode 100644
index 0000000..b818005
--- /dev/null
+++ b/R/analyze.R
@@ -0,0 +1,34 @@
+#' Count occurrences of a word in each talk
+#'
+#' @param res A list with a \code{talks} data frame that has a character
+#'   column \code{content}.
+#' @param word Pattern to look for. It is passed to \code{stringr::regex()},
+#'   so regular-expression metacharacters are interpreted; case is ignored.
+#' @return \code{res$talks} with an additional integer column
+#'   \code{occurences}: the number of matches per row.
+#' @export
+find_word <- function(res, word) {
+  talks <- res$talks
+  pattern <- stringr::regex(word, ignore_case = TRUE)
+  # str_count() always returns an integer vector (sapply() degrades to a list
+  # for zero rows) and yields NA, not a match, for missing content.
+  dplyr::mutate(talks, occurences = stringr::str_count(talks$content, pattern))
+}
+
+#' Join speaker information onto a table of talks
+#'
+#' @param tb A data frame with a \code{redner} column holding speaker ids.
+#' @param res A list with a \code{redner} data frame keyed by \code{id}.
+#' @param fraktion_only If \code{TRUE}, return only the \code{fraktion}
+#'   column of the joined table.
+#' @return The left join of \code{tb} and \code{res$redner}, or just its
+#'   \code{fraktion} column when \code{fraktion_only} is \code{TRUE}.
+#' @export
+join_redner <- function(tb, res, fraktion_only = FALSE) {
+  joined <- dplyr::left_join(tb, res$redner, by = c("redner" = "id"))
+  if (fraktion_only) {
+    dplyr::select(joined, "fraktion")
+  } else {
+    joined
+  }
+}
diff --git a/vignettes/funwithdata.Rmd b/vignettes/funwithdata.Rmd
index 098ce4c..17c6ff4 100644
--- a/vignettes/funwithdata.Rmd
+++ b/vignettes/funwithdata.Rmd
@@ -57,3 +57,15 @@ left_join(reden, redner, by=c("redner" = "id")) %>%
   ggplot(aes(x = fraktion, y = n)) +
   geom_bar(stat = "identity")
 ```
+
+### Count a word occurrence
+
+```{r}
+find_word(res, "hitler") %>%
+  filter(occurences > 0) %>%
+  join_redner(res) %>%
+  select(content, fraktion) %>%
+  group_by(fraktion) %>%
+  summarize(n = n()) %>%
+  arrange(desc(n))
+```