fractions <- c("AFD" = "AfD",
                "AFD&FRAKTIONSLOS" = "AfD&Fraktionslos",
                "B\u00DCNDNIS90/" = "B\u00DCNDNIS 90/DIE GR\u00DCNEN",
                "B\u00DCNDNIS90/DIEGR\u00DCNEN" = "B\u00DCNDNIS 90/DIE GR\u00DCNEN",
                "FRAKTIONSLOS" = "Fraktionslos",
                "DIELINKE" = "DIE LINKE",
                "SPD" = "SPD",
                "CDU/CSU" = "CDU/CSU",
                "FDP" = "FDP")

repair_fraction <- function(fraction) {
    cleaned <- str_to_upper %$% str_replace_all(fraction, "\\s", "")
    fractions[cleaned]
}

# takes vector of titel and keeps longest
longest_titel <- function(titel) {
    if (all(is.na(titel))) NA_character_
    else titel[which.max %$% str_length(titel)]
}

# takes character vector, removes duplicates and collapses
collect_unique <- function(xs) xs %>% clear_na() %>% unique() %>% str_c(collapse="&") %>% na_if("")

# expects a tibble of speaker and repairs
repair_speaker <- function(speaker) {
    if (nrow(speaker) == 0) return(speaker)
    speaker %>%
        filter(id != "10000") %>% # invalid id's
        mutate(fraction = Vectorize(repair_fraction)(fraction)) %>% # fix fraction
        group_by(id) %>%
        summarize(prename = head(prename, 1),
                  lastname = head(lastname, 1),
                  fraction = collect_unique(fraction),
                  title = longest_titel(title),
                  role_short = collect_unique(str_squish(role_short)),
                  role_long = collect_unique(str_squish(role_long))) %>%
        ungroup() #%>%
}

repair_speeches <- function(speeches) {
    if (nrow(speeches) == 0) return(speeches)
    # TODO: fill with content
    speeches
}

repair_talks <- function(talks) {
    if (nrow(talks) == 0) return(talks)
    # ignore all talks which have empty content
    filter(talks, str_length(content) > 0)
}

#' Lookup name in speakers table
#'
#' Tries to find the correct speaker id given a name.
#' This is sufficient since every prename lastname combination in the bundestag is
#' unique (luckily :D)
#' 
#' @param tb tibble
#' @param speaker tibble
#' @param name_variable name
#' 
#' Tries to match the name_variable column with speaker names
#' 
#' returns a lookup table
lookup_speaker <- function(tb, speaker, name_variable) {
    tobereplaced <- "[\u002D\u2013\u2014\u2011\u00AD ]"
    speaker %>%
        unite(name, prename, lastname, sep=".*") %>%
        mutate(name = str_replace_all(name, tobereplaced, ".*")) ->
        rs
    find_match <- function(komm) {
        if (komm == "") return (NA_character_)
        # I tried with agrep (levensthein distance) but results are better that way
        matches <- str_which(komm, rs$name)
        if (length(matches) == 0) return(NA_character_)
        rs[head(matches, 1), ]$id
    }
    tb %>%
        distinct({{name_variable}}) %>%
        mutate(speaker = Vectorize(find_match)(str_replace_all({{name_variable}}, tobereplaced, "")))
}

repair_comments <- function(comments, speaker, lookup_speaker=F) {
    comments %>%
        filter(!is.na(commenter) | !is.na(content) | !is.na(fraction)) ->
        tb
    if (lookup_speaker) {
        cat(paste0("Looking up speaker id's for names in comments. This may take a while ...\n",
                   "Use repair(, lookup_speaker = FALSE) to skip this.\n"))
        # try to find a speaker id for each actual comment
        tb %>%
            filter(!is.na(commenter)) %>%
            lookup_speaker(speaker, commenter) %>%
            left_join(tb, ., by="commenter")
    } else tb
}

#' Repair parsed tables
#'
#' @param parse_output tibble
#' @param lookup_speaker bool
#' 
#' If lookup_speaker is TRUE, members of the parliament mentioned in comments are looked up in speaker table.
#'
#' Possible test: check identical(repair(res), repair(repair(res))) == TRUE
#' Since repaired tables should be a fixpoint of repair.
#' @export
repair <- function(parse_output, lookup_speaker = FALSE) {
    is_valid_res(parse_output)
    stopifnot("lookup_speaker must be of type logical" = is.logical(lookup_speaker))
    list(speaker = repair_speaker(parse_output$speaker),
         speeches = repair_speeches(parse_output$speeches),
         talks = repair_talks(parse_output$talks),
         comments = repair_comments(parse_output$comments,
                                    parse_output$speaker,
                                    lookup_speaker),
         applause = parse_output$applause)
}