| @@ -0,0 +1,27 @@ | |||
| # Josua Kugler, Christian Merten | |||
| library(tidyverse) | |||
| # Function composition: `f %o% g` returns a function that passes its | |||
| # arguments to `g` and then applies `f` to the result, i.e. | |||
| # (f %o% g)(x) is f(g(x)). | |||
| `%o%` <- function(f, g) { | |||
|   # Evaluate both arguments right away. Without this they stay lazy, and | |||
|   # rebinding the caller's variable before the first call would change | |||
|   # which function the composition ends up calling. | |||
|   force(f) | |||
|   force(g) | |||
|   function(...) f(g(...)) | |||
| } | |||
| # Read the whole file into one string instead of going line by line. | |||
| # If read_lines() has to be used, paste the lines back together first; | |||
| # the parsing further down works on a single string. | |||
| lines <- readr::read_file(file = "books.txt") | |||
| # Split `s` into single characters, sort them and glue them back together | |||
| # into one string. | |||
| sort_char <- function(s) { | |||
|   chars <- unlist(str_split(s, "")) | |||
|   paste0(sort(chars), collapse = "") | |||
| } | |||
| # Drop parenthesised remarks and every non-alphanumeric character, upper-case | |||
| # what is left, then extract each run of letters that is followed by a run of | |||
| # digits. In the match matrix column 2 holds the letters, column 3 the digits. | |||
| cleared <- str_match_all( | |||
|   str_to_upper(str_replace_all(lines, | |||
|                                pattern = "\\(.*?\\)|[^a-zA-Z0-9]", | |||
|                                replacement = "")), | |||
|   "([A-Z]+)([0-9]+)" | |||
| )[[1]] | |||
| # vapply() always returns an unnamed character vector, also when nothing | |||
| # matched; sapply() would return a named vector (or an empty list). | |||
| unsorted_data <- tibble( | |||
|   category = vapply(cleared[, 2], sort_char, character(1), USE.NAMES = FALSE), | |||
|   count = as.integer(cleared[, 3]) | |||
| ) | |||
| # Sort on the category vector itself: `unsorted_data[, 1]` is a one-column | |||
| # tibble, whereas order() is meant to be given plain vectors. | |||
| data <- unsorted_data[order(unsorted_data$category), ] | |||
| # Build one sentence per row of `data` whose category matches `cat_let`. | |||
| # | |||
| # data:    data frame with the category in column 1 (named `category`) and | |||
| #          the book count in column 2. | |||
| # cat_let: pattern handed to str_detect(); rows whose category matches stay. | |||
| # Returns a character vector with one sentence per matching row; it is empty | |||
| # when no category matches. | |||
| books_of_category <- function(data, cat_let) { | |||
|   hits <- data[str_detect(data$category, cat_let), ] | |||
|   # Format the columns directly instead of apply()-ing over rows: apply() | |||
|   # first converts the data frame to a character matrix, which pads the | |||
|   # counts with blanks to a common width (" 9" next to "25"). | |||
|   sprintf("We have %s books of category %s", hits[[2]], hits[[1]]) | |||
| } | |||
| @@ -0,0 +1,112 @@ | |||
| # Josua Kugler, Christian Merten | |||
| # install.packages("babynames") | |||
| library(tidyverse) | |||
| ## Create some data----------------------------------------------------------- | |||
| # Fixed seed, so every run draws the same random numbers. | |||
| set.seed(1) | |||
| # `baseset` holds the pools that observations are sampled from, together with | |||
| # a "boost" value per grade, per class letter and per distance. | |||
| baseset <- list() | |||
| baseset$grade <- as.integer(c(5, 6, 7, 8, 9, 10, 11)) | |||
| baseset$grade_boost <- c(1, 3, 5, 7, 8, 9, 10) | |||
| baseset$letter <- letters[1:4] | |||
| baseset$letter_boost <- sample(1:5, 4, replace = TRUE) | |||
| # Sum the counts per sex and name, rank the names by total count and keep | |||
| # those with rank <= 3000. summarise() leaves the result grouped by sex, so | |||
| # the ranking is done within each sex. | |||
| ranked_names <- babynames::babynames %>% | |||
|   group_by(sex, name) %>% | |||
|   summarise(n = sum(n)) %>% | |||
|   arrange(desc(n)) %>% | |||
|   mutate(rank = min_rank(-n)) %>% | |||
|   filter(rank <= 3000) | |||
| baseset$name <- ranked_names$name | |||
| baseset$distance <- c(100, 200, 400, 1000) | |||
| baseset$distance_boost <- c(14, 12, 10, 8) | |||
| # Simulate `n` students. Each one gets a randomly drawn name, grade and class | |||
| # letter plus one simulated running time per entry of `baseset$distance`. | |||
| # | |||
| # n: number of students to draw. | |||
| # Returns a tibble with the columns name, grade, letter and one | |||
| # time<distance> column per distance (time100, time200, time400, time1000 | |||
| # for the distances defined in `baseset`). | |||
| sample_observation <- function(n) { | |||
|   res <- list() | |||
|   res$name <- sample(baseset$name, n, replace = TRUE) | |||
|   res$grade <- sample(baseset$grade, n, replace = TRUE) | |||
|   res$letter <- sample(baseset$letter, n, replace = TRUE) | |||
|   # Per-student boost: boost of the grade plus boost of the class letter. | |||
|   boost_base <- | |||
|     baseset$grade_boost[match(res$grade, baseset$grade)] + | |||
|     baseset$letter_boost[match(res$letter, baseset$letter)] | |||
|   # Take distances and their boosts from `baseset` rather than repeating the | |||
|   # numbers here. Going through them in stored order keeps the sequence of | |||
|   # random draws the same as four hand-written calls would give. | |||
|   for (i in seq_along(baseset$distance)) { | |||
|     dist <- baseset$distance[i] | |||
|     column <- paste0("time", format(dist, scientific = FALSE)) | |||
|     res[[column]] <- sample_time(dist, baseset$distance_boost[i] + boost_base) | |||
|   } | |||
|   as_tibble(res) | |||
| } | |||
| # Draw one random time per element of `boost`: a factor drawn uniformly from | |||
| # [2.5, 3] is divided by the boost and multiplied by twice the distance. | |||
| # | |||
| # dist:  distance the time is simulated for. | |||
| # boost: numeric vector; a larger boost gives a smaller time. | |||
| # Returns a numeric vector of the same length as `boost`. | |||
| sample_time <- function(dist, boost) { | |||
|   factor <- 2.5 + runif(length(boost)) / 2 | |||
|   factor / boost * dist * 2 | |||
| } | |||
| # Simulated results of 1000 students. | |||
| sports <- sample_observation(n = 1000) | |||
| # Eleven levels with evenly spaced values: min100 runs from 43 down to 23, | |||
| # min1000 from 500 down to 300. | |||
| requirements <- tibble( | |||
|   level = 1:11, | |||
|   min100 = seq(from = 43, to = 23, length.out = 11), | |||
|   min1000 = seq(from = 500, to = 300, length.out = 11) | |||
| ) | |||
| ## Exercises ----------------------------------------------------------------- | |||
| # a) | |||
| # sort sports by 'name' (alphabetically) | |||
| sports %>% | |||
|   arrange(name) | |||
| # b) | |||
| # sort sports by 'grade' (11, 10, ..., 5), | |||
| # in case of ties by 'letter' (a, b, c, d), | |||
| # in case of ties by 'name' (A-Z) | |||
| # The grades have to run from 11 down to 5, hence desc(); a bare `grade` | |||
| # sorts ascending. Letter and name keep the default ascending order. | |||
| arrange(sports, desc(grade), letter, name) | |||
| # c) | |||
| # count the numbers of students per class | |||
| # A class is one combination of grade and letter. | |||
| group_by(sports, grade, letter) %>% | |||
|   summarize(student_count = n()) | |||
| # d) | |||
| # what is the mean, max and min class size | |||
| # Same per-class count as in c); the grouping is dropped before the three | |||
| # summary figures are computed over all classes. | |||
| group_by(sports, grade, letter) %>% | |||
|   summarize(student_count = n()) %>% | |||
|   ungroup() %>% | |||
|   summarize( | |||
|     mean_class_size = mean(student_count), | |||
|     max_class_size = max(student_count), | |||
|     min_class_size = min(student_count) | |||
|   ) | |||
| # e) | |||
| # get all students with a non-unique name | |||
| # Keep the rows whose name occurs more than once. | |||
| sports %>% | |||
|   group_by(name) %>% | |||
|   filter(n() > 1) | |||
| # f) | |||
| # get the top 10 sprinters (100m) | |||
| # Rank 1 is the smallest 100m time; ties at the cut-off are all kept. | |||
| sports %>% | |||
|   filter(min_rank(time100) <= 10) | |||
| # g) | |||
| # get the slowest 10 sprinters (100m) | |||
| # Rank 1 is the largest 100m time; ties at the cut-off are all kept. | |||
| sports %>% | |||
|   filter(min_rank(desc(time100)) <= 10) | |||
| # h) | |||
| # remove 100m, 200m, and 400m, and add velocity in km/h for 1000m | |||
| # 1000 m in `time1000` seconds is 3600 / time1000 km/h (this assumes the | |||
| # times are seconds, as the formula implies). Only time100 to time400 are | |||
| # dropped; the 1000m time itself is not in the list of columns to remove. | |||
| sports %>% | |||
|   mutate(velocity = 60 * 60 / time1000) %>% | |||
|   select(-(time100:time400)) | |||
| # i) | |||
| # rename 'grade' to 'level' | |||
| rename(sports, level = grade) | |||
| # j) | |||
| # calculate average and min times for 200m in each grade | |||
| sports %>% | |||
|   group_by(grade) %>% | |||
|   summarize( | |||
|     average200 = mean(time200), | |||
|     min200 = min(time200) | |||
|   ) | |||
| # k) | |||
| # select all time-columns | |||
| # The time columns sit next to each other, from time100 to time1000. | |||
| select(sports, time100:time1000) | |||
| # l) | |||
| # show the fastest sprinter (100m) in each class | |||
| # sorted by class (ie by grade and letter) | |||
| # Within each class keep the row(s) with the smallest 100m time. | |||
| sports %>% | |||
|   group_by(grade, letter) %>% | |||
|   filter(min_rank(time100) <= 1) %>% | |||
|   arrange(grade, letter) | |||
| @@ -0,0 +1,65 @@ | |||
| R S V C E W Y: 25 | |||
| vqayl 18 (plus minus25) | |||
| wl: 16; MUST - 16; HCUDM: 14; LBR: 20 | |||
| c u g m h: 22 | |||
| BLZJ 24; FEJB - 27; xsa: 20; LDECK 15; S,K: 27. WOAC: 18; WORZE - 26. PRK 25 | |||
| L G U K E 19; w j x n 26 | |||
| XTL: 16; O Y B - 35. XHBWK - 18. AXUPL: 31; YELJ 13; d,r,c,l: 26. QEGX: 21 | |||
| RALM 26. G,K,I,W: 12; IJXC: 19. HPZAY: 19; JSPCO: 22; BIXE: 12 | |||
| VTG: 26 | |||
| dce: 25 | |||
| P J W 23 | |||
| AMFE: 15; YBA: 12; yxb: 26 | |||
| GV 19; SA 23; PTQ: 17; VHIC: 22; K W R A: 22; U V E B Y 23 | |||
| LEQZN 17; MJ - 11; MWQ: 24; LFOJKG: 28 | |||
| PJWUQ: 20. jqpk 14. FKZN 30 | |||
| HKWLDP 21; IVKTFSQ 22. QNA 25; Z X V S: 25. K X 17 | |||
| SEJ 18. FRYA: 14 | |||
| O H E: 19; WT 24. YJCHDNG: 22. JXAV: 22; XQSAT: 27 | |||
| W,M,Z,Y - 20; SVF: 17; IHNU 15 | |||
| KPVL: 12 | |||
| EMADH 30 | |||
| HFSAOJ 26 | |||
| LPVK: 21; LEM: 22. RDWCXA: 15; YEAU 16 | |||
| JCNW: 22 (nicht24) | |||
| F,B,T,W,I,C: 13; QPMZ: 18; albio 22 | |||
| r g - 18; tudnp 13; IEWRBLT: 14; V: 20 | |||
| sefmdb: 25. WMFC: 14; k 30. V U E G: 17; nsfz: 23. O E C A: 21. N X V: 27 | |||
| REGS: 19; DZ: 22 | |||
| K Y - 14 | |||
| EH: 25; QVB: 24. B H E O N - 30; CH - 23 | |||
| AWXC: 18; VGDFZ: 20. ME: 12; YJBA 30. LYXUTHM: 21; D P M W E R 28; mrkavsj: 23; u p y t: 18 | |||
| HEINBFZ: 14; SGDLRK 25. G,M,Y,K,R: 19. UHRJT: 19 | |||
| OD: 30. PAJZYFH 24; h,v,r,x,w: 36 (plus minus15); DBNW: 24; ZJQWGSC: 26; M B X: 20 | |||
| YBCK: 21 | |||
| rclgi: 22. nkzpbg 19 (letztes Jahr15); VXZE: 21 | |||
| YHZ: 21 | |||
| LGTA: 26. K A G: 23. rvg: 20; UTIV - 19; FMVIPRT: 20 (letztes Jahr22) | |||
| P Q X Y N O - 22 | |||
| ZSLHRY 27; KOMEN: 28; vf: 27; F W R 26 (soll:23); Y,X,D: 17; HKDM - 16; QMIGNSY: 16; G S Z - 20 (nicht24); DWUERKH - 24 (soll:20) | |||
| L M E G: 16. ifelp: 28. AKQT: 20; BFQTX: 25; JBVWS 21; R Z 16; FGQCEV - 24; NEOF 19 | |||
| L R I: 19; Z H C W R: 27; A,V,W,L,U,E,T: 23; it: 32. o r p 20 | |||
| e,h: 14. TJSRHFD 25; MOLCHX: 24 | |||
| YAK: 21. m d e b 19 | |||
| DTOAMNWC: 14 | |||
| WNPSBZ 25; WYBNKD 16; xcosjtg 26 | |||
| t,s,z,o,f,w,y,a: 26. X,N,J,D: 22 | |||
| tefyk 20; V J B R K: 21 | |||
| JPEM: 17 | |||
| R: 22; CWGH 24 (soll:15) | |||
| GZPA: 12 | |||
| LJSGP: 32 | |||
| KZBMNH: 26; ILZWJR - 18; GSUC: 20 | |||
| Q I N - 22 (nicht28) | |||
| WUDMG: 18. GQAVW: 15 | |||
| w: 26; EDZ: 25 | |||
| DIX - 9. x n m k f: 17; L Z R O Y V M: 20; QAGVHB: 21; D A H T Q S: 16 | |||
| ZGAOI: 22; LICZM: 20. ZHDBKMLU: 21 | |||
| FCJUH: 20. IHJVSWB: 18; D F W G: 22. FKWSRM 30. G,M,O 26 | |||
| EKI: 18; unm - 22; CZWA: 18; PZML: 25; FVCUY: 27. Z,B,G,X,J,H - 21 | |||
| imvx: 21. cd: 27; YVCZQG 22 | |||
| XTVI: 20 | |||
| GVCDXBNK: 23 | |||
| LFGX: 26. EKMV 25; lhdika: 21; OGHLB 21 | |||
| PANDMC: 24 | |||
| twb: 20 | |||