| @@ -0,0 +1,27 @@ | |||||
| # Josua Kugler, Christian Merten | |||||
| library(tidyverse) | |||||
# Function composition: `f %o% g` returns a function that passes its arguments
# to `g` and then applies `f` to the result.
# NOTE: this definition masks base R's `%o%` (the outer-product operator) for
# the rest of the script.
`%o%` <- function(f, g) {
  # Arguments are lazy promises. Force them now so that rebinding the variables
  # passed as `f` or `g` before the first call cannot change what the composed
  # function ends up calling.
  force(f)
  force(g)
  function(...) f(g(...))
}
# Read the whole file into one single string in a single call. If read_lines()
# has to be used here instead, paste all lines back together first.
lines <- read_file(file = "books.txt")
# Return the characters of `s` in sorted order, glued back into one string
# (e.g. "CBA" -> "ABC"). All elements of `s` are pooled into a single result.
sort_char <- function(s) {
  chars <- unlist(str_split(s, ""))
  paste(sort(chars), collapse = "")
}
# Clean the raw text and extract every (category, count) pair:
#   1. drop parenthesised remarks and every character that is neither a letter
#      nor a digit,
#   2. upper-case what is left,
#   3. capture each run of letters that is followed by a run of digits.
# `cleared` is the resulting match matrix: column 1 holds the full match,
# column 2 the letters and column 3 the digits.
cleared <- str_match_all(
  str_to_upper(
    str_replace_all(
      lines,
      pattern = "\\(.*?\\)|[^a-zA-Z0-9]",
      replacement = ""
    )
  ),
  "([A-Z]+)([0-9]+)"
)[[1]]

# Normalise each category by sorting its letters. vapply() pins the result to
# character, so an input without any match gives an empty character column
# instead of a list.
unsorted_data <- tibble(
  category = vapply(cleared[, 2], sort_char, character(1)),
  count = as.integer(cleared[, 3])
)

# Order by the category vector itself. Indexing the tibble with `[, 1]` returns
# a one-column tibble, and order() on a data frame is not a supported way to
# sort.
data <- unsorted_data[order(unsorted_data$category), ]
# Build one message per row of `data` whose category matches `cat_let`.
#
# data:    data frame / tibble with a `category` column; the first column is
#          used as the category text and the second as the count in the
#          message.
# cat_let: pattern passed to str_detect() to select categories.
#
# Returns a character vector "We have <count> books of category <category>",
# one element per matching row (empty if nothing matches).
books_of_category <- function(data, cat_let) {
  selected <- data[str_detect(data$category, cat_let), ]
  category <- selected[[1]]
  count <- selected[[2]]
  # Interpolate column-wise instead of apply()-ing over rows: apply() converts
  # the data frame to a character matrix and pads the numbers to a common
  # width, which puts stray spaces in front of one-digit counts.
  as.character(str_glue("We have {count} books of category {category}"))
}
| @@ -0,0 +1,112 @@ | |||||
| # Josua Kugler, Christian Merten | |||||
| # install.packages("babynames") | |||||
| library(tidyverse) | |||||
## Create some data -----------------------------------------------------------
# Fixed seed so that the generated data set is reproducible.
set.seed(1)

# `baseset` holds the value pools the random students are drawn from, plus a
# "boost" per grade, class letter and distance that enters the running times.
baseset <- list(
  grade = 5:11,
  grade_boost = c(1, 3, 5, 7, 8, 9, 10),
  letter = letters[1:4],
  letter_boost = sample(1:5, 4, replace = TRUE)
)

# Total count per (sex, name), ranked within each sex; keep the names ranked
# in the top 3000 of their sex.
ranked_names <- babynames::babynames %>%
  group_by(sex, name) %>%
  summarise(n = sum(n)) %>%
  arrange(desc(n)) %>%
  mutate(rank = min_rank(-n)) %>%
  filter(rank <= 3000) %>%
  ungroup()

baseset$name <- ranked_names$name
baseset$distance <- c(100, 200, 400, 1000)
baseset$distance_boost <- c(14, 12, 10, 8)
# Draw `n` random students together with their running times.
#
# Name, grade and class letter are sampled with replacement from `baseset`.
# The boosts belonging to the drawn grade and letter are added up and, together
# with the per-distance boost, passed to sample_time() for each distance.
#
# Returns a tibble with columns name, grade, letter, time100, time200, time400
# and time1000 and `n` rows.
sample_observation <- function(n) {
  name <- sample(baseset$name, n, replace = TRUE)
  grade <- sample(baseset$grade, n, replace = TRUE)
  letter <- sample(baseset$letter, n, replace = TRUE)

  # Look up the boost of every drawn grade / letter by position in the pools.
  boost_base <-
    baseset$grade_boost[match(grade, baseset$grade)] +
    baseset$letter_boost[match(letter, baseset$letter)]

  # list() evaluates its arguments in order, so the times are drawn in the
  # order 100, 200, 400, 1000.
  as_tibble(list(
    name = name,
    grade = grade,
    letter = letter,
    time100 = sample_time(100, baseset$distance_boost[1] + boost_base),
    time200 = sample_time(200, baseset$distance_boost[2] + boost_base),
    time400 = sample_time(400, baseset$distance_boost[3] + boost_base),
    time1000 = sample_time(1000, baseset$distance_boost[4] + boost_base)
  ))
}
# Draw one random time per element of `boost` for a run over `dist`.
# A uniform draw from [2.5, 3] is divided by the boost and scaled by twice the
# distance, so a larger boost gives a shorter time.
sample_time <- function(dist, boost) {
  base_factor <- runif(length(boost)) / 2 + 2.5
  base_factor / boost * dist * 2
}
# The data set used by the exercises: 1000 randomly generated students.
sports <- sample_observation(1000)

# Eleven levels with evenly spaced values for `min100` (43 down to 23) and
# `min1000` (500 down to 300).
requirements <- tibble(
  level = 1:11,
  min100 = seq(43, 23, length.out = 11),
  min1000 = seq(500, 300, length.out = 11)
)
## Exercises -----------------------------------------------------------------
# a)
# sort sports by 'name' (alphabetically)
arrange(sports, name)
# b)
# sort sports by 'grade' (11, 10, ..., 5),
# in case of ties by 'letter' (a, b, c, d),
# in case of ties by 'name' (A-Z)
# desc() is required: the task asks for the highest grade first.
arrange(sports, desc(grade), letter, name)
# c)
# count the number of students per class (a class is a grade/letter pair)
summarise(group_by(sports, grade, letter), student_count = n())
# d)
# what are the mean, max and min class sizes
sports %>%
  group_by(grade, letter) %>%
  summarise(student_count = n()) %>%
  ungroup() %>%
  summarise(
    mean_class_size = mean(student_count),
    max_class_size = max(student_count),
    min_class_size = min(student_count)
  )
# e)
# get all students with a non-unique name
filter(group_by(sports, name), n() > 1)
# f)
# get the top 10 sprinters (100m), i.e. the 10 smallest times
top_n(sports, 10, -time100)
# g)
# get the slowest 10 sprinters (100m), i.e. the 10 largest times
top_n(sports, 10, time100)
# h)
# remove 100m, 200m, and 400m, and add velocity in km/h for 1000m
# 1000 m is 1 km, so the velocity is 3600 / time1000 km/h
# (assumes time1000 is measured in seconds).
# Only the three shorter distances are dropped; time1000 is kept.
sports %>%
  mutate(velocity = 60 * 60 / time1000) %>%
  select(-(time100:time400))
# i)
# rename 'grade' to 'level'
rename(sports, level = grade)
# j)
# calculate average and min times for 200m in each grade
summarise(
  group_by(sports, grade),
  average200 = mean(time200),
  min200 = min(time200)
)
# k)
# select all time-columns
select(sports, time100:time1000)
# l)
# show the fastest sprinter (100m) in each class
# sorted by class (i.e. by grade and letter)
sports %>%
  group_by(grade, letter) %>%
  top_n(1, -time100) %>%
  arrange(grade, letter)
| @@ -0,0 +1,65 @@ | |||||
| R S V C E W Y: 25 | |||||
| vqayl 18 (plus minus25) | |||||
| wl: 16; MUST - 16; HCUDM: 14; LBR: 20 | |||||
| c u g m h: 22 | |||||
| BLZJ 24; FEJB - 27; xsa: 20; LDECK 15; S,K: 27. WOAC: 18; WORZE - 26. PRK 25 | |||||
| L G U K E 19; w j x n 26 | |||||
| XTL: 16; O Y B - 35. XHBWK - 18. AXUPL: 31; YELJ 13; d,r,c,l: 26. QEGX: 21 | |||||
| RALM 26. G,K,I,W: 12; IJXC: 19. HPZAY: 19; JSPCO: 22; BIXE: 12 | |||||
| VTG: 26 | |||||
| dce: 25 | |||||
| P J W 23 | |||||
| AMFE: 15; YBA: 12; yxb: 26 | |||||
| GV 19; SA 23; PTQ: 17; VHIC: 22; K W R A: 22; U V E B Y 23 | |||||
| LEQZN 17; MJ - 11; MWQ: 24; LFOJKG: 28 | |||||
| PJWUQ: 20. jqpk 14. FKZN 30 | |||||
| HKWLDP 21; IVKTFSQ 22. QNA 25; Z X V S: 25. K X 17 | |||||
| SEJ 18. FRYA: 14 | |||||
| O H E: 19; WT 24. YJCHDNG: 22. JXAV: 22; XQSAT: 27 | |||||
| W,M,Z,Y - 20; SVF: 17; IHNU 15 | |||||
| KPVL: 12 | |||||
| EMADH 30 | |||||
| HFSAOJ 26 | |||||
| LPVK: 21; LEM: 22. RDWCXA: 15; YEAU 16 | |||||
| JCNW: 22 (nicht24) | |||||
| F,B,T,W,I,C: 13; QPMZ: 18; albio 22 | |||||
| r g - 18; tudnp 13; IEWRBLT: 14; V: 20 | |||||
| sefmdb: 25. WMFC: 14; k 30. V U E G: 17; nsfz: 23. O E C A: 21. N X V: 27 | |||||
| REGS: 19; DZ: 22 | |||||
| K Y - 14 | |||||
| EH: 25; QVB: 24. B H E O N - 30; CH - 23 | |||||
| AWXC: 18; VGDFZ: 20. ME: 12; YJBA 30. LYXUTHM: 21; D P M W E R 28; mrkavsj: 23; u p y t: 18 | |||||
| HEINBFZ: 14; SGDLRK 25. G,M,Y,K,R: 19. UHRJT: 19 | |||||
| OD: 30. PAJZYFH 24; h,v,r,x,w: 36 (plus minus15); DBNW: 24; ZJQWGSC: 26; M B X: 20 | |||||
| YBCK: 21 | |||||
| rclgi: 22. nkzpbg 19 (letztes Jahr15); VXZE: 21 | |||||
| YHZ: 21 | |||||
| LGTA: 26. K A G: 23. rvg: 20; UTIV - 19; FMVIPRT: 20 (letztes Jahr22) | |||||
| P Q X Y N O - 22 | |||||
| ZSLHRY 27; KOMEN: 28; vf: 27; F W R 26 (soll:23); Y,X,D: 17; HKDM - 16; QMIGNSY: 16; G S Z - 20 (nicht24); DWUERKH - 24 (soll:20) | |||||
| L M E G: 16. ifelp: 28. AKQT: 20; BFQTX: 25; JBVWS 21; R Z 16; FGQCEV - 24; NEOF 19 | |||||
| L R I: 19; Z H C W R: 27; A,V,W,L,U,E,T: 23; it: 32. o r p 20 | |||||
| e,h: 14. TJSRHFD 25; MOLCHX: 24 | |||||
| YAK: 21. m d e b 19 | |||||
| DTOAMNWC: 14 | |||||
| WNPSBZ 25; WYBNKD 16; xcosjtg 26 | |||||
| t,s,z,o,f,w,y,a: 26. X,N,J,D: 22 | |||||
| tefyk 20; V J B R K: 21 | |||||
| JPEM: 17 | |||||
| R: 22; CWGH 24 (soll:15) | |||||
| GZPA: 12 | |||||
| LJSGP: 32 | |||||
| KZBMNH: 26; ILZWJR - 18; GSUC: 20 | |||||
| Q I N - 22 (nicht28) | |||||
| WUDMG: 18. GQAVW: 15 | |||||
| w: 26; EDZ: 25 | |||||
| DIX - 9. x n m k f: 17; L Z R O Y V M: 20; QAGVHB: 21; D A H T Q S: 16 | |||||
| ZGAOI: 22; LICZM: 20. ZHDBKMLU: 21 | |||||
| FCJUH: 20. IHJVSWB: 18; D F W G: 22. FKWSRM 30. G,M,O 26 | |||||
| EKI: 18; unm - 22; CZWA: 18; PZML: 25; FVCUY: 27. Z,B,G,X,J,H - 21 | |||||
| imvx: 21. cd: 27; YVCZQG 22 | |||||
| XTVI: 20 | |||||
| GVCDXBNK: 23 | |||||
| LFGX: 26. EKMV 25; lhdika: 21; OGHLB 21 | |||||
| PANDMC: 24 | |||||
| twb: 20 | |||||