# score 成绩单
# Package setup: install only the packages that are not yet available, then
# attach them. Calling install.packages() unconditionally would download and
# reinstall all three packages every time the script is run.
required_pkgs <- c("dplyr", "tibble", "stringr")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(dplyr)
library(tibble)
library(stringr)
# Demo score sheet for nine students: an ID, a gender and five subject scores.
# Each score is drawn uniformly from [80, 90] and rounded to a whole number;
# no seed is set, so the values differ between runs.
# english_score only draws eight values; the ninth student's score is NA.
# NOTE(review): `match_score` / `musci_score` look like misspellings of
# math / music; the names are kept because the rest of the script uses them.
score <- tibble(
  ID = c(
    "1222-1", "2001-0", "3321-1", "4898-0", "2782-0",
    "1002-8", "4211-0", "1023-1", "3325-1"
  ),
  gender = c(
    "female", "male", "male", "male", "female",
    "female", "male", "female", "male"
  ),
  chinese_mid_score = round(runif(9, min = 80, max = 90)),
  chinese_final_score = round(runif(9, min = 80, max = 90)),
  english_score = c(round(runif(8, min = 80, max = 90)), NA),
  match_score = round(runif(9, min = 80, max = 90)),
  musci_score = round(runif(9, min = 80, max = 90))
)
view(score)
# 注意:rowSums 中的 S 是大写的。
# 计算每个学生的总分
# Total score per student: row-wise sum over every column of `score` whose
# name contains "_score", ignoring NA values.
# The columns are selected from `score` directly (not from data flowing
# through a pipe), so `score` must be the same table that is being mutated.
score_sum <- mutate(
  score,
  total_score = rowSums(select(score, contains("_score")), na.rm = TRUE)
)
view(score_sum)
# rowSums、select 以及 contains 结合使用,可以灵活处理较复杂的数据。
# 如果数据量特别大,且列名的命名没有规律,该怎么办呢?
# 这种情况下 contains 就不好用了,改用 across。
# Total score per student without relying on column names:
# across(where(is.numeric)) yields every numeric column of the current data,
# and rowSums() adds them up row by row while skipping NA values.
score_across <- mutate(
  score,
  total_score = rowSums(across(where(is.numeric)), na.rm = TRUE)
)
view(score_across)
# 任务:这次考试总体偏难,为了鼓励学生,把每门分数都除以 0.95。
# Curve every numeric column by dividing it by 0.95.
# The original columns are kept; each curved copy is added as a new column
# named "curve_<original column name>".
score_change <- mutate(
  score,
  across(
    .cols = where(is.numeric),
    .fns = ~ .x / 0.95,
    .names = "curve_{col}"
  )
)
view(score_change)
# 注意 across 函数的参数命名规则:参数名前面都需要加点(.)。
# 其中 .fns(functions,函数)后面的 ~ 表示自定义的 function;
# .names 命名时记得加 "",{col} 表示每列的名字。
# 设置:成绩 >= 80 为 pass,低于 80 为 fail。
# Label every numeric score as "pass" or "fail".
# Rule: a score of 80 or more passes, anything below 80 fails. The comparison
# is therefore `>=`; a strict `>` would wrongly fail a score of exactly 80.
# A missing score (NA) stays NA, because if_else() returns NA when its
# condition is NA.
# Each result is added as a new column named "pass_<original column name>".
score_pass <- score |>
  mutate(
    across(
      .cols = where(is.numeric),
      .fns = ~ if_else(.x >= 80, "pass", "fail"),
      .names = "pass_{col}"
    )
  )
view(score_pass)
# 通过在 .fns 中加入条件判断即可。
# 最后一个综合任务
# 只考虑数学成绩(match_score):先将每个学生的成绩除以 0.95,再判断修改后的成绩是否 >= 85 分,新的列分别用 curve_{col} 及 pass_{col}(即 curve_match_score、pass_match_score)命名。
# Math-only task, two steps inside a single mutate():
#   1. curve_match_score: match_score divided by 0.95 (named via across()).
#   2. pass_match_score: "pass" when the curved score is at least 85,
#      otherwise "fail". This expression can use curve_match_score because
#      mutate() evaluates its arguments in order.
# Finally keep only the columns whose name contains "match".
score_match <- score |>
  mutate(
    across(.cols = match_score, .fns = ~ .x / 0.95, .names = "curve_{col}"),
    pass_match_score = if_else(curve_match_score >= 85, "pass", "fail")
  ) |>
  select(contains("match"))
view(score_match)
# 以上代码看着有些累赘;如果只根据原来的成绩来判断 pass 或 fail,就会简单许多。
# Simpler variant: apply two named functions to match_score in one across().
#   curve: the original score divided by 0.95
#   pass:  "pass" when the ORIGINAL (uncurved) score is at least 85,
#          otherwise "fail"
# "{fn}_{col}" names the new columns curve_match_score and pass_match_score.
# Only the columns whose name contains "match" are kept.
score_match <- score |>
  mutate(
    across(
      .cols = match_score,
      .fns = c(
        curve = ~ .x / 0.95,
        pass = ~ if_else(.x >= 85, "pass", "fail")
      ),
      .names = "{fn}_{col}"
    )
  ) |>
  select(contains("match"))
view(score_match)
# 其中,.fns = c(curve = , pass = ) 给出了两个 function 的名字;
# .names = "{fn}_{col}" 中的 {fn} 表示 .fns 里新加的这两个名字。
# 计算各科的平均分、方差,以及第 5、第 80 百分位数
# One-row summary of every numeric column. For each column four statistics
# are computed with NA values removed:
#   mean - arithmetic mean
#   var  - variance
#   q5   - 5% quantile
#   q80  - 80% quantile
# Result columns are named "<statistic>_<original column name>".
score_summary <- summarise(
  score,
  across(
    .cols = where(is.numeric),
    .fns = list(
      mean = ~ mean(.x, na.rm = TRUE),
      var = ~ var(.x, na.rm = TRUE),
      q5 = ~ quantile(.x, probs = 0.05, na.rm = TRUE),
      q80 = ~ quantile(.x, probs = 0.8, na.rm = TRUE)
    ),
    .names = "{fn}_{col}"
  )
)
view(score_summary)
# 结果包含各科的平均值 mean、方差 var、第 5 百分位数 q5 和第 80 百分位数 q80。