# Correlation between weight and height ----
# Load the BMI table: tab-separated, with a header row; the first column is
# used as row names.
test <- read.table(
  "/Users/zhangzhishuai/Downloads/20 lesson20 R相关性检验及线性拟合/BMI.txt",
  sep = "\t",
  header = TRUE, # `TRUE` rather than `T`, which is an ordinary reassignable name
  row.names = 1
)
test
# Scatter plot of the two variables.
plot(test$weight, test$height)
# Correlation coefficient (cor() uses Pearson's method by default).
cor(test$weight, test$height)
# Significance test for that correlation.
cor.test(test$weight, test$height)
# Linear regression ----
# Model height as a linear function of weight, using the `test` data frame.
result <- lm(formula = height ~ weight, data = test)
summary(result)
# Redraw the scatter plot, then overlay the fitted regression line.
plot(x = test$weight, y = test$height)
abline(reg = result, col = "red", lwd = 1) # lwd controls the line width
# Checking for normality ----
# Draw 1000 random values from the standard normal distribution.
data <- rnorm(1000)
data
# Density-scaled histogram. `probability` is spelled out instead of relying on
# partial matching of `prob`, and `TRUE` replaces the reassignable `T`.
hist(data, probability = TRUE)
# Overlay the kernel density estimate; a bell-shaped curve suggests normality.
lines(density(data))
# Shapiro-Wilk test: p > 0.05 means normality is not rejected.
shapiro.test(data)
# Normal Q-Q plot; points lying close to the reference line suggest normality.
qqnorm(data)
qqline(data)
# Rank-sum test ----
# When the data are not normally distributed a t-test is not appropriate;
# the Wilcoxon rank-sum test is used instead.
a <- c(rep(1, 10), rep(2, 5), rep(3, 4), 6, 8, 10, 12, 20)
a
# `probability` is written in full rather than partially matched via `prob`.
hist(a, breaks = seq(0.5, 21, by = 1), probability = TRUE)
lines(density(a), col = "blue")
abline(v = median(a), col = "red") # median of a
abline(v = mean(a), col = "green") # mean of a
qqnorm(a)
qqline(a)
# Test a for normality too, instead of assuming it: the sample is dominated by
# repeated small values with a few large ones.
shapiro.test(a)
b <- c(rep(2, 7), rep(3, 5), rep(5, 8), 8, 10, 18, 25)
b
hist(b, probability = TRUE, breaks = seq(0.5, 26, by = 1))
shapiro.test(b)
# Rank-sum test for a difference between a and b. Both samples contain ties,
# so an exact p-value cannot be computed; asking for the normal approximation
# explicitly avoids the "cannot compute exact p-value with ties" warning.
wilcox.test(a, b, exact = FALSE)
# One-sample proportion test ----
# Checks whether the proportion observed in a sample differs from a reference
# proportion.
prop.test(
  x = 51, # number of observed events in the sample
  n = 400, # sample size
  p = 0.1, # reference proportion to compare against (10%)
  alternative = "greater" # H1: above 10%; "two.sided" or "less" also possible
)
# Chi-squared test ----
# Measures how far the observed counts deviate from the counts expected under
# independence; the larger the statistic, the larger the deviation.
# The 2 x 2 table below cross-classifies smoking status against disease status.
data <- matrix(
  c(
    50, 250,
    8, 10
  ),
  nrow = 2,
  byrow = TRUE
)
mode(data)
dimnames(data) <- list(
  c("non-smoker", "smoker"),
  c("disease", "without disease")
)
chisq.test(data)
# Fisher's exact test ----
# Use it when chisq.test() warns that its approximation may be inaccurate.
fisher.test(data)
# 2 x 3 contingency table: one row per sex, one column per disease stage.
disease <- rbind(
  male = c(20, 40, 20),
  female = c(30, 30, 10)
)
colnames(disease) <- paste0("stage", 1:3)
disease
# Chi-squared test of independence between the rows and columns.
chisq.test(disease)
# Analysis of variance ----
# ANOVA tests whether several normally distributed populations with equal
# variances share the same mean. Depending on how many factors influence the
# response it is called one-way, two-way or multi-way ANOVA.
cholesterol <- read.table(
  "/Users/zhangzhishuai/Downloads/22 lesson22 R方差检验/20_22_statistical_model/treatment.txt",
  header = TRUE, # `TRUE` rather than `T`, which is an ordinary reassignable name
  sep = "\t"
)
# read.table() leaves `trt` as character on R >= 4.0 (stringsAsFactors defaults
# to FALSE). Convert it explicitly so the grouping is a factor for TukeyHSD()
# regardless of the R version.
cholesterol$trt <- factor(cholesterol$trt)
# Box plot of the response for each treatment.
boxplot(response ~ trt, data = cholesterol)
# Homogeneity of variance: p > 0.05 gives no evidence that the group variances
# differ.
bartlett.test(response ~ trt, data = cholesterol)
# One-way ANOVA of response on treatment.
fit <- aov(response ~ trt, data = cholesterol)
fit
# Normality check. The ANOVA assumption concerns the residuals, not the pooled
# response (which mixes groups with different means), so test the residuals.
shapiro.test(residuals(fit))
# p < 0.05 indicates that the group means differ, but not which pairs differ.
summary(fit)
# Pairwise comparisons of the group means.
TukeyHSD(fit)
# Input data reference ----
# Kept as comments so that the script remains valid R.
# BMI.txt: (contents not listed here)
# treatment.txt (the script reads it as tab-separated):
# trt response
# drugA 3.8612
# drugA 10.3868
# drugA 5.9059
# drugA 3.0609
# drugA 7.7204
# drugA 2.7139
# drugA 4.9243
# drugA 2.3039
# drugA 7.5301
# drugA 9.4123
# drugB 10.3993
# drugB 8.6027
# drugB 13.632
# drugB 3.5054
# drugB 7.7703
# drugB 8.6266
# drugB 9.2274
# drugB 6.3159
# drugB 15.8258
# drugB 8.3443
# drugC 13.9621
# drugC 13.9606
# drugC 13.9176
# drugC 8.0534
# drugC 11.0432
# drugC 12.3692
# drugC 10.3921
# drugC 9.0286
# drugC 12.8416
# drugC 18.1794
# drugD 16.9819
# drugD 15.4576
# drugD 19.9793
# drugD 14.7389
# drugD 13.585
# drugD 10.8648
# drugD 17.5897
# drugD 8.8194
# drugD 17.9635
# drugD 17.6316
# drugE 21.5119
# drugE 27.2445
# drugE 20.5199
# drugE 15.7707
# drugE 22.885
# drugE 23.9527
# drugE 21.5925
# drugE 18.3058
# drugE 20.3851
# drugE 17.3071