文章目录
- 数据诊断分析(均值方差)
- Matlab代码实现
- 结果展示
数据诊断分析(均值方差)
- 均值方差检测是一种简单但有效的异常检测方法,主要基于样本的均值和方差的统计信息。该方法的核心思想是假设正常的样本点应该聚集在某个区域,而异常点则可能远离这个区域。
-
3
σ
3\sigma
3σ准则:是一种统计学中常用的规则,用于衡量数据集中的值离均值的距离。该规则基于正态分布的性质,提供了一个衡量数据集中值的离散程度的指标。
- 具体来说,
3
σ
3\sigma
3σ准则表明:
- 对于一个正态分布的数据集,约有 68% 的数据值会落在均值的一个标准差范围内。
- 约有 95% 的数据值会落在均值的两个标准差范围内。
- 约有 99.7% 的数据值会落在均值的三个标准差范围内。
Matlab代码实现
clear;clc;close all
load('.\data\daily_sia_7920.mat');
sia=min(cdr_sia./1e11);
for ii=1:42
sia_day(ii)=find(cdr_sia(:,ii)./1e11==sia(ii));
end
msia=mean(sia_day);
ssia=std(sia_day);
siz=25;lind=1.5;
x_0=0.10;
y_0=0.70;
len=0.85;
width=0.25;
d_x=0.32;
d_y=-0.27;
px=[0 0 0 0];
py=[0 1 2 3];
set(gcf,'color',[1 1 1],'position',[10 45 800 800*1.2]);
axes('position',[x_0+d_x*px(1), y_0+d_y*py(1), len, width]);
plot(sia_day,'k-*','linewidth',lind);hold on
plot([1 42],[msia,msia],'k-','linewidth',lind);hold on
plot([1 42],[msia+ssia,msia+ssia],'k--','linewidth',lind);hold on
plot([1 42],[msia-ssia,msia-ssia],'k--','linewidth',lind);hold on
C1=sia_day;C1(C1>msia-ssia)=nan;
C3=sia_day;C3(C3<msia+ssia)=nan;
scatter([1:42],C1,60,'b','filled','s');hold on
scatter([1:42],C3,60,'r','filled','^');hold on
set(gca,'linewidth',lind);grid on
set(gca,'xlim',[0 43],'xtick',1:5:42,'xticklabel','',...
'fontname','Times New Roman','FontSize',siz-10,'fontweight','bold')
set(gca,'ylim',[210 280],'ytick',210:20:280,'yticklabel',num2str([210:20:280]','
'Times New Roman','FontSize',siz-10,'fontweight','bold');
ylabel('Day of year (d)','fontname','Times New Roman',...
'FontSize',siz-10,'fontweight','bold');
hh=get(gca);
X=hh.XLim;
Y=hh.YLim;
k1=[0.03 0.8];
k2=[0.3 0.8];
k3=[0.03 0.9];
x_2=X(1)+k2(1)*(X(2)-X(1));
y_2=Y(1)+k2(2)*(Y(2)-Y(1));
x_3=X(1)+k3(1)*(X(2)-X(1));
y_3=Y(1)+k3(2)*(Y(2)-Y(1));
text(double(x_3),double(y_3),'(a)','color','k','fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
load('.\data\BFT_wind2_daily_7920.mat')
ue=u;
uc=mean(u);
stdc=std(u);
data1=ue-uc;
data2=ue-uc-stdc;
data3=ue-uc-2*stdc;
data1(data1<0)=nan;data1(~isnan(data1))=1;
data2(data2<0)=nan;data2(~isnan(data2))=1;
data3(data3<0)=nan;data3(~isnan(data3))=1;
mon=[31 28 31 30 31 30 31 31 30 31 30 31];
mona=[1,32,60,91,121,152,182,213,244,274,305,335];
monb=[31,59,90,120,151,181,212,243,273,304,334,365];
barmap1=[190 223 235;0 190 255;26 26 210]./255;
for mm=1:12
datam=nansum(data1(mona(mm):monb(mm),:),1);
datas1=nansum(data2(mona(mm):monb(mm),:),1);
datas2=nansum(data3(mona(mm):monb(mm),:),1);
datad0(mm,:)=datam-datas1;
datad1(mm,:)=datas1-datas2;
datad2(mm,:)=datas2;
end
data590=nansum(datad0(5:9,:),1);
data591=nansum(datad1(5:9,:),1);
data592=nansum(datad2(5:9,:),1);
axes('position',[x_0+d_x*px(2), y_0+d_y*py(2), len, width]);
ch=bar([data590;data591;data592]','stacked');
grid on
set(ch(1),'FaceColor',barmap1(1,:));
set(ch(2),'FaceColor',barmap1(2,:));
set(ch(3),'FaceColor',barmap1(3,:));
set(gca,'linewidth',lind,'ylim',[0 120]);
ylabel('Days yr^{-1}','fontname','Times New Roman','FontSize',siz-10,'fontweight','bold');
text(15,110,{'>0\sigma days'},'color',barmap1(1,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,100,{'≥1\sigma days'},'color',barmap1(2,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,90,{'≥2\sigma days'},'color',barmap1(3,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
set(gca,'xlim',[0 43],'xtick',[1:5:42],'xticklabel','','Fontname',...
'Times New Roman','FontSize',siz-10,'fontweight','bold')
hh=get(gca);
X=hh.XLim;
Y=hh.YLim;
k1=[0.03 0.8];
k2=[0.3 0.8];
k3=[0.03 0.9];
x_2=X(1)+k2(1)*(X(2)-X(1));
y_2=Y(1)+k2(2)*(Y(2)-Y(1));
x_3=X(1)+k3(1)*(X(2)-X(1));
y_3=Y(1)+k3(2)*(Y(2)-Y(1));
text(double(x_3),double(y_3),'(b)','color','k','fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
load('.\data\BFT_wind2_daily_7920.mat')
ue=u;
uc=mean(u);
stdc=std(u);
data1=ue-uc;
data2=ue-uc-stdc;
data3=ue-uc-2*stdc;
data1(data1<0)=nan;data1(~isnan(data1))=1;
data2(data2<0)=nan;data2(~isnan(data2))=1;
data3(data3<0)=nan;data3(~isnan(data3))=1;
barmap2=[178 38 38;255 27 255;221 159 221;255 195 204;187 143 143]./255;
for yy=1:42
for mm=1:12
events3=0;
for aa=mona(mm):monb(mm)-2
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy))
events3=events3+1;
end
Events3(mm,yy)=events3;
end
events4=0;
for aa=mona(mm):monb(mm)-3
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))
events4=events4+1;
end
Events4(mm,yy)=events4;
end
events5=0;
for aa=mona(mm):monb(mm)-4
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))...
& ~isnan(data2(aa+4,yy))
events5=events5+1;
end
Events5(mm,yy)=events5;
end
events6=0;
for aa=mona(mm):monb(mm)-5
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))...
& ~isnan(data2(aa+4,yy)) & ~isnan(data2(aa+5,yy))
events6=events6+1;
end
Events6(mm,yy)=events6;
end
events7=0;
for aa=mona(mm):monb(mm)-6
if ~isnan(data2(aa,yy)) & ~isnan(data2(aa+1,yy)) & ~isnan(data2(aa+2,yy)) & ~isnan(data2(aa+3,yy))...
& ~isnan(data2(aa+4,yy)) & ~isnan(data2(aa+5,yy)) & ~isnan(data2(aa+6,yy))
events7=events7+1;
end
Events7(mm,yy)=events7;
end
end
end
datas3=nansum(Events3(5:9,:),1);
datas4=nansum(Events4(5:9,:),1);
datas5=nansum(Events5(5:9,:),1);
datas6=nansum(Events6(5:9,:),1);
datas7=nansum(Events7(5:9,:),1);
axes('position',[x_0+d_x*px(3), y_0+d_y*py(3), len, width]);
ch=bar([datas3;datas4;datas5;datas6;datas7]','stacked');
grid on
set(ch(1),'FaceColor',barmap2(1,:));
set(ch(2),'FaceColor',barmap2(2,:));
set(ch(3),'FaceColor',barmap2(3,:));
set(ch(4),'FaceColor',barmap2(4,:));
set(ch(5),'FaceColor',barmap2(5,:));
set(gca,'linewidth',lind,'ylim',[0 60]);
ylabel('Events yr^{-1}','fontname','Times New Roman','FontSize',siz-10,'fontweight','bold');
text(15,58,{'3-days events'},'color',barmap2(1,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,53,{'4-days events'},'color',barmap2(2,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,48,{'5-days events'},'color',barmap2(3,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,43,{'6-days events'},'color',barmap2(4,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
text(15,38,{'7-days events'},'color',barmap2(5,:),'fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
set(gca,'xlim',[0 43],'xtick',[1:5:42],'xticklabel',1979:5:2020,'Fontname',...
'Times New Roman','FontSize',siz-10,'fontweight','bold')
hh=get(gca);
X=hh.XLim;
Y=hh.YLim;
k1=[0.03 0.8];
k2=[0.3 0.8];
k3=[0.03 0.9];
x_2=X(1)+k2(1)*(X(2)-X(1));
y_2=Y(1)+k2(2)*(Y(2)-Y(1));
x_3=X(1)+k3(1)*(X(2)-X(1));
y_3=Y(1)+k3(2)*(Y(2)-Y(1));
text(double(x_3),double(y_3),'(c)','color','k','fontname',...
'Times New Roman','fontweight','bold','fontsize',siz-10);
结果展示