图型 | 所在包 | 样例 | 例图 | ||||||||||||||
热图 | seaborn | import matplotlib.pyplot as plt import seaborn as sns sns.heatmap(df.isnull()) plt.show() | Bitmap Bitmap
| ||||||||||||||
import numpy as np # 获取数据 fraud = data_df[data_df['Class'] == 1] nonFraud = data_df[data_df['Class'] == 0] # 相关性计算 correlationNonFraud = nonFraud.loc[:, data_df.columns != 'Class'].corr() correlationFraud = fraud.loc[:, data_df.columns != 'Class'].corr() # 上三角矩阵设置 mask = np.zeros_like(correlationNonFraud)# 全部设置0 indices = np.triu_indices_from(correlationNonFraud)# 返回矩阵上三角部分的索引 mask[indices] = True grid_kws = {"width_ratios": (.9, .9, .05), "wspace": 0.2} f, (ax1, ax2, cbar_ax) = plt.subplots(1, 3, gridspec_kw=grid_kws, figsize = (14, 9)) # 正常用户-特征相关性展示 cmap = sns.diverging_palette(220, 8, as_cmap=True) ax1 =sns.heatmap(correlationNonFraud, ax = ax1, vmin = -1, vmax = 1, \ cmap = cmap, square = False, linewidths = 0.5, mask = mask, cbar = False) ax1.set_xticklabels(ax1.get_xticklabels(), size = 16); ax1.set_yticklabels(ax1.get_yticklabels(), size = 16); ax1.set_title('Normal', size = 20) # 被盗刷的用户-特征相关性展示 ax2 = sns.heatmap(correlationFraud, vmin = -1, vmax = 1, cmap = cmap, \ ax = ax2, square = False, linewidths = 0.5, mask = mask, yticklabels = False, \ cbar_ax = cbar_ax, cbar_kws={'orientation': 'vertical', \ 'ticks': [-1, -0.5, 0, 0.5, 1]}) ax2.set_xticklabels(ax2.get_xticklabels(), size = 16); ax2.set_title('Fraud', size = 20); |
| ||||||||||||||||
柱形图 | matplotlib | import warnings warnings.filterwarnings('ignore') import matplotlib.gridspec as gridspec v_feat_col = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10', 'V11', 'V12', 'V13', 'V14', 'V15', 'V16', 'V17', 'V18', 'V19', 'V20','V21', 'V22', 'V23', 'V24', 'V25', 'V26', 'V27', 'V28'] v_feat_col_size = len(v_feat_col) plt.figure(figsize=(16,v_feat_col_size*4)) gs = gridspec.GridSpec(v_feat_col_size, 1) for i, cn in enumerate(data_df[v_feat_col]): ax = plt.subplot(gs[i]) sns.distplot(data_df[cn][data_df["Class"] == 1], bins=50)# V1 异常 绿色表示 sns.distplot(data_df[cn][data_df["Class"] == 0], bins=100)# V1 正常 橘色表示 ax.set_xlabel('') ax.set_title('histogram of feature: ' + str(cn)) | Bitmap
| ||||||||||||||
# 可视化特征重要性 plt.style.use('fivethirtyeight') plt.rcParams['figure.figsize'] = (12,6) ## feature importances 可视化## importances = clf.feature_importances_ feat_names = data_df_new[x_feature].columns indices = np.argsort(importances)[::-1] fig = plt.figure(figsize=(20,6)) plt.title("Feature importances by RandomTreeClassifier") x = list(range(len(indices))) plt.bar(x, importances[indices], color='lightblue', align="center") plt.step(x, np.cumsum(importances[indices]), where='mid', label='Cumulative') plt.xticks(x, feat_names[indices], rotation='vertical',fontsize=14) plt.xlim([-1, len(indices)]) | Bitmap
| ||||||||||||||||
# 是否异常和交易金额关系分析 f, (ax1, ax2) = plt.subplots(2, 1, sharex=True, figsize=(16,4)) bins = 30 ax1.hist(data_df["Amount"][data_df["Class"]== 1], bins = bins) ax1.set_title('Fraud') ax2.hist(data_df["Amount"][data_df["Class"] == 0], bins = bins) ax2.set_title('Normal') plt.xlabel('Amount ($)') plt.ylabel('Number of Transactions') plt.yscale('log') plt.show() | Bitmap
| ||||||||||||||||
plotly | # Months_Inactive_12_mon字段与Attrition_Flag字段的关系(直方图) fig = px.histogram(df, x="Months_Inactive_12_mon", color="Attrition_Flag",title='Number of months with no transactions in the last year') plotly.offline.iplot(fig) | Bitmap
| |||||||||||||||
seaborn | import seaborn as sns Gender = sns.countplot(x = 'Gender',hue = 'Attrition_Flag',data=df1,palette='Set2') Gen_att = df1.loc[df1['Attrition_Flag']=='Attrited Customer','Gender'] Gen_ex = df1.loc[df1['Attrition_Flag']=='Existing Customer','Gender'] print('Gender of Attrited customer:\n',Gen_att.value_counts()) print('-----------------------------------------------------------') print('Gender of Existing customer:\n',Gen_ex.value_counts()) print('-----------------------------------------------------------') print('Gender of Total customer:\n',df1.Gender.value_counts()) | Bitmap
| |||||||||||||||
# 了解Customer_Age字段与Attrition_Flag字段的关系 import matplotlib.pyplot as plt sns.set_style('whitegrid') plt.figure(figsize=(10,8)) sns.set_context('paper', font_scale=1.5) sns.histplot(x='Customer_Age', data = df1, hue ='Attrition_Flag').set_title('Customer by Age') | Bitmap
| ||||||||||||||||
sns.factorplot(x="Hour", data=data_df, kind="count", size=6, aspect=3) | Bitmap Bitmap
| ||||||||||||||||
组合图 | matplotlib seaborn | # 正负样本分布可视化 import matplotlib.pyplot as plt import seaborn as sns fig, axs = plt.subplots(1,2,figsize=(14,7)) # 柱状图 sns.countplot(x='Class',data=data_df,ax=axs[0]) axs[0].set_title("Frequency of each Class") # 饼图 data_df['Class'].value_counts().plot(x=None,y=None, kind='pie', ax=axs[1],autopct='%1.2f%%') axs[1].set_title("Percentage of each Class") plt.show() | Bitmap
| ||||||||||||||
箱型图 | plotly | # Total_Revolving_Bal字段与Attrition_Flag字段的关系(箱型图) fig = px.box(df, color="Attrition_Flag", y="Total_Revolving_Bal", points="all",title='Total revolving balance on the credit card') plotly.offline.iplot(fig) | Bitmap
| ||||||||||||||
matplotlib | # 了解数值型变量的分布 fig, ax= plt.subplots(nrows= 2, ncols = 3, figsize= (14,6)) sns.boxplot(x=df["CNT_CHILDREN"], ax=ax[0][0]) sns.boxplot(x=df["AMT_INCOME_TOTAL"], ax=ax[0][1]) sns.boxplot(x=df["DAYS_BIRTH"], ax=ax[0][2]) sns.boxplot(x=df["DAYS_EMPLOYED"], ax=ax[1][0]) sns.boxplot(x=df["CNT_FAM_MEMBERS"], ax=ax[1][1]) sns.boxplot(x=df["MONTHS_BALANCE"], ax=ax[1][2]) plt.show() | Bitmap
| |||||||||||||||
饼图 | plotly | # Card_Category字段统计 fig = px.pie(df1,names='Card_Category',title='Percentage of Card type',hole=0.3) fig.update_traces(textposition='outside', textinfo='percent+label') plotly.offline.iplot(fig) |
| ||||||||||||||
# 可视化Marital_Status字段与Attrition_Flag字段的关系 from plotly.subplots import make_subplots import plotly.graph_objs as go fig = make_subplots( rows=2, cols=2,subplot_titles=('Total Customer','Existing Customers','Attrited Customers','Residuals'), vertical_spacing=0.09, specs=[[{"type": "pie","rowspan": 2} ,{"type": "pie"}] , [None ,{"type": "pie"}] , ] ) fig.add_trace( go.Pie(values=df1.Marital_Status.value_counts().values, labels=['Married','Single','Unknown', 'Divorced'], pull=[0,0.01,0.03,0.03], hole=0.3), row=1, col=1 ) fig.add_trace( go.Pie( labels=['Married', 'Single','Divorced', 'Unknown'], values=df1.query('Attrition_Flag=="Existing Customer"').Marital_Status.value_counts().values, pull=[0,0.01,0.05,0.05], hole=0.3), row=1, col=2 ) fig.add_trace( go.Pie( labels=['Married', 'Single','Unknown','Divorced'], values=df1.query('Attrition_Flag=="Attrited Customer"').Marital_Status.value_counts().values, pull=[0,0.01,0.05,0.05], hole=0.3), row=2, col=2 ) fig.update_layout( height=700, showlegend=True, title_text="<b>Marital Status</b>", ) fig.update_traces(textposition='inside', textinfo='percent+label') plotly.offline.iplot(fig) | Bitmap
| ||||||||||||||||
小提琴图 | plotly | fig = px.violin(df, color="Attrition_Flag", y="Total_Trans_Ct", points="all",title='Number of transactions made in the last year') plotly.offline.iplot(fig) | Bitmap
|