# 1、加载数据
import pandas as pd
from sklearn.datasets import load_iris
import warnings
# Suppress all warning messages for the rest of the run.
warnings.filterwarnings(action='ignore')

# Load the iris dataset through scikit-learn's loader.
iris = load_iris()
# Bare expressions: they only produce output in an interactive session.
iris
iris.keys()

# One column per feature, then the class label appended as 'target'.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(target=iris.target)
df.head()
# 2、基于seaborn的直方图
# seaborn is referenced as `sns` from here on; it is imported at this point
# because no earlier statement in the script brings the name into scope.
import seaborn as sns

# Histogram of sepal length, with a rug marking the individual observations.
sns.displot(df['sepal length (cm)'], kind='hist', rug=True)
# Same variable assigned to the y axis instead of x.
sns.displot(y=df['sepal length (cm)'])
# 3、基于matplotlib的直方图
import matplotlib.pyplot as plt
# Set up a 4x3-inch figure holding a single axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(4, 3))
# Plain matplotlib histogram of sepal length; bars are outlined in black.
ax.hist(x=df['sepal length (cm)'], edgecolor='black')
plt.show()
# 4、绘制子图对比
# Build a 3x2 grid of axes on a 12x16-inch figure.
# NOTE(review): the subplot titles below are CJK text; presumably the default
# matplotlib font may lack those glyphs -- confirm a CJK-capable font is set.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(12, 16), constrained_layout=True)

# Row 0: default histogram with the variable on x (left) and on y (right).
sns.histplot(data=df, x='sepal length (cm)', ax=ax[0, 0])
sns.histplot(data=df, y='sepal length (cm)', ax=ax[0, 1])

# Row 1, left: explicit number of bins.
ax_sub = ax[1, 0]
sns.histplot(data=df, x='sepal length (cm)', bins=20, ax=ax_sub)
ax_sub.set_title('自定义分箱')

# Row 1, right: histogram with a KDE curve overlaid.
ax_sub = ax[1, 1]
sns.histplot(data=df, x='sepal length (cm)', kde=True, ax=ax_sub)
ax_sub.set_title('添加密度曲线')

# Row 2, left: KDE curve plus a rug.
# The rug draws a short tick on the axis at every observed value, showing
# where the raw data points actually lie.
ax_sub = ax[2, 0]
sns.histplot(data=df, x="sepal length (cm)", kde=True, ax=ax_sub)
sns.rugplot(data=df, x="sepal length (cm)", ax=ax_sub)
ax_sub.set_title('添加kde+rug')

# Row 2, right: density-scaled histogram with separately styled KDE and rug layers.
ax_sub = ax[2, 1]
sns.histplot(data=df, x="sepal length (cm)", stat="density", ax=ax_sub)
sns.kdeplot(data=df, x="sepal length (cm)", color="g", linewidth=5, alpha=0.3, ax=ax_sub)
sns.rugplot(data=df, x="sepal length (cm)", color="r", linewidth=2, alpha=0.3, height=0.1, ax=ax_sub)
ax_sub.set_title('自定义kde+rug')

plt.show()
# Build a 3x2 grid of axes on a 12x12-inch figure.
fig, ax = plt.subplots(nrows=3, ncols=2, figsize=(12, 12), constrained_layout=True)

# Row 0: no x/y variable is named, so the DataFrame is passed as-is;
# the right-hand panel stacks the series and shrinks the bars to 80% width.
sns.histplot(data=df, ax=ax[0, 0])
sns.histplot(data=df, multiple='stack', shrink=.8, ax=ax[0, 1])

# Row 1, left: one histogram per target class, drawn overlapping.
ax_sub = ax[1, 0]
sns.histplot(data=df, x='sepal length (cm)', hue='target', ax=ax_sub)
ax_sub.set_title('重叠(覆盖)图')

# Row 1, right: the same per-class histograms, stacked.
ax_sub = ax[1, 1]
sns.histplot(data=df, x='sepal length (cm)', hue='target', multiple='stack', ax=ax_sub)
ax_sub.set_title('堆叠图')

# Row 2: step-outline histograms per class, as counts (left) and densities (right).
sns.histplot(data=df, x='sepal length (cm)', hue='target', element='step', ax=ax[2, 0])
sns.histplot(data=df, x='sepal length (cm)', hue='target', element='step', stat="density", ax=ax[2, 1])

plt.show()
# Build two stacked axes on a 4x6-inch figure for the bivariate histograms.
fig, ax = plt.subplots(2, 1, constrained_layout=True, figsize=(4, 6))
# Top: 2-D histogram of sepal length against petal length.
sns.histplot(df, x='sepal length (cm)', y='petal length (cm)', ax=ax[0])
# Bottom: the same 2-D histogram, split by target class.
sns.histplot(df, x='sepal length (cm)', y='petal length (cm)', hue='target', ax=ax[1])
# Show this figure now, like the other sections do, so it is not left open
# as the current figure for later axes-less plotting calls.
plt.show()
# 5、一图绘制多个变量
# Draw both variables on a dedicated axes. Without an explicit `ax`, seaborn
# plots onto whatever axes is current, which may belong to an earlier figure.
fig, ax = plt.subplots()
sns.histplot(df, x='sepal length (cm)', label='sepal length (cm)', kde=True, ax=ax)
sns.histplot(df, x='sepal width (cm)', label='sepal width (cm)', kde=True, ax=ax)
# The legend uses the `label` given to each histogram call.
ax.legend()
plt.show()
# Histogram + scatter plot: the scatter shows the relationship between the two
# variables, while the histograms give a better view of each distribution.
sns.jointplot(x=df["sepal length (cm)"], y=df["sepal width (cm)"], kind='scatter')
# Show the figure explicitly, as the other sections do; otherwise a plain
# script run ends without displaying it.
plt.show()