一、概述
数据集: 537577行 × 12列。
二、可视化
1、年龄/性别各自的用户占比
import pandas as pd
from pyecharts.charts import Pie, Bar, Grid, Page, Boxplot
import pyecharts.options as opts
# Load the sales records; every chart in this script is derived from `df`.
df = pd.read_csv('./BlackFridaySales.csv')

# Occurrence counts per age bracket and per gender, kept as (label, count)
# views; the pie charts convert them to lists when they are plotted.
# NOTE(review): value_counts() counts rows, not distinct User_IDs, while the
# grouped table below counts distinct users -- confirm which one is intended.
age_data = df.Age.value_counts().to_dict().items()
gender_data = df.Gender.value_counts().to_dict().items()

# Number of distinct User_ID values for every (Gender, Age) combination.
gender_age_data = (
    df.groupby(by=['Gender', 'Age'])
    .agg({'User_ID': pd.Series.nunique})
)

# Quick look at the per-age figures for the 'F' group.
print(gender_age_data.loc['F', :].User_ID)
# Ring chart of the age counts. The label shows only the '{d}' placeholder
# followed by a percent sign; the legend is hidden.
pie = (
    Pie()
    .add(
        '',
        list(age_data),
        radius=['20%', '30%'],
        center=['10%', '50%'],
        label_opts=opts.LabelOpts(formatter='{d}%'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='用户的年龄占比', pos_left='3%'),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
pie.render('aaa.html')

# Ring chart of the gender counts, centred further right than the age chart
# (30% vs 10%), with '{b}:{d}%' labels drawn inside the ring.
pie2 = (
    Pie()
    .add(
        '',
        list(gender_data),
        radius=['20%', '30%'],
        center=['30%', '50%'],
        label_opts=opts.LabelOpts(formatter='{b}:{d}%', position='inside'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='用户的性别占比', pos_left='23%'),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
pie2.render('bbb.html')
# Stacked bar chart of distinct users per age bracket, one series per gender.
# Both series share the stack id 's1', so they are drawn on top of each other.
female_users = gender_age_data.loc['F', :]
male_users = gender_age_data.loc['M', :]

bar = (
    Bar()
    # The age brackets of the 'F' slice are used as the category axis.
    .add_xaxis(list(female_users.index))
    .add_yaxis(
        'F',
        list(female_users.User_ID),
        stack='s1',
        label_opts=opts.LabelOpts(position='inside'),
    )
    .add_yaxis(
        'M',
        list(male_users.User_ID),
        stack='s1',
        label_opts=opts.LabelOpts(position='inside'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='用户性别&年龄占比', pos_left='60%'),
        legend_opts=opts.LegendOpts(pos_right='right'),
    )
)
bar.render('ccc.html')
水平展现多图
# Combine the bar chart and both pies into a single Grid and write it out.
# NOTE(review): the age pie uses pos_left='10' (no percent sign) while the
# other two entries use percentages -- confirm whether '10%' was intended.
grid = (
    Grid()
    .add(bar, grid_opts=opts.GridOpts(pos_left='45%'))
    .add(pie, grid_opts=opts.GridOpts(pos_left='10'))
    .add(pie2, grid_opts=opts.GridOpts(pos_left='30%'))
)
grid.render('ddd.html')
垂直展现多图
# Put the two pies and the bar chart on one Page, in that order, and render it.
page = Page().add(pie, pie2, bar)
page.render('eee.html')
2、婚姻状况/城市类别/消费情况的占比
# Occurrence counts of each Marital_Status code.
marriage = df['Marital_Status'].value_counts()
print(marriage.index)

# Replace the numeric codes 0 / 1 with the labels shown in the chart.
marriage = marriage.rename(index={0: '未婚', 1: '已婚'})
print(marriage)
marriage_data = list(marriage.to_dict().items())

# Ring chart of marital status with '{b}:{d}%' labels and a plain legend
# placed at the left, 10% below the top edge.
p1 = (
    Pie()
    .add(
        '',
        marriage_data,
        radius=['20%', '30%'],
        center=['20%', '50%'],
        label_opts=opts.LabelOpts(formatter='{b}:{d}%'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title='用户婚姻状况占比'),
        legend_opts=opts.LegendOpts(type_='plain', pos_top='10%', pos_left='left'),
    )
)
p1.render('fff.html')
# Occurrence counts per City_Category as (label, count) pairs.
city_data = df['City_Category'].value_counts().to_dict().items()
print(city_data)

# Ring chart of the city-category counts, centred in the middle of the canvas,
# with '{b}:{d}%' labels and the legend at the right, 10% below the top edge.
p2 = Pie()
p2.add(
    '',
    list(city_data),
    radius=['20%', '30%'],
    center=['50%', '50%'],
    label_opts=opts.LabelOpts(formatter='{b}:{d}%'),
)
p2.set_global_opts(
    title_opts=opts.TitleOpts(title='城市类别占比', pos_right='45%'),
    legend_opts=opts.LegendOpts(pos_top='10%', pos_right='right'),
)
# Render this chart (p2). The previous code rendered `pie`, the age chart
# from the first section, so 'ggg.html' never contained the city chart.
p2.render('ggg.html')
# Total Purchase per City_Category as (label, total) pairs. The groupby
# `.sum()` reduction is used instead of passing the Python builtin `sum` to
# `agg`, which newer pandas releases flag as deprecated.
purchase_data = df.groupby('City_Category')['Purchase'].sum().to_dict().items()
print(purchase_data)

# Ring chart of the purchase totals, centred at 80% of the canvas width, with
# '{b}:{c}$' labels and no legend.
p3 = Pie()
p3.add(
    '',
    list(purchase_data),
    radius=['20%', '30%'],
    center=['80%', '50%'],
    label_opts=opts.LabelOpts(formatter='{b}:{c}$'),
)
p3.set_global_opts(
    # Pass the title by keyword, like every other TitleOpts call in this file;
    # the first positional parameter of TitleOpts is not `title` in every
    # pyecharts release, so a positional title can be silently misread.
    title_opts=opts.TitleOpts(title='消费占比-城市类别', pos_right='right'),
    legend_opts=opts.LegendOpts(is_show=False),
)
p3.render('hhh.html')
# Horizontal multi-chart layout: place the three pies p1, p2 and p3 in one
# Grid and render it to a single page.
grid = (
    Grid()
    .add(p1, grid_opts=opts.GridOpts(pos_left='10%'))
    .add(p2, grid_opts=opts.GridOpts(pos_left='30%'))
    .add(p3, grid_opts=opts.GridOpts(pos_left='70%'))
)
grid.render('iii.html')
3、当前城市停留时间&性别的用户对应的平均消费
# Mean Purchase for each (Gender, Stay_In_Current_City_Years) pair, rounded to
# zero decimal places.
data = (
    df.groupby(['Gender', 'Stay_In_Current_City_Years'])['Purchase']
    .mean()
    .round(0)
)
female_avg = data.loc['F', :]
male_avg = data.loc['M', :]
print(list(female_avg))

# Grouped bar chart with one series per gender; the stay-duration values of
# the 'F' slice form the category axis.
b = (
    Bar()
    .add_xaxis(list(female_avg.index))
    .add_yaxis('F', list(female_avg), color='#FFCF43')
    .add_yaxis('M', list(male_avg), color='#6CBEFD')
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='当前城市停留时间&性别的用户对应的平均消费',
            pos_left='center',
        ),
        xaxis_opts=opts.AxisOpts(name='当前城市\n停留时间'),
        # The value axis starts at 8600 instead of 0.
        yaxis_opts=opts.AxisOpts(name='平均消费', min_=8600),
        legend_opts=opts.LegendOpts(pos_top='7%'),
    )
)
b.render('jjj.html')
4、箱型图-购买力(性别/年龄)
# Mean Purchase per user, keyed by (Gender, Age, User_ID), rounded to zero
# decimal places and flattened back into ordinary columns.
f = (
    df.groupby(['Gender', 'Age', 'User_ID'])['Purchase']
    .mean()
    .round(0)
    .reset_index()
)

# Age brackets in order of first appearance in `f`; used as the category axis.
data_x = f.Age.unique().tolist()


def _purchases_by_age(gender):
    """Return one list of per-user mean purchases per age bracket in data_x."""
    return [
        list(f[(f.Gender == gender) & (f.Age == age)].Purchase)
        for age in data_x
    ]


data_f = _purchases_by_age('F')
data_m = _purchases_by_age('M')

# Original author's notes: bp.prepare_data computes, in order, each box's
# minimum, Q1, median, Q3 and maximum; the border_color given to
# ItemStyleOpts is the colour that also appears in the legend.
bp = Boxplot()
bp.add_xaxis(data_x)
bp.add_yaxis(
    'F',
    bp.prepare_data(data_f),
    itemstyle_opts=opts.ItemStyleOpts(color='#EEFDD3', border_color='#56C71C'),
)
bp.add_yaxis(
    'M',
    bp.prepare_data(data_m),
    itemstyle_opts=opts.ItemStyleOpts(color='#FEF8D8', border_color='#D6A12D'),
)
bp.set_global_opts(legend_opts=opts.LegendOpts(legend_icon='circle'))
bp.render('kkk.html')