数据分析:SQL和Python

news2024/11/16 1:27:31

SQL

统计数据概况:计算样本总数、商家总数、用户总数、消费总数、领券总数等

# Dataset overview in a single pass over ddm.offline_train:
# row / merchant / user totals, purchase and coupon totals, and the overall redemption rate.
# Conditional counts replace the repeated scalar subqueries, so the table is scanned once
# instead of once per subquery. Aliases are identifier-quoted (backticks) rather than
# string-quoted; the resulting column names are unchanged.
select
    count(User_id) as `样本总数`,
    count(distinct Merchant_id) as `商家总数`,
    count(distinct User_id) as `用户总数`,
    count(Date) as `消费总数`,
    count(Date_received) as `领券总数`,
    # rows that have both a coupon receipt date and a purchase date
    count(case when Date_received is not null and Date is not null then 1 end) as `领券消费总数`,
    # rows that have a purchase date but no coupon receipt date
    count(case when Date_received is null and Date is not null then 1 end) as `无券消费总数`,
    # redeemed coupons / received coupons; nullif keeps the result NULL when no coupon was received
    count(case when Date_received is not null and Date is not null then 1 end)
        / nullif(count(Date_received), 0) as `核销率`
from ddm.offline_train;

统计不同距离下:领券人数、用券消费人数、核销率

# Per distance value: coupons received, coupons redeemed, and redemption rate.
select
    Distance,
    count(Coupon_id) as get_coupons_num,
    sum(is_redeemed) as user_coupons_num,
    sum(is_redeemed) / count(Coupon_id) as use_coupons_rate
from (
    select
        Distance,
        Coupon_id,
        # 1 when the row has both a receipt date and a purchase date, else 0
        case
            when Date_received is not null and Date is not null then 1
            else 0
        end as is_redeemed
    from ddm.offline_train
    where Distance is not null
) as per_row
group by Distance
order by Distance;

 消费券使用情况占比

# Share of rows in each coupon-usage category, smallest share first.
with temp as (
    select
        case
            when Date_received is not null and Date is not null then '有券消费'
            # reached only when the first branch failed, so Date is null here
            when Date_received is not null then '有券未消费'
            # reached only when Date_received is null
            when Date is not null then '无券消费'
        end as flag
    from ddm.offline_train
),
total as (
    # one-row CTE holding the overall row count used as the denominator
    select count(*) as row_total
    from temp
)
select
    temp.flag as `优惠券使用情况`,
    concat(round(count(temp.flag) / total.row_total * 100, 2), '%') as `百分比`
from temp
cross join total
group by temp.flag, total.row_total
order by count(temp.flag) / total.row_total;

 

 with as 也叫做子查询部分(公用表表达式,CTE),类似于一个视图或临时表,可以用来存储一部分 SQL 语句的查询结果,必须和其他查询语句一起使用,且中间不能有分号。目前 Oracle、SQL Server、Hive 等均支持 with as 用法;MySQL 从 8.0 版本开始也支持,8.0 之前的版本不支持。

不同类型优惠券的核销情况和平均领取距离

# Per discount type: average distance, coupons received, coupons redeemed and redemption rate,
# sorted by redeemed count (largest first).
select
    Discount_rate as `折扣`,
    avg(Distance) as `平均距离`,
    count(Date_received) as `领券人数`,
    # the where clause already guarantees Date_received is not null, so only Date is checked
    sum(case when Date is not null then 1 else 0 end) as `有券消费人数`,
    sum(case when Date is not null then 1 else 0 end) / count(Date_received) as `核销率`
from ddm.offline_train
where Date_received is not null
group by Discount_rate
# the alias must be identifier-quoted here: a single-quoted name in ORDER BY is a string
# literal, i.e. a constant, and leaves the rows unsorted
order by `有券消费人数` desc;

不同满减门槛的核销情况

# Per spend threshold: coupons received, coupons redeemed, and redemption rate.
with thresholds as (
    select
        Date,
        # a Discount_rate containing '.' is mapped to threshold 0; any other value is cast to
        # a signed integer, which keeps its leading numeric part
        # (presumably values look like 'threshold:reduction' - TODO confirm against the data)
        cast(
            case
                when Discount_rate like '%.%' then 0
                else Discount_rate
            end as signed
        ) as mk
    from ddm.offline_train
)
select
    mk as `门槛`,
    count(*) as `领券数量`,
    sum(case when Date is not null then 1 else 0 end) as `用券消费数量`,
    concat(
        round(sum(case when Date is not null then 1 else 0 end) / count(*) * 100, 2),
        '%'
    ) as `核销率`
from thresholds
where mk is not null
group by mk
order by mk;

不同核销率的商家分布情况(占比)

# Share of merchants falling into each redemption-rate band, largest share first.
with temp as (
    # one row per merchant: coupons received, coupons redeemed, redemption rate
    select
        Merchant_id,
        count(Date_received) as get_num,
        sum(case when Date is not null then 1 else 0 end) as use_num,
        sum(case when Date is not null then 1 else 0 end) / count(Date_received) as Merchant_rate
    from ddm.offline_train
    where Date_received is not null
    group by Merchant_id
)
select
    tag,
    concat(round(count(*) / (select count(*) from temp) * 100, 2), '%') as Merchant_percent
from (
    select
        Merchant_id,
        case
            when Merchant_rate = 0 then '核销率:0'
            when Merchant_rate > 0 and Merchant_rate < 0.2 then '核销率:0-20%'
            when Merchant_rate >= 0.2 and Merchant_rate < 0.3 then '核销率:20%-30%'
            when Merchant_rate >= 0.3 and Merchant_rate < 0.5 then '核销率:30%-50%'
            when Merchant_rate >= 0.5 then '核销率:50%以上'
        end as tag
    from temp
) as aa
group by tag
# sort on the numeric count: Merchant_percent is text, and sorting it would compare
# character by character (e.g. '9.00%' would rank above '10.00%')
order by count(*) desc;

不同领券次数商家的分布情况(平均核销率/占比) 

# Merchants bucketed by how many coupons were received from them:
# average redemption rate and share of merchants per bucket, largest share first.
with merchant_stats as (
    # one row per merchant; rows without a coupon are kept, so get_num can be 0
    select
        Merchant_id,
        count(Date_received) as get_num,
        sum(case when Date is not null and Date_received is not null then 1 else 0 end) as use_num
    from ddm.offline_train
    group by Merchant_id
),
temp as (
    select
        Merchant_id,
        get_num,
        use_num / get_num as user_rate,
        use_num,
        case
            when get_num = 0 then '0次'
            when get_num between 1 and 10 then '1-10次'
            when get_num between 11 and 50 then '11-50次'
            when get_num between 51 and 100 then '51-100次'
            when get_num > 100 then '100次以上'
            else '其他次'
        end as flag
    from merchant_stats
)
select
    flag as `被领券次数`,
    concat(round(avg(user_rate) * 100, 2), '%') as Merchant_avg_use_rate,
    concat(round(count(*) / (select count(*) from temp) * 100, 2), '%') as Merchant_percent
from temp
group by flag
order by count(*) / (select count(*) from temp) desc;

不同核销率用户分布(占比)

# Share of users falling into each redemption-rate band, largest share first.
with temp as (
    # one row per user: coupons received, coupons redeemed, redemption rate
    select
        User_id,
        count(Date_received) as get_num,
        sum(case when Date is not null then 1 else 0 end) as use_num,
        sum(case when Date is not null then 1 else 0 end) / count(Date_received) as user_rate
    from ddm.offline_train
    where Date_received is not null
    group by User_id
)
select
    tag,
    concat(round(count(*) / (select count(*) from temp) * 100, 2), '%') as user_percent
from (
    select
        User_id,
        case
            when user_rate = 0 then '核销率:0'
            when user_rate > 0 and user_rate < 0.3 then '核销率:0-30%'
            when user_rate >= 0.3 and user_rate < 0.5 then '核销率:30%-50%'
            when user_rate >= 0.5 then '核销率:50%以上'
        end as tag
    from temp
) as aa
group by tag
# sort on the numeric count: user_percent is text, and sorting it would compare
# character by character (e.g. '9.00%' would rank above '10.00%')
order by count(*) desc;

不同月份优惠券领券次数/核销次数/核销率

# Per month: coupons received, coupons redeemed, and redemption rate.
# Received coupons are bucketed by month(Date_received); redeemed coupons by month(Date).
with received as (
    select
        month(Date_received) as `month`,
        count(*) as coupons_get_num
    from ddm.offline_train
    where Date_received is not null
    group by month(Date_received)
),
redeemed as (
    select
        month(Date) as `month`,
        count(*) as coupons_use_num
    from ddm.offline_train
    where Date_received is not null
      and Date is not null
    group by month(Date)
)
select
    received.`month`,
    received.coupons_get_num,
    redeemed.coupons_use_num,
    concat(
        round(redeemed.coupons_use_num / received.coupons_get_num * 100, 2),
        '%'
    ) as coupons_use_rate
from received
inner join redeemed
    on received.`month` = redeemed.`month`
order by received.`month`;

不同工作日的优惠券平均核销周期、核销率

# Per day of week (weekday() + 1, so 1 = Monday ... 7 = Sunday):
# average days between receiving and using a coupon, and redemption rate.
# Received coupons are bucketed by the weekday of Date_received; redeemed ones by the weekday of Date.
select
    received.coupons_day,
    redeemed.use_interval,
    concat(
        round(redeemed.coupons_use_num / received.coupons_get_num * 100, 2),
        '%'
    ) as coupons_use_rate
from (
    select
        weekday(Date_received) + 1 as coupons_day,
        count(*) as coupons_get_num
    from ddm.offline_train
    where Date_received is not null
    group by weekday(Date_received) + 1
) as received
inner join (
    select
        weekday(Date) + 1 as coupons_day,
        count(*) as coupons_use_num,
        # mean number of days from receipt to purchase, rounded to 2 decimals
        round(avg(datediff(Date, Date_received)), 2) as use_interval
    from ddm.offline_train
    where Date is not null
      and Date_received is not null
    group by weekday(Date) + 1
) as redeemed
    on received.coupons_day = redeemed.coupons_day
order by received.coupons_day;

 Python

pandas:是Python中处理csv文件常用的一个库

数据预处理

# pandas must be imported before the `pd` alias can be used.
import pandas as pd

# Load the CSV file into a DataFrame.
data = pd.read_csv(r"/Users/xll/Documents/服务数据质量/测试数据1.csv")

# (rows, columns) tuple: data.shape[0] is the row count, data.shape[1] the column count.
print(data.shape)

# Five randomly sampled rows (printed explicitly; a bare expression shows nothing in a script).
print(data.sample(5))

# The first five rows.
print(data.head(5))

# All column names of the dataset.
print(data.columns)

 

 

 参考:

https://blog.csdn.net/twlve/article/details/128609147?spm=1001.2014.3001.5502

https://blog.csdn.net/twlve/article/details/128626526?spm=1001.2014.3001.5502

O2O优惠券核销-数据分析_十二十二呀的博客-CSDN博客_优惠券数据分析

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/184903.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

高级Spring之Aware 接口

Aware 接口功能阐述: Aware 接口提供了一种【内置】 的注入手段,例如 a.BeanNameAware 注入 bean 的名字b.BeanFactoryAware 注入 BeanFactory 容器 c.ApplicationContextAware 注入 ApplicationContext 容器 d.EmbeddedValueResolverAware 注入 ${} 解…

力扣并查集

目录 1&#xff0c;路径压缩查找 323. 无向图中连通分量的数目 990. 等式方程的可满足性 200. 岛屿数量 419. 甲板上的战舰 695. 岛屿的最大面积 733. 图像渲染 1992. 找到所有的农场组 947. 移除最多的同行或同列石头 1020. 飞地的数量 547. 省份数量 827. 最大人…

浅谈对 Binder 的理解

文章目录Binder 是干嘛的&#xff1f;Binder 的意义binder 的通信架构如何启动 binder 机制binder 通信binder 驱动的分层架构图需要了解的知识 binder 是干嘛的binder 的存在意义是怎样的Android 为什么选择 binder 作为主要的 ipc 通信机制&#xff1f;binder 的架构 Binde…

Vue 3 介绍

Vue 3 介绍Vue3 动机 和 新特性Vite 的使用为什么选 Vite为什么要学习 vue 3Vue是国内最火的前端框架Vue 3 于 2022 年 2 月 7 日星期一成为新的默认版本&#xff01;Vue3性能更高&#xff0c;体积更小Vue3.0在经过一年的迭代后&#xff0c;越来越好用 目前已支持 vue3 的UI组…

Python简介安装与PyCharm安装使用

一、Python简介 Python官方文档&#xff1a;https://docs.python.org/zh-cn/3/ Python 3.x 已经将 UTF-8 作为默认的源文件编码格式。 1、Python简介 Python 是一门开源免费、通用型的脚本编程语言&#xff0c;它上手简单&#xff0c;功能强大&#xff0c;坚持「极简主义」&am…

Visual Studio Code (VS Code) - 列块编辑与查找

Visual Studio Code (VS Code) - 列块编辑与查找1. 列块选择 Shift Alt 鼠标左键&#xff0c;进行列编辑&#xff0c;批量增删改。 在选定位置按下滚轮键 (鼠标中键) 不放&#xff0c;移动鼠标完成多列选择。 2. 页面双列布局 查看 -> 编辑器布局 -> 双列 3. 查…

linux使用ZLMediaKit搭建rtsp服务器

入门小菜鸟&#xff0c;希望像做笔记记录自己学的东西&#xff0c;也希望能帮助到同样入门的人&#xff0c;更希望大佬们帮忙纠错啦~侵权立删。 致敬rtsp 服务器搭建_音视频开发老马的博客-CSDN博客_rtsp服务器 这里是参考上面链接的介绍再进行一些自己遇到的问题的补充。 一…

一种化学荧光探针945928-17-6,TAMRA alkyne,5-isomer,四甲基罗丹明-炔基

【中文名称】5-四甲基罗丹明-炔基【英文名称】 TAMRA alkyne,5-isomer【CAS】945928-17-6【分子式】C28H25N3O4【分子量】467.53【纯度标准】95%【包装规格】5mg&#xff0c;10mg&#xff0c;25mg【是否接受定制】可进行定制&#xff0c;定制时间周期上面可以和我们进行沟通【外…

redis常用数据类型和应用场景

我们都知道 Redis 提供了丰富的数据类型,常见的有五种:String(字符串),Hash(哈希),List(列表),Set(集合)、Zset…

稀疏表(ST表,Sparse Table)

ST表用来解决区间最值问题&#xff08;也可以解决区间gcd&#xff09; 利用倍增的思想&#xff0c;O(nlog⁡2n)O\left(n\log_2 n\right)O(nlog2​n)预处理&#xff0c;O(1)O\left(1\right)O(1)区间查询 令f(i,j)f\left(i,j\right)f(i,j)表示区间[i,i2j−1]\left[i,i2^j-1\right…

TypeScript 学习笔记总结(二)

TypeScript 笔记记录&#xff0c;侧重于接口&#xff0c;对象等内容。 文章目录一、 TS 面向对象二、TS 类三、TS 继承四、TS super关键字五、TS 抽象类六、TS 接口七、TS 属性封装八、TS 泛型一、 TS 面向对象 js也是面向对象的&#xff0c;并不是面向过程的。 下面&#xf…

虹科案例 | AR数字化解决方案在石油与天然气领域“大放异彩”

石油和天然气在当今人类社会中扮演着重要角色&#xff0c;但是石油和天然气的开采&#xff0c;却是耗费成本巨大的工程&#xff0c;石油和天然气公司也在不断寻找着能帮助他们降低运营成本并提高效率的好方法。 事实上&#xff0c;AR技术解决方案能帮助这些公司实现他们的目标…

Linux[安装gitlab笔记]

参考文章&#xff1a;https://www.jianshu.com/p/2cb10c11813d CentOS7下安装gitlab中文版 前提&#xff1a; 下载文件&#xff1a;gitlab-ce-12.9.2-ce.0.el7.x86_64.rpm 地址1&#xff1a;https://packages.gitlab.com/gitlab/gitlab-ce 地址2&#xff1a;https://mirrors.…

JVM——类加载与字节码技术(3)

目录四、类加载阶段4.1 加载4.2 链接4.3 初始化五、类加载——练习练习1练习2四、类加载阶段 4.1 加载 ① 将类的字节码载入方法区&#xff08;1.8后为元空间&#xff0c;在本地内存中&#xff09;中&#xff0c;内部采用 C 的 instanceKlass ● _java_mirror 即 java 的类镜…

大数据技术架构(组件)10——Hive:集合函数类型转化函数

1.4.3、集合函数1.4.3.1、size select map(a,1,b,2),size(map(a,1,b,2)),array(1,2,3,4),size(array(1,2,3,4));1.4.3.2、map_keysselect map(a,1,b,2), map_keys(map(a,1,b,2));1.4.3.3、map_valuesselect map(a,1,b,2), map_values(map(a,1,b,2));1.4.3.4、array_containssel…

中国国际电子商务中心与易观分析联合发布:2022年3季度全国网络零售发展指数同比增长1.5%

近日&#xff0c;中国国际电子商务中心与易观分析联合发布2022年3季度“全国网络零售发展指数”及其分指数。2022年3季度全国网络零售发展指数同比增长1.5%&#xff0c;环比下降2.9%。随着稳经济一揽子政策和接续措施全面落地显效&#xff0c;生产加快回暖&#xff0c;经济平稳…

vmstat、free、df、iostat、sar

1. vmstat看CPU vmstat -n 2 3 一般vmstat工具的使用是通过两个数字参数来完成的&#xff0c;第一个参数是采样的时间间隔数单位是秒&#xff0c;第二个参数是采样的次数 -procs r:运行和等待CPU时间片的进程数&#xff0c;原则上1核的CPU的运行队列不要超过2&#xff0c;整个…

Softing为Endress+Hauser提供过程自动化连接解决方案

一 背景 恩德斯豪斯&#xff08;EndressHauser&#xff09;是一家总部位于瑞士的过程工业自动化解决方案的全球领军企业&#xff0c;致力于为过程工业及实验室自动化领域提供测量仪器、服务和解决方案。其产品被广泛应用于石油、化工、制药、食品饮料以及废水处理等过程自动化…

银河麒麟V10操控系统Qt安装

下载安装镜像申请试用https://www.kylinos.cn/support/trial.html&#xff0c;根据CPU架构选择要下载的安装包。AMD、Intel的CPU下载银河麒麟桌面操作系统V10 AMD64版本&#xff0c;我下载的Kylin-Desktop-V10-SP1-HWE-Release-2203-X86_64.iso制作系统安装启动U盘下载制作启动…

1. kafka-提高生产者吞吐量

kafka的基础使用知识不在这篇文章阐述&#xff0c;默认读者都会使用 首先kafka的生产者原理&#xff0c;如下。图来自网上 上面的图可能不全&#xff0c; 我再详细描述下&#xff0c;在消息发送过程中&#xff0c;设计到两个线程main和sender线程&#xff1b; &#xff08;1…