商业智能(Business Intelligence,简称:BI),又称商业智慧或商务智能,指用现代数据仓库技术、线上分析处理技术、数据挖掘和数据展现技术进行数据分析以实现商业价值。
——百度百科
文章目录
- 前言
- 一、痛点梳理
- 二、案例分享
- (一)获取BI系统用户访问日志
- (二)设计开发DWD
- (三)设计ADS
- (四)可视化样例
- 总结
前言
分析BI系统的用户访问数据分析报表设计开发流程及个人的思考,供各位小伙伴参考。
一、痛点梳理
- 不清楚BI系统中各报表的访问情况,哪些报表使用率高,哪些报表没人看。
- 不清楚哪些部门哪些人员使用报表的频率高,哪些部门哪些人员很少看报表。
- 不清楚BI系统中用户的注册激活情况。
二、案例分享
(一)获取BI系统用户访问日志
xxx_log
id | menu_full_path | user_id | user_email | create_date | write_date | pt_d |
---|---|---|---|---|---|---|
1 | aa分析表 | 111 | abcd@111.com | 2021-01-01 09:35:57 | 2022-11-30 09:35:57 | 2022-11-30 |
2 | aa分析表 | 111 | abcd@111.com | 2021-01-01 09:35:57 | 2022-11-30 09:42:08 | 2022-11-30 |
(二)设计开发DWD
- dwd_xxx_tf_dm (xxx用户操作事务事实表)
以天为分区,记录每个用户每天在BI系统上的浏览数据。 - dwd_xxx_tf_dm_ini (xxx用户操作事务事实表)
将历史的日志数据利用动态分区形成事实表历史分区数据,案例中dwd直接关联维度表冗余维度,实际生产中可以考虑在下游进行关联。
-- #####################################################
-- name : dwd_xxx_tf_dm.sql
-- desc : xxx用户操作事务事实表
-- version : xxx_BI_2.0.1
-- source : xxx_log; dim_aaa_ds; dim_bbb_ds;
-- create : zhangliushi 20220101
-- modify :
-- #####################################################
USE dwd;
SET hive.exec.dynamic.partition = true;
SET hive.exec.dynamic.partition.mode = nonstrict;
SET hive.exec.max.dynamic.partitions = 1000;
SET hive.exec.max.dynamic.partitions.pernode = 1000;
-- 记录BI用户日志数据;
CREATE EXTERNAL TABLE IF NOT EXISTS dwd.dwd_xxx_tf_dm
(
etl_id STRING COMMENT 'ETL代理ID'
,bi_user_id STRING COMMENT 'BI用户ID'
,user_email STRING COMMENT '用户邮箱'
,employee_code STRING COMMENT '雇员代码'
,employee_name STRING COMMENT '雇员名称'
,first_dept STRING COMMENT '一级部门'
,second_dept STRING COMMENT '二级部门'
,third_dept STRING COMMENT '三级部门'
,fourth_dept STRING COMMENT '四级部门'
,fifth_dept STRING COMMENT '五级部门'
,visit_time STRING COMMENT '访问时间'
,menu_full_path STRING COMMENT '访问菜单全路径'
,etl_time STRING COMMENT 'ETL时间'
,first_menu STRING COMMENT '一级菜单'
,second_menu STRING COMMENT '二级菜单'
,third_menu STRING COMMENT '三级菜单'
,fourth_menu STRING COMMENT '四级菜单'
)
COMMENT 'xxx用户操作事务事实表'
PARTITIONED BY (pt_d VARCHAR(10) COMMENT '天分区')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS ORC
LOCATION 'cosn://bpit-zzz-123456/dwd/dwd_xxx_tf_dm'
TBLPROPERTIES('orc.compress'='ZLIB')
;
-- 动态分区插入数据;
INSERT OVERWRITE TABLE dwd.dwd_xxx_tf_dm
PARTITION (pt_d)
SELECT
UUID() AS etl_id
,t1.user_id AS bi_user_id
,t1.user_email AS user_email
,t2.employee_code AS employee_code
,t2.employee_name AS employee_name
,t2.first_dept AS first_dept
,t2.second_dept AS second_dept
,t2.third_dept AS third_dept
,t2.fourth_dept AS fourth_dept
,t2.fifth_dept AS fifth_dept
,FROM_UTC_TIMESTAMP(DATE_FORMAT(t1.create_date, 'yyyy-MM-dd HH:mm:ss'), 'PRC') AS visit_time
,t1.menu_full_path AS menu_full_path
,DATE_FORMAT(CURRENT_TIMESTAMP,'yyyy-MM-dd HH:mm:ss') AS etl_time
,t3.first_menu AS first_menu
,t3.second_menu AS second_menu
,t3.third_menu AS third_menu
,t3.fourth_menu AS fourth_menu
,DATE_FORMAT(FROM_UTC_TIMESTAMP(DATE_FORMAT(t1.create_date, 'yyyy-MM-dd HH:mm:ss'), 'PRC'),'yyyy-MM-dd') AS pt_d
FROM
(
SELECT
menu_full_path
,user_id
,user_email
,create_date
FROM ods.xxx_log
WHERE pt_d = '2022-02-16'
AND user_email != 'admin'
) t1
LEFT OUTER JOIN
(
SELECT
employee_code
,employee_name
,first_dept
,second_dept
,third_dept
,fourth_dept
,fifth_dept
,employee_email
FROM dim.dim_aaa_ds
WHERE pt_d = DATE_SUB('${pt_d}',1)
) t2
ON t1.user_email = t2.employee_email
LEFT OUTER JOIN
(
SELECT
menu_full_path
,first_menu
,second_menu
,third_menu
,fourth_menu
FROM dim.dim_bbb_ds
) t3
ON t1.menu_full_path = t3.menu_full_path
;
- dwd_xxx_user_login_sf_ds(xx用户登录周期快照事实表)
案例中dwd直接关联维度表冗余维度,实际生产中可以考虑在下游进行关联。
-- #####################################################
-- name : dwd_xxx_user_login_sf_ds.sql
-- desc : xx用户登录周期快照事实表
-- version : xxx_BI_2.0.1
-- source : ods_xxx_bi_users; dim_xxx_ds;
-- create : zhangliushi 20220323
-- modify :
-- #####################################################
USE dwd;
-- 记录BI用户登录情况, 创建和修改时间;
CREATE EXTERNAL TABLE IF NOT EXISTS dwd.dwd_xxx_user_login_sf_ds
(
etl_id STRING COMMENT 'ETL代理ID'
,bi_user_id STRING COMMENT 'BI用户ID'
,user_email STRING COMMENT '用户邮箱'
,employee_code STRING COMMENT '雇员代码'
,employee_name STRING COMMENT '雇员名称'
,first_dept STRING COMMENT '一级部门'
,second_dept STRING COMMENT '二级部门'
,third_dept STRING COMMENT '三级部门'
,fourth_dept STRING COMMENT '四级部门'
,fifth_dept STRING COMMENT '五级部门'
,id_activation_flag STRING COMMENT '账号激活标志'
,id_cancel_flag STRING COMMENT '账号注销标志'
,last_login_time STRING COMMENT '最近一次登录时间'
,id_create_time STRING COMMENT '账号创建时间'
,id_modify_time STRING COMMENT '账号修改时间'
,etl_time STRING COMMENT 'ETL时间'
)
COMMENT 'xx用户登录周期快照事实表'
PARTITIONED BY (pt_d VARCHAR(10) COMMENT '天分区')
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\001'
LINES TERMINATED BY '\n'
STORED AS ORC
LOCATION 'cosn://bpit-zzz-123456/dwd/dwd_xxx_user_login_sf_ds'
TBLPROPERTIES('orc.compress'='ZLIB')
;
INSERT OVERWRITE TABLE dwd.dwd_common_bi_user_login_sf_ds
PARTITION (pt_d = '${pt_d}')
SELECT
UUID() AS etl_id
,t1.id AS bi_user_id
,t1.user_email AS user_email
,t2.employee_code AS employee_code
,t2.employee_name AS employee_name
,t2.first_dept AS first_dept
,t2.second_dept AS second_dept
,t2.third_dept AS third_dept
,t2.fourth_dept AS fourth_dept
,t2.fifth_dept AS fifth_dept
,IF(t1.last_login_date IS NULL, '未激活', '已激活') AS id_activation_flag
,IF(t1.active IS TRUE, '账号可用', '账号已注销') AS id_cancel_flag
,FROM_UTC_TIMESTAMP(DATE_FORMAT(t1.last_login_date, 'yyyy-MM-dd HH:mm:ss'), 'PRC') AS last_login_time
,FROM_UTC_TIMESTAMP(DATE_FORMAT(t1.create_date, 'yyyy-MM-dd HH:mm:ss'), 'PRC') AS id_create_time
,FROM_UTC_TIMESTAMP(DATE_FORMAT(t1.write_date, 'yyyy-MM-dd HH:mm:ss'), 'PRC') AS id_modify_time
,DATE_FORMAT(CURRENT_TIMESTAMP,'yyyy-MM-dd HH:mm:ss') AS etl_time
FROM
(
SELECT
id
,user_email
,active
,last_login_date
,create_date
,write_date
FROM ods.ods_xxx_bi_users
WHERE pt_d = DATE_SUB('${pt_d}',1)
) t1
LEFT OUTER JOIN
(
SELECT
employee_code
,employee_name
,first_dept
,second_dept
,third_dept
,fourth_dept
,fifth_dept
,employee_email
FROM dim.dim_xxx_ds
WHERE pt_d = DATE_SUB('${pt_d}',1)
) t2
ON t1.user_email = t2.employee_email
;
(三)设计ADS
- 依赖xxx用户操作事务事实表形成用户操作统计表
- 依赖xx用户登录周期快照事实表形成用户登录统计表
- 依赖xx用户登录周期快照事实表和依赖xxx用户操作事务事实表形成注册用户访问统计表
注:样例中没有建DWS,实际生产可以考虑建DWS表,在DWS关联维度。
(四)可视化样例
总结
商业智能的概念在1996年最早由加特纳集团(Gartner Group)提出,加特纳集团将商业智能定义为:商业智能描述了一系列的概念和方法,通过应用基于事实的支持系统来辅助商业决策的制定。商业智能技术提供使企业迅速分析数据的技术和方法,包括收集、管理和分析数据,将这些数据转化为有用的信息,然后分发到企业各处。