【开端】clickhouse入门使用

news2024/9/20 20:27:00

一、绪论

这两天使用clickhouse进行数据分析,在使用上和mysql等关系型数据库还是有区别的,在SQL语法上也有差别,所以这里总结一下使用。

二、clickhouse入门使用

ClickHouse介绍

ClickHouse是俄罗斯的Yandex公司于2016年开源的列式存储数据库(DBMS),它使用C++语言编写,主要面向在线分析处理查询(OLAP),能够使用SQL查询实时生成分析数据报告。ClickHouse在数据处理和查询性能上表现优异,尤其适用于大数据量的实时分析场景。

主要特点
  1. 列式存储
    • 相较于传统的行式存储,列式存储在处理大量数据的聚合、计数、求和等统计操作时具有显著优势。
    • 由于同一列的数据类型相同,因此更容易进行数据压缩,节省磁盘空间并提高缓存效率。
  2. 高性能写入
    • ClickHouse采用类LSM Tree的结构,数据写入后定期在后台进行Compaction,实现高效的顺序写操作。
    • 官方公开的benchmark测试显示,其写入吞吐能力可达50MB-200MB/s,相当于每秒写入50万至200万条数据。
  3. 高并行处理能力
    • ClickHouse将数据划分为多个partition和index granularity,通过多个CPU核心并行处理查询,极大地降低了查询延时。
    • 然而,需要注意的是,对于高并发查询业务,ClickHouse可能不是最佳选择,因为它倾向于使用多CPU处理单条查询。
  4. 灵活的存储引擎
    • ClickHouse支持多样化的存储引擎,根据表的不同需求可以设定不同的存储引擎,以满足不同的应用场景。
  5. 几乎覆盖标准SQL语法
    • ClickHouse支持包括DDL和DML在内的标准SQL语法,以及配套的各种函数、用户管理及权限管理、数据的备份与恢复等功能。
局限性与不足
  • 不支持事务:ClickHouse不支持传统的ACID事务;删除/更新只能通过异步的mutation(ALTER TABLE ... UPDATE/DELETE)等方式完成,不适合频繁的行级修改。
  • 不支持高并发:官方建议的QPS(每秒查询率)为100,对于需要高并发的应用场景可能不是最佳选择。
  • 不支持传统的二级索引:ClickHouse主要依赖分区和稀疏主键索引(索引粒度)来实现数据的快速访问,没有传统B-Tree式的二级索引结构,仅提供跳数索引(data skipping index)作为补充。

ClickHouse的SQL用法

ClickHouse支持丰富的SQL语法,以下是一些常用的SQL操作示例:

-- Inspect the cluster definitions visible to this server.
SELECT *
FROM system.clusters;


-- Members with at least one transaction in [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_01', '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

-- Clear existing rows so the load can be re-run without recreating the table.
TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_01;

INSERT INTO data_ods.cdf_ordr_user_cdj_01 (user_id)
SELECT DISTINCT user_id
FROM data_ods.ctg_cdf_order_item_stat
WHERE transaction_time >= '2023-09-01 00:00:00'
  AND transaction_time <  '2024-09-01 00:00:00';


-- Travel: contact phone ciphertexts with an order dated in [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

-- NOTE(review): the ZooKeeper path ends in "_001" while the table name ends in
-- "_01"; kept as written — confirm whether the mismatch is intentional.
CREATE TABLE data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `contact_tel_ciphertext` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_001', '{replica}')
ORDER BY contact_tel_ciphertext
SETTINGS index_granularity = 8192;

-- 7660 (row count noted by the author)
SELECT count(*) FROM data_ods.travel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_01;

INSERT INTO data_ods.travel_ordr_user_cdj_01 (contact_tel_ciphertext)
SELECT DISTINCT contact_tel_ciphertext
FROM data_ods.ctg_travel_order_base_info
WHERE order_date >= '2023-09-01 00:00:00'
  AND order_date <  '2024-09-01 00:00:00'
  AND contact_tel_ciphertext IS NOT NULL;
 

-- Hotel: member ids with a bill or an order in [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `userId` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_01', '{replica}')
ORDER BY userId
SETTINGS index_granularity = 8192;

-- 4188 (row count noted by the author)
SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_01;

-- Source 1: hotel bills, keyed by loy_mem_id.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT loy_mem_id AS userId
FROM data_ods.ctg_htl_s_hotel_bill
WHERE created >= '2023-09-01 00:00:00'
  AND created <  '2024-09-01 00:00:00';

-- Source 2: ctg_htl_mt_order rows, keyed by userId.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT userId
FROM data_ods.ctg_htl_mt_order
WHERE addTime >= '2023-09-01 00:00:00'
  AND addTime <  '2024-09-01 00:00:00'
  AND userId IS NOT NULL;

-- Investment (touzi): invest-member ids that match a mall member created in
-- [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_01', '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_01;

INSERT INTO data_ods.tz_ordr_user_cdj_01 (user_id)
SELECT DISTINCT id
FROM
(
    -- invest-member id is cast to String to match the mall member id
    SELECT toString(t1.id) AS id
    FROM data_ods.ctg_invest_member AS t1
    INNER JOIN data_ods.ctg_mall_member AS t2
        ON toString(t1.id) = t2.id
    WHERE t2.create_time >= '2023-09-01 00:00:00'
      AND t2.create_time <  '2024-09-01 00:00:00'
) AS t3;

-- Phone numbers of the transacting members collected in cdf_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_02', '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_02;

-- Blank and NULL phone numbers are excluded.
INSERT INTO data_ods.cdf_ordr_user_cdj_02 (cellphone)
SELECT m.cellphone
FROM data_ods.ctg_cdf_member AS m
INNER JOIN data_ods.cdf_ordr_user_cdj_01 AS u
    ON toString(m.userid) = u.user_id
WHERE m.cellphone IS NOT NULL
  AND m.cellphone <> '';

-- Travel members whose phone ciphertext appears in travel_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `phone_ciphertext` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_02', '{replica}')
ORDER BY phone_ciphertext
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_02;

INSERT INTO data_ods.travel_ordr_user_cdj_02 (phone_ciphertext)
SELECT m.phone_ciphertext
FROM data_ods.ctg_travel_member AS m
INNER JOIN data_ods.travel_ordr_user_cdj_01 AS u
    ON m.phone_ciphertext = u.contact_tel_ciphertext;


-- Phone numbers of the hotel members collected in hotel_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_02', '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_02;

INSERT INTO data_ods.hotel_ordr_user_cdj_02 (cellphone)
SELECT m.cellphone
FROM data_ods.ctg_htl_s_hotel_member AS m
INNER JOIN data_ods.hotel_ordr_user_cdj_01 AS u
    ON m.row_id = u.userId;

-- Mobile numbers of the investment members collected in tz_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_02', '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_02;

INSERT INTO data_ods.tz_ordr_user_cdj_02 (cellphone)
SELECT m.mobile AS cellphone
FROM data_ods.ctg_invest_member AS m
INNER JOIN data_ods.tz_ordr_user_cdj_01 AS u
    ON toString(m.id) = u.user_id;
 

-- All transacting users across the four business lines, de-duplicated by phone.
-- Total recorded by the author: 9301479
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_03', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_03;

TRUNCATE TABLE data_ods.ordr_user_cdj_03;

-- bgname tags each row with its source business line; only phone is stored.
INSERT INTO data_ods.ordr_user_cdj_03 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT a.cellphone AS phone, '中旅免税' AS bgname FROM data_ods.cdf_ordr_user_cdj_02 AS a
    UNION ALL
    SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.travel_ordr_user_cdj_02 AS b
    UNION ALL
    SELECT c.cellphone AS phone, '中旅酒店' AS bgname FROM data_ods.hotel_ordr_user_cdj_02 AS c
    UNION ALL
    SELECT d.cellphone AS phone, '中旅投资' AS bgname FROM data_ods.tz_ordr_user_cdj_02 AS d
) AS t;

-- Hotel member overlap: hotel-member phones that occur at least twice in the
-- combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotle_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.hotle_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.hotle_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT cellphone FROM data_ods.ctg_htl_s_hotel_member) AS cc
    ON y.phone = cc.cellphone;
 

-- Travel member overlap: travel-member phones that occur at least twice in the
-- combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.travel_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT phone_ciphertext FROM data_ods.ctg_travel_member) AS cc
    ON y.phone = cc.phone_ciphertext;

-- CDF member overlap: cdf-member phones that occur at least twice in the
-- combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.cdf_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT cellphone FROM data_ods.ctg_cdf_member) AS cc
    ON y.phone = cc.cellphone;

-- Investment member overlap: invest-member mobiles that occur at least twice in
-- the combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.tz_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT mobile FROM data_ods.ctg_invest_member) AS cc
    ON y.phone = cc.mobile;


-- Hotel overlap members that also have a transaction
-- (hotle_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotle_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.hotle_ordr_user_cdj_05;

INSERT INTO data_ods.hotle_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.hotle_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;


-- Travel overlap members that also have a transaction
-- (travel_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_05;

-- Source tables are qualified so the query does not depend on the session database.
INSERT INTO data_ods.travel_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.travel_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;

-- CDF overlap members that also have a transaction
-- (cdf_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_05;

-- Source tables are qualified so the query does not depend on the session database.
INSERT INTO data_ods.cdf_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.cdf_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;

-- Investment overlap members that also have a transaction
-- (tz_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_05;

-- Source tables are qualified so the query does not depend on the session database.
INSERT INTO data_ods.tz_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.tz_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;


-- Overlap-rate calculation. The figures are the counts noted by the author.
-- Hotel: 322903
SELECT count(1) FROM data_ods.hotle_ordr_user_cdj_05;
-- Travel: 287291
SELECT count(1) FROM data_ods.travel_ordr_user_cdj_05;
-- CDF: 702559
SELECT count(1) FROM data_ods.cdf_ordr_user_cdj_05;
-- Investment: 159162
SELECT count(1) FROM data_ods.tz_ordr_user_cdj_05;

-- Total transacting users: 9045900
SELECT count(1) FROM data_ods.ordr_user_cdj_03;

-- De-duplicated union of the four *_05 overlap tables.
-- Count recorded by the author: 571211
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_06', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_06;

TRUNCATE TABLE data_ods.ordr_user_cdj_06;

INSERT INTO data_ods.ordr_user_cdj_06 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT phone FROM data_ods.hotle_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.travel_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.cdf_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.tz_ordr_user_cdj_05
) AS t;

SELECT count(1) FROM data_ods.ordr_user_cdj_06;
 

-- Inspect the cluster definitions visible to this server.
SELECT *
FROM system.clusters;


-- Members with at least one transaction in [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_01', '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

-- Clear existing rows so the load can be re-run without recreating the table.
TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_01;

INSERT INTO data_ods.cdf_ordr_user_cdj_01 (user_id)
SELECT DISTINCT user_id
FROM data_ods.ctg_cdf_order_item_stat
WHERE transaction_time >= '2023-09-01 00:00:00'
  AND transaction_time <  '2024-09-01 00:00:00';


-- Travel: contact phone ciphertexts with an order dated in [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

-- NOTE(review): the ZooKeeper path ends in "_001" while the table name ends in
-- "_01"; kept as written — confirm whether the mismatch is intentional.
CREATE TABLE data_ods.travel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `contact_tel_ciphertext` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_001', '{replica}')
ORDER BY contact_tel_ciphertext
SETTINGS index_granularity = 8192;

-- 7660 (row count noted by the author)
SELECT count(*) FROM data_ods.travel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_01;

INSERT INTO data_ods.travel_ordr_user_cdj_01 (contact_tel_ciphertext)
SELECT DISTINCT contact_tel_ciphertext
FROM data_ods.ctg_travel_order_base_info
WHERE order_date >= '2023-09-01 00:00:00'
  AND order_date <  '2024-09-01 00:00:00'
  AND contact_tel_ciphertext IS NOT NULL;
 

-- Hotel: member ids with a bill or an order in [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `userId` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_01', '{replica}')
ORDER BY userId
SETTINGS index_granularity = 8192;

-- 4188 (row count noted by the author)
SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_01;

-- Source 1: hotel bills, keyed by loy_mem_id.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT loy_mem_id AS userId
FROM data_ods.ctg_htl_s_hotel_bill
WHERE created >= '2023-09-01 00:00:00'
  AND created <  '2024-09-01 00:00:00';

-- Source 2: ctg_htl_mt_order rows, keyed by userId.
INSERT INTO data_ods.hotel_ordr_user_cdj_01 (userId)
SELECT DISTINCT userId
FROM data_ods.ctg_htl_mt_order
WHERE addTime >= '2023-09-01 00:00:00'
  AND addTime <  '2024-09-01 00:00:00'
  AND userId IS NOT NULL;

-- Investment (touzi): invest-member ids that match a mall member created in
-- [2023-09-01, 2024-09-01).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_01 ON CLUSTER default_cluster
(
    `user_id` String COMMENT ''
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_01', '{replica}')
ORDER BY user_id
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_01;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_01;

INSERT INTO data_ods.tz_ordr_user_cdj_01 (user_id)
SELECT DISTINCT id
FROM
(
    -- invest-member id is cast to String to match the mall member id
    SELECT toString(t1.id) AS id
    FROM data_ods.ctg_invest_member AS t1
    INNER JOIN data_ods.ctg_mall_member AS t2
        ON toString(t1.id) = t2.id
    WHERE t2.create_time >= '2023-09-01 00:00:00'
      AND t2.create_time <  '2024-09-01 00:00:00'
) AS t3;



-- Phone numbers of the transacting members collected in cdf_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_02', '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_02;

-- Blank and NULL phone numbers are excluded.
INSERT INTO data_ods.cdf_ordr_user_cdj_02 (cellphone)
SELECT m.cellphone
FROM data_ods.ctg_cdf_member AS m
INNER JOIN data_ods.cdf_ordr_user_cdj_01 AS u
    ON toString(m.userid) = u.user_id
WHERE m.cellphone IS NOT NULL
  AND m.cellphone <> '';

-- Travel members whose phone ciphertext appears in travel_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `phone_ciphertext` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_02', '{replica}')
ORDER BY phone_ciphertext
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_02;

INSERT INTO data_ods.travel_ordr_user_cdj_02 (phone_ciphertext)
SELECT m.phone_ciphertext
FROM data_ods.ctg_travel_member AS m
INNER JOIN data_ods.travel_ordr_user_cdj_01 AS u
    ON m.phone_ciphertext = u.contact_tel_ciphertext;


-- Phone numbers of the hotel members collected in hotel_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotel_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotel_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotel_ordr_user_cdj_02', '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotel_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.hotel_ordr_user_cdj_02;

INSERT INTO data_ods.hotel_ordr_user_cdj_02 (cellphone)
SELECT m.cellphone
FROM data_ods.ctg_htl_s_hotel_member AS m
INNER JOIN data_ods.hotel_ordr_user_cdj_01 AS u
    ON m.row_id = u.userId;

-- Mobile numbers of the investment members collected in tz_ordr_user_cdj_01.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_02 ON CLUSTER default_cluster
(
    `cellphone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_02', '{replica}')
ORDER BY cellphone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_02;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_02;

INSERT INTO data_ods.tz_ordr_user_cdj_02 (cellphone)
SELECT m.mobile AS cellphone
FROM data_ods.ctg_invest_member AS m
INNER JOIN data_ods.tz_ordr_user_cdj_01 AS u
    ON toString(m.id) = u.user_id;
 

-- All transacting users across the four business lines, de-duplicated by phone.
-- Total recorded by the author: 9301479
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_03 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_03', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_03;

TRUNCATE TABLE data_ods.ordr_user_cdj_03;

-- bgname tags each row with its source business line; only phone is stored.
INSERT INTO data_ods.ordr_user_cdj_03 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT a.cellphone AS phone, '中旅免税' AS bgname FROM data_ods.cdf_ordr_user_cdj_02 AS a
    UNION ALL
    SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.travel_ordr_user_cdj_02 AS b
    UNION ALL
    SELECT c.cellphone AS phone, '中旅酒店' AS bgname FROM data_ods.hotel_ordr_user_cdj_02 AS c
    UNION ALL
    SELECT d.cellphone AS phone, '中旅投资' AS bgname FROM data_ods.tz_ordr_user_cdj_02 AS d
) AS t;



-- Hotel member overlap: hotel-member phones that occur at least twice in the
-- combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotle_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotle_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.hotle_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.hotle_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT cellphone FROM data_ods.ctg_htl_s_hotel_member) AS cc
    ON y.phone = cc.cellphone;
 

-- Travel member overlap: travel-member phones that occur at least twice in the
-- combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.travel_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT phone_ciphertext FROM data_ods.ctg_travel_member) AS cc
    ON y.phone = cc.phone_ciphertext;

-- CDF member overlap: cdf-member phones that occur at least twice in the
-- combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.cdf_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT cellphone FROM data_ods.ctg_cdf_member) AS cc
    ON y.phone = cc.cellphone;

-- Investment member overlap: invest-member mobiles that occur at least twice in
-- the combined member lists of all four business lines.
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_04 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_04', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_04;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_04;

-- NOTE(review): HAVING count(phone) >= 2 counts rows, so a phone listed twice
-- inside a single member table also qualifies; if overlap must span two
-- different business lines, uniqExact(bgname) >= 2 may be intended — confirm.
INSERT INTO data_ods.tz_ordr_user_cdj_04 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT DISTINCT phone
    FROM
    (
        SELECT a.cellphone AS phone, a.bgname AS bgname FROM data_ods.ctg_cdf_member AS a
        UNION ALL
        SELECT b.phone_ciphertext AS phone, '中旅旅行' AS bgname FROM data_ods.ctg_travel_member AS b
        UNION ALL
        SELECT c.cellphone AS phone, c.bg_name AS bgname FROM data_ods.ctg_htl_s_hotel_member AS c
        UNION ALL
        SELECT d.mobile AS phone, d.bgname AS bgname FROM data_ods.ctg_invest_member AS d
    ) AS Q
    GROUP BY phone
    HAVING count(phone) >= 2
) AS y
INNER JOIN (SELECT DISTINCT mobile FROM data_ods.ctg_invest_member) AS cc
    ON y.phone = cc.mobile;




-- Hotel overlap members that also have a transaction
-- (hotle_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.hotle_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hotle_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.hotle_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.hotle_ordr_user_cdj_05;

INSERT INTO data_ods.hotle_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.hotle_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;




-- Travel overlap members that also have a transaction
-- (travel_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.travel_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/travel_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.travel_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.travel_ordr_user_cdj_05;

-- Source tables are qualified so the query does not depend on the session database.
INSERT INTO data_ods.travel_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.travel_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;

-- CDF overlap members that also have a transaction
-- (cdf_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.cdf_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.cdf_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/cdf_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.cdf_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.cdf_ordr_user_cdj_05;

-- Source tables are qualified so the query does not depend on the session database.
INSERT INTO data_ods.cdf_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.cdf_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;

-- Investment overlap members that also have a transaction
-- (tz_ordr_user_cdj_04 joined to ordr_user_cdj_03 on phone).
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.tz_ordr_user_cdj_05 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/tz_ordr_user_cdj_05', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.tz_ordr_user_cdj_05;

TRUNCATE TABLE data_ods.tz_ordr_user_cdj_05;

-- Source tables are qualified so the query does not depend on the session database.
INSERT INTO data_ods.tz_ordr_user_cdj_05 (phone)
SELECT DISTINCT ov.phone
FROM data_ods.tz_ordr_user_cdj_04 AS ov
INNER JOIN data_ods.ordr_user_cdj_03 AS tx
    ON ov.phone = tx.phone;


-- Overlap-rate calculation. The figures are the counts noted by the author.
-- Hotel: 322903
SELECT count(1) FROM data_ods.hotle_ordr_user_cdj_05;
-- Travel: 287291
SELECT count(1) FROM data_ods.travel_ordr_user_cdj_05;
-- CDF: 702559
SELECT count(1) FROM data_ods.cdf_ordr_user_cdj_05;
-- Investment: 159162
SELECT count(1) FROM data_ods.tz_ordr_user_cdj_05;

-- Total transacting users: 9045900
SELECT count(1) FROM data_ods.ordr_user_cdj_03;

 

-- De-duplicated union of the four *_05 overlap tables.
-- Count recorded by the author: 571211
-- IF EXISTS + data_ods qualifier: the DROP must target the table created below
-- and must not fail when that table is absent.
DROP TABLE IF EXISTS data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster SYNC;

CREATE TABLE data_ods.ordr_user_cdj_06 ON CLUSTER default_cluster
(
    `phone` String COMMENT '用户id'
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/ordr_user_cdj_06', '{replica}')
ORDER BY phone
SETTINGS index_granularity = 8192;

SELECT count(*) FROM data_ods.ordr_user_cdj_06;

TRUNCATE TABLE data_ods.ordr_user_cdj_06;

INSERT INTO data_ods.ordr_user_cdj_06 (phone)
SELECT DISTINCT phone
FROM
(
    SELECT phone FROM data_ods.hotle_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.travel_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.cdf_ordr_user_cdj_05
    UNION ALL
    SELECT phone FROM data_ods.tz_ordr_user_cdj_05
) AS t;

SELECT count(1) FROM data_ods.ordr_user_cdj_06;
 



本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2106555.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

SSM健身俱乐部网站—计算机毕业设计源码25623

摘 要 大数据时代下,数据呈爆炸式地增长。为了迎合信息化时代的潮流和信息化安全的要求,利用互联网服务于其他行业,促进生产,已经成为一种势不可挡的趋势。在健身俱乐部的要求下,开发一款整体式结构的健身俱乐部网站…

【稀疏矩阵】使用torch.sparse模块

文章目录 稀疏矩阵的格式coocsrcsc Construction of Sparse COO tensorsConstruction of CSR tensorsLinear Algebra operations&#xff08;稀疏与稠密之间混合运算&#xff09;Tensor methods and sparse&#xff08;与稀疏有关的tensor成员函数&#xff09;coo张量可用的ten…

【软件逆向】第38课,软件逆向安全工程师之静态补丁,每天5分钟学习逆向吧!

关于x64dbg补丁工具的使用,以下是一些基本的指南和步骤: x64dbg的安装与配置:首先,您需要从x64dbg的官方网站下载并安装x64dbg。界面介绍:x64dbg的主要界面包括反汇编窗口、寄存器窗口、数据窗口和堆栈窗口。反汇编窗…

正运动邀您共聚2024 CIOE中国光博会!

■展会名称: 第25届中国国际光电博览会(以下简称:CIOE中国光博会) ■展会日期 2024年9月11日–13日 ■展馆地点 中国深圳国际会展中心(新馆)■展位号6A52-10 9月11至13日,深圳国际会展中心…

lnmp - tp6.0的安装和简单使用

概述 使用了很长时间的Mac M2芯片的电脑在之前使用虚拟机之前总有一些bug不是那么好用,周末之余重新安装了一下centos虚拟机,搭建了lnmp环境,打算自己挤时间,做一点应用,作为一次新的小小的尝试。 安装&更新 ce…

HTML5好看的花店商城源码3

文章目录 1.设计来源1.1 主界面1.2 登录界面1.3 注册界面1.4 商品列表界面1.5 商品详细界面1.6 购物车界面1.7 团队介绍界面1.8 关于我们界面1.9 其他界面效果汇总 2.效果和源码2.1 动态效果2.2 源代码 源码下载万套模板&#xff0c;程序开发&#xff0c;在线开发&#xff0c;在…

设计模式 —— 单例模式

文章目录 一、单例模式1.1 单例模式定义1.2 单例模式的优点1.3 单例模式的缺点1.4 单例模式的使用场景 二、普通案例2.1 饿汉式单例模式(Eager Initialization Singleton)2.2 懒汉式单例模式(Lazy Initialization Singleton) 参考资料 本文源代码地址为 java-demos/singeleton-…

西柚云 Rstudio Server 使用教程

在生物信息学的研究中,R语言与RStudio的搭配如同汽车与引擎,是科研工作的强力组合。不过,除了在个人电脑上传统使用的方式,还有没有更简便、更高效的选择呢? RStudio Server Cloud —— 云端的RStudio体验 快速切换多…

【408DS算法题】036基础-14年真题_求二叉树的WPL

Index 真题题目分析实现总结 真题题目 二叉树的带权路径长度(WPL)是二叉树中所有叶结点的带权路径长度之和。给定一棵二叉树T ,采用二叉链表存储&#xff0c; 结点结构如下&#xff1a; 其中叶结点的weight域保存该结点的非负权值。设root为指向T的根结点的指针&#xff0c; 请…

贪心算法求无序数组最大递增序列

给定一个无序的数组&#xff0c;获取其最大的递增序列。下面使用贪心算法实现&#xff1a; 1、算法实现 void max_seq(int* arr,int len) {/// 标记递增序列的开始位置int start 0;/// 记录最大的递增序列数int max 0;int i 1;for( ; i<len; i){/// 如果当前元素大于…

【计算机组成原理】你知道什么是8421码、什么是余3码什么又是2421码吗?今天这篇文章带你认识计算机中的BCD码

BCD码 导读一、编码1.1 什么是编码&#xff1f;1.2 编码机制ASCII码非ASCII编码Unicode 二、 BCD码2.1 8421码小结 2.2 余3码2.3 2421码2.4 总结 结语 导读 大家好&#xff0c;很高兴又和大家见面啦&#xff01;&#xff01;&#xff01; 在上一篇内容中我们介绍了不同的进位…

github中action作用和讲解

1&#xff0c;简介 GitHub Actions 是 GitHub 的一个自动化功能&#xff0c;它允许你在 GitHub 仓库中自动执行软件开发工作流程。你可以使用 GitHub Actions 来执行各种任务&#xff0c;比如&#xff1a; 自动测试&#xff1a;每当代码被推送到仓库时&#xff0c;自动运行测试…

学生公寓单相费控电表的规格如何选择

石家庄光大远通电气有限公司学生公寓单相费控电表功能支持时间管理控制。L1、L2、L3可分别设置为工作日和节假日模式&#xff0c;每天多可设置8个时间段&#xff0c;每个时间段可分别设置为合闸状态或夜间模式&#xff0c;合闸时间段内电表保持合闸&#xff0c;夜间时间段内&am…

手机玩机常识-----小米系列机型 Android 15 更新计划 那些机型将会更新安卓15

小米机型是很多米粉最喜欢把玩的&#xff0c;其中解锁bl root 刷写twrp以及刷第三方系统资源相对其他品牌机型来说比较丰富。目前安卓15快要更新到很多机型。我们来了解下小米系列机型的更新计划是咋样的 小米会定期更新有关 Redmi红米 设备的支持日期的数据&#xff0c;包括可…

如何使用Spoon连接data-integration-server并在服务器上执行转换

1.建立连接 2.新建转换或任务 3.右键[子服务器]&#xff0c;新建一个服务器连接(data-integration-server服务器的连接信息) 4.右键[Run configurations],新建一个执行连接,勾选相应的选项即可: 5.选择服务器运行即可! 6.最后&#xff0c;你可以通过服务器端的WEB查看执行日志…

Kafka【八】如何保证消息发送的可靠性、重复性、有序性

【1】消息发送的可靠性保证 对于生产者发送的数据,我们有的时候是不关心数据是否已经发送成功的,我们只要发送就可以了。在这种场景中,消息可能会因为某些故障或问题导致丢失,我们将这种情况称之为消息不可靠。虽然消息数据可能会…

zoom缩放导致下拉框定位偏移问题

因为浏览器升级修改了zoom导致 https://developer.chrome.google.cn/release-notes/128?hlzh_tw 可根据zoom值计算相差偏移量 const isChromeHighVersion () > {const ua navigator.userAgent.toLowerCase();const chromeIndex ua.indexOf(chrome);if (chromeIndex >…

跑步戴的耳机哪个品牌的好?精选五款热门品牌骨传导耳机分享

近年来&#xff0c;骨传导耳机逐渐成为了人们喜爱的耳机之一。相比于传统的耳机&#xff0c;骨传导耳机不需要使用耳塞&#xff0c;就可以让用户在运动时更加自由自在&#xff0c;不受耳机带来的束缚感。然而&#xff0c;市面上的骨传导耳机品牌和型号众多&#xff0c;质量参差…

如何把大的txt文件拆分为小的文件?

命令&#xff1a;split 1. 功能&#xff1a;这个是一个Linux 命令&#xff0c;功能是一个大文件分割成多个较小的文件。 可以使用该命令的系统&#xff1a;在Linux 终端&#xff0c;或者是windows git bash 端口。 官方说明&#xff1a;在Linux 终端&#xff0c;或者是…

【生成模型系列(中级)】词向量维度选择的奥秘——从理论到实验的揭秘【通俗理解,代码模拟】

【通俗理解】词向量维度选择的奥秘——从理论到实验的揭秘 关键词提炼 #词向量 #维度选择 #最小熵原理 #Johnson-Lindenstrauss引理 #注意力机制 #图网络 第一节&#xff1a;词向量维度选择的类比与核心概念【尽可能通俗】 1.1 词向量维度选择的类比 词向量维度选择就像为一…