Doris支持三种数据模型,分别是:
-
Aggregate Model(聚合模型)
-
Unique Model(唯一模型)
-
Duplicate Model(冗余模型)
Aggregate Model(聚合模型)
key相同的数据,Value会按照指定的聚合方式聚合到一起。(replace、sum、min、max)
- replace:替换
- sum:求和
- min:求最小
- max:求最大
演示案例(数据被聚合)
--创建表
CREATE TABLE IF NOT EXISTS test_db.example_site_visit
(
`user_id` LARGEINT NOT NULL COMMENT "用户id",
`date` DATE NOT NULL COMMENT "数据灌入日期时间",
`city` VARCHAR(20) COMMENT "用户所在城市",
`age` SMALLINT COMMENT "用户年龄",
`sex` TINYINT COMMENT "用户性别",
`last_visit_date` DATETIME REPLACE DEFAULT "1970-01-01 00:00:00" COMMENT "用户最后一次访问时间",
`cost` BIGINT SUM DEFAULT "0" COMMENT "用户总消费",
`max_dwell_time` INT MAX DEFAULT "0" COMMENT "用户最大停留时间",
`min_dwell_time` INT MIN DEFAULT "99999" COMMENT "用户最小停留时间"
)
AGGREGATE KEY(`user_id`, `date`, `city`, `age`, `sex`)
DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
PROPERTIES("replication_num" = "1");
--往表中插入数据
insert into test_db.example_site_visit values(10000,'2017-10-01','北京',20,0,'2017-10-01 06:00:00',20,10,2);
insert into test_db.example_site_visit values(10000,'2017-10-01','北京',20,0,'2017-10-01 07:00:00',15,8,5);
insert into test_db.example_site_visit values(10001,'2017-10-01','北京',30,1,'2017-10-01 17:05:45',2,22,22);
insert into test_db.example_site_visit values(10002,'2017-10-02','上海',20,1,'2017-10-02 12:59:12',200,5,5);
insert into test_db.example_site_visit values(10003,'2017-10-02','广州',32,0,'2017-10-02 11:20:00',30,11,11);
insert into test_db.example_site_visit values(10004,'2017-10-01','深圳',35,0,'2017-10-01 10:00:15',100,3,3);
insert into test_db.example_site_visit values(10004,'2017-10-03','深圳',35,0,'2017-10-03 10:20:22',11,6,6);
--查询表结果
select * from example_site_visit;
Unique Model(唯一模型)
唯一模型,保证key的唯一性。换句话说,只要key相同,则Doris会用最新的数据替换之前的数据。
案例演示
--创建表
CREATE TABLE IF NOT EXISTS test_db.user
(
`user_id` LARGEINT NOT NULL COMMENT "用户id",
`username` VARCHAR(50) NOT NULL COMMENT "用户昵称",
`city` VARCHAR(20) COMMENT "用户所在城市",
`age` SMALLINT COMMENT "用户年龄",
`sex` TINYINT COMMENT "用户性别",
`phone` LARGEINT COMMENT "用户电话",
`address` VARCHAR(500) COMMENT "用户地址",
`register_time` DATETIME COMMENT "用户注册时间"
)
UNIQUE KEY(`user_id`, `username`)
DISTRIBUTED BY HASH(`user_id`) BUCKETS 10
PROPERTIES("replication_num" = "1");
--插入数据
insert into test_db.user values(10000,'zhangsan','北京',20,0,13112345312,'北京西城区','2020-10-01 07:00:00');
insert into test_db.user values(10000,'zhangsan','深圳',20,0,13112345312,'深圳市宝安区','2020-11-15 06:10:20');
--查看表数据
select * from user;
Duplicate Model(冗余模型)
冗余模型,允许数据存在重复(冗余)。可以存储原始数据,不会做任何的聚合操作。也不在保证数据的唯一性。
案例演示
--创建表
CREATE TABLE IF NOT EXISTS test_db.example_log
(
`timestamp` DATETIME NOT NULL COMMENT "日志时间",
`type` INT NOT NULL COMMENT "日志类型",
`error_code` INT COMMENT "错误码",
`error_msg` VARCHAR(1024) COMMENT "错误详细信息",
`op_id` BIGINT COMMENT "负责人id",
`op_time` DATETIME COMMENT "处理时间"
)
DUPLICATE KEY(`timestamp`, `type`)
DISTRIBUTED BY HASH(`timestamp`) BUCKETS 10
PROPERTIES("replication_num" = "1");
--插入数据
insert into test_db.example_log values('2020-10-01 08:00:05',1,404,'not found page', 101, '2020-10-01 08:00:05');
insert into test_db.example_log values('2020-10-01 08:00:05',1,404,'not found page', 101, '2020-10-01 08:00:05');
insert into test_db.example_log values('2020-10-01 08:00:05',2,404,'not found page', 101, '2020-10-01 08:00:06');
insert into test_db.example_log values('2020-10-01 08:00:06',2,404,'not found page', 101, '2020-10-01 08:00:07');
--查询表数据
select * from example_log;
三种数据模型的对比&应用场景