1.打地鼠
uid,hit,m
1,1,0
1,2,1
1,3,1
1,4,1
1,5,0
1,6,0
1,7,1
2,1,1
2,2,1
2,3,1
2,4,1
2,5,1
3,1,1
3,2,1
3,3,1
3,4,0
3,5,0
3,6,1
3,7,0
3,8,1create table tb_ds(
uid int , -- 用户名
hit int , -- 第几次打地鼠
m int -- 是否命中 1命中 0 未命中
)
row format delimited fields terminated by ',' ;
load data local inpath '/root/ds.txt' into table tb_ds ;select * from tb_ds;
案例分析
查询用户最大连续命中次数
--查询命中的的记录
select *
from tb_ds
where m = 1;
-- 用户分组 记录编号
select uid, hit, row_number() over (partition by uid order by hit) as rn
from tb_ds
where m = 1;
-- hit-rn
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1)
select *, (hit - rn) as sub
from t1
-- 分组得到每个用户的连续击中
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1),
t2 as (select *, (hit - rn) as sub from t1)
select uid, count(*) as hit_count
from t2
group by uid, sub
-- 用户分组得到每个用户最大次数
with t1 as (select uid, hit, row_number() over (partition by uid order by hit) as rn from tb_ds where m = 1),
t2 as (select *, (hit-rn) as sub from t1 ),
t3 as (select uid, count(*) as hit_count from t2 group by uid, sub )
select uid, max(hit_count) as hit_count
from t3
group by uid;
2.WordCount
1.建表数据
Why Studying History Matters
Studying a subject that you feel pointless is never a fun or easy task.
If you're study history, asking yourself the question "why is history important" is a very good first step.
History is an essential part of human civilization.
You will find something here that will arouse your interest, or get you thinking about the significance of history.
2.分析
create table t_wc(
line string
)row format delimited lines terminated by "\n";
load data local inpath '/root/word.txt' overwrite into table t_wc;
select * from t_wc;
-- 使用正则表达式去掉特殊符号 将 除了单词字符 及 ' 和 空格的 所有字符替换成空串
select regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', "") as word_line
from t_wc;
-- 使用split进行切割 将一行数据切割为一个数组
select split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")
from t_wc;
-- 使用expload函数进行炸裂
select explode(split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")) as word
from t_wc;
-- 分组得到每个单词的次数
with t1 as (select explode(split(regexp_replace(line, '[^a-zA-Z_0-9\'\\s]', ""), "\\s+")) as word from t_wc)
select word, count(1) as word_count
from t1
group by word;