查询每个区域的男女用户数
0 问题描述
每个区域内男生、女生分别有多少个
1 数据准备
-- Switch to the wxthive database used by the statements that follow.
use wxthive;

-- Student roster for the per-area gender count exercise.
-- Columns: id and name identify the student, class holds the area label
-- (the sample rows use 一区 / 二区 / 三区), sex holds 男 or 女.
-- IF NOT EXISTS keeps the script re-runnable. A plain CREATE TABLE fails
-- on the second run because the table already exists.
create table if not exists t1_stu_table
(
    id int,
    name string,
    class string,
    sex string
);

-- INSERT OVERWRITE replaces the table contents, so reloading the sample
-- rows never duplicates them.
insert overwrite table t1_stu_table
values
    (4,'张文华','二区','男'),
    (3,'李思雨','一区','女'),
    (1,'王小凤','一区','女'),
    (7,'李智瑞','三区','男'),
    (6,'徐文杰','二区','男'),
    (8,'徐雨秋','三区','男'),
    (5,'张青云','二区','女'),
    (9,'孙皓然','三区','男'),
    (10,'李春山','三区','男'),
    (2,'刘诗迪','一区','女');
2 数据分析
-- Pivot the roster into one row per sex with one count column per area.
-- Each CASE yields NULL for rows outside its area and COUNT skips NULLs,
-- so every column counts only the students that belong to that area.
select
    sex,
    count(case when class = '一区' then sex end) as 1area,
    count(case when class = '二区' then sex end) as 2area,
    count(case when class = '三区' then sex end) as 3area
from t1_stu_table
group by sex;
3 小结
略
==========================*****==========================
每个季度绩效得分都大于70分的员工
0 问题描述
计算每个季度绩效得分都大于70分的员工
1 数据准备
-- Quarterly performance scores, one row per employee and quarter.
-- Columns: id and name identify the employee, subject holds the quarter
-- label (第一季度 / 第二季度 / 第三季度 in the sample rows), score is the
-- integer performance score.
-- IF NOT EXISTS keeps the script re-runnable. A plain CREATE TABLE fails
-- on the second run because the table already exists.
create table if not exists t2_score_info_table
(
    id int,
    name string,
    subject string,
    score int
);

-- INSERT OVERWRITE replaces the table contents, so reloading the sample
-- rows never duplicates them.
insert overwrite table t2_score_info_table
values
    (1,'王小凤','第一季度',88),
    (1,'王小凤','第二季度',55),
    (1,'王小凤','第三季度',72),
    (3,'徐雨秋','第一季度',92),
    (3,'徐雨秋','第二季度',77),
    (3,'徐雨秋','第三季度',93),
    (2,'张文华','第一季度',70),
    (2,'张文华','第二季度',77),
    (2,'张文华','第三季度',91);
2 数据分析
-- Employees whose score is above 70 in every recorded quarter.
-- When the lowest score of an employee exceeds 70, all of that
-- employee's scores do, so filtering on the per-employee minimum suffices.
select
    id,
    name,
    min(score) as min_score
from t2_score_info_table
group by id, name
having min(score) > 70;
ps:只要某位员工在所有季度中的最低绩效得分大于70分,就说明该员工每个季度的绩效得分都大于70分。
3 小结
==========================*****==========================
行列互换
0 问题描述
把下表所示的纵向存储的数据改成横向存储(行转列)
1 数据准备
-- Sales figures stored vertically, one row per year and month number.
-- The sample rows cover month numbers 1 to 4 of 2019 and 2020.
-- IF NOT EXISTS keeps the script re-runnable. A plain CREATE TABLE fails
-- on the second run because the table already exists.
create table if not exists t3_row_col_table
(
    year_num int,
    month_num int,
    sales int
);

-- INSERT OVERWRITE replaces the table contents, so reloading the sample
-- rows never duplicates them.
insert overwrite table t3_row_col_table
values
    (2019,1,100),
    (2019,2,200),
    (2019,3,300),
    (2019,4,400),
    (2020,1,200),
    (2020,2,400),
    (2020,3,600),
    (2020,4,800);
2 数据分析
-- Rows to columns: one output row per year, one sales column per month.
-- Each CASE has no ELSE branch, so it yields NULL for other months and
-- SUM ignores those NULLs. A month with no rows for a year comes out NULL.
select
    year_num,
    sum(case when month_num = 1 then sales end) as m1,
    sum(case when month_num = 2 then sales end) as m2,
    sum(case when month_num = 3 then sales end) as m3,
    sum(case when month_num = 4 then sales end) as m4
from t3_row_col_table
group by year_num;
3 小结
==========================*****==========================
计算用户留存情况(*)
0 问题描述
计算用户的次日留存数、三日留存数、七日留存数(即首次登录后的第1天、第3天、第7天再次登录的用户数)
1 数据准备
-- Login log, one row per login event of a user.
-- login_time is stored as a string. The sample values are not zero padded
-- and carry no seconds, for example 2021-4-21 6:00.
-- IF NOT EXISTS keeps the script re-runnable. A plain CREATE TABLE fails
-- on the second run because the table already exists.
create table if not exists t4_user_login
(
    uid int,
    login_time string
);

-- INSERT OVERWRITE replaces the table contents, so reloading the sample
-- rows never duplicates them.
insert overwrite table t4_user_login
values
    (1,'2021-4-21 6:00'),
    (1,'2021-4-24 10:00'),
    (1,'2021-4-25 19:00'),
    (2,'2021-4-22 10:00'),
    (2,'2021-4-28 9:00'),
    (2,'2021-4-29 14:00'),
    (3,'2021-4-27 8:00'),
    (3,'2021-4-28 10:00');
2 数据分析
-- Next-day, 3-day and 7-day retention, measured from the first login date
-- of each user.
--
-- Every login day is compared with the first login day of the same user.
-- Comparing only the last login with the first one would miss a user who
-- returns on day 3 and then again later. In the sample data uid 1 logs in
-- on 4-21, 4-24 and 4-25, so the last-minus-first gap is 4 although the
-- user did come back on day 3.
--
-- NOTE(review): as in the original query, this relies on date_format
-- accepting the non-padded login_time strings. Confirm on the target Hive
-- version.
select
    count(case when day_value = 1 then uid end) as cnt1,
    count(case when day_value = 3 then uid end) as cnt2,
    count(case when day_value = 7 then uid end) as cnt3
from
(
    -- Days elapsed between each login day and the first login day.
    select
        uid,
        datediff(login_date, first_login_date) as day_value
    from
    (
        -- Attach the first login day of the user to every login day.
        select
            uid,
            login_date,
            min(login_date) over (partition by uid) as first_login_date
        from
        (
            -- One row per user and calendar day, so several logins on the
            -- same day cannot count a user twice.
            select
                uid,
                date_format(login_time,'yyyy-MM-dd') as login_date
            from t4_user_login
            group by uid, date_format(login_time,'yyyy-MM-dd')
        ) t0
    ) t1
) t2;
3 小结
==========================*****==========================
筛选最受欢迎的课程
0 问题描述
筛选最受欢迎的课程course
1 数据准备
-- Course enrolments, one row per student.
-- Columns: uid and name identify the student, grade holds the grade label
-- (一年级 / 二年级 / 三年级 in the sample rows), course is the chosen course.
-- IF NOT EXISTS keeps the script re-runnable. A plain CREATE TABLE fails
-- on the second run because the table already exists.
create table if not exists t5_course_table
(
    uid int,
    name string,
    grade string,
    course string
);

-- INSERT OVERWRITE replaces the table contents, so reloading the sample
-- rows never duplicates them.
insert overwrite table t5_course_table
values
    (1,'王小凤','一年级','心理学'),
    (2,'刘诗迪','二年级','心理学'),
    (3,'李思雨','三年级','社会学'),
    (4,'张文华','一年级','心理学'),
    (5,'张青云','二年级','心理学'),
    (6,'徐文杰','三年级','计算机'),
    (7,'李智瑞','一年级','心理学'),
    (8,'徐雨秋','二年级','计算机'),
    (9,'孙皓然','三年级','社会学'),
    (10,'李春山','一年级','社会学');
2 数据分析
-- Solution 1:
-- Count the enrolments of each course, sort the counts in descending
-- order and keep only the first row.
select
    course,
    count(course) as course_count
from t5_course_table
group by course
order by course_count desc
limit 1;
-- Solution 2:
-- Rank the courses by enrolment count and keep the ones ranked first.
-- rank() gives tied courses the same rank, so every course that shares
-- the highest count is returned. row_number() would number tied courses
-- arbitrarily and keep only one of them.
select
    course,
    course_count
from
(
    select
        course,
        count(course) as course_count,
        rank() over (order by count(course) desc) as rn
    from t5_course_table
    group by course
) t1
where rn = 1;
3 小结
==========================*****==========================
筛选出每个年级最受欢迎的三门课程
0 问题描述
筛选出每个年级最受欢迎的三门课程
1 数据准备
-- Course enrolments, one row per student (same table as the previous
-- exercise in this file).
-- Columns: uid and name identify the student, grade holds the grade label
-- (一年级 / 二年级 / 三年级 in the sample rows), course is the chosen course.
-- IF NOT EXISTS is required here: the table is already created earlier in
-- this file, so a plain CREATE TABLE fails with a table-already-exists
-- error when the file is run from top to bottom.
create table if not exists t5_course_table
(
    uid int,
    name string,
    grade string,
    course string
);

-- INSERT OVERWRITE replaces the table contents, so reloading the sample
-- rows never duplicates them.
insert overwrite table t5_course_table
values
    (1,'王小凤','一年级','心理学'),
    (2,'刘诗迪','二年级','心理学'),
    (3,'李思雨','三年级','社会学'),
    (4,'张文华','一年级','心理学'),
    (5,'张青云','二年级','心理学'),
    (6,'徐文杰','三年级','计算机'),
    (7,'李智瑞','一年级','心理学'),
    (8,'徐雨秋','二年级','计算机'),
    (9,'孙皓然','三年级','社会学'),
    (10,'李春山','一年级','社会学');
2 数据分析
-- The three most popular courses of every grade.
-- The inner query counts the students per grade and course and numbers
-- the courses inside each grade from the highest count downwards. The
-- outer query keeps the rows numbered 1 to 3.
select
    grade,
    course
from
(
    select
        grade,
        course,
        row_number() over (partition by grade order by count(uid) desc) as rn
    from t5_course_table
    group by grade, course
) ranked_course
where rn <= 3;
3 小结
==========================*****==========================
参考文章:
大数据开发面试必会的SQL 30题!!!_大数据sql面试题-CSDN博客