-
题目:查询 Friends 表中参与人数既不是最多、也不是最少的活动名称(并列最多或并列最少的活动同样排除)。
-
SQL 测试用例:
-
-- Fixture schema and data for the activity-participants query.
CREATE TABLE IF NOT EXISTS Friends (
    id INT,
    name VARCHAR(30),
    activity VARCHAR(30)
);

CREATE TABLE IF NOT EXISTS Activities (
    id INT,
    name VARCHAR(30)
);

-- Empty the table first so the script can be re-run without duplicating rows.
TRUNCATE TABLE Friends;

-- ids are written as integer literals to match the INT column
-- instead of relying on implicit string-to-int coercion.
INSERT INTO Friends (id, name, activity)
VALUES
    (1, 'Jonathan D.', 'Eating'),
    (2, 'Jade W.', 'Singing'),
    (3, 'Victor J.', 'Singing'),
    (4, 'Elvis Q.', 'Eating'),
    (5, 'Daniel A.', 'Eating'),
    (6, 'Bob B.', 'Horse Riding');

TRUNCATE TABLE Activities;

INSERT INTO Activities (id, name)
VALUES
    (1, 'Eating'),
    (2, 'Singing'),
    (3, 'Horse Riding');
-
分析:先按活动分组,统计每个活动的参与人数;再按人数分别计算升序和降序排名(使用 rank,人数并列时名次相同);最后排除升序或降序排名为 1 的活动,即去掉参与人数最少和最多的活动。
-
SQL 实现:
-
-- Activities whose participant count is neither the highest nor the lowest.
WITH activity_counts AS (
    -- One row per activity with the number of friends doing it.
    SELECT
        activity,
        COUNT(*) AS participant_count
    FROM Friends
    GROUP BY activity
),
ranked_activities AS (
    -- RANK() gives tied counts the same rank, so every activity that shares
    -- the lowest (or highest) count is ranked 1 in that direction.
    SELECT
        activity,
        RANK() OVER (ORDER BY participant_count) AS rank_asc,
        RANK() OVER (ORDER BY participant_count DESC) AS rank_desc
    FROM activity_counts
)
-- Keep only activities that are ranked first in neither direction.
SELECT activity
FROM ranked_activities
WHERE rank_asc <> 1
    AND rank_desc <> 1;
-
pandas 测试用例:
-
import pandas as pd

# Sample rows for the Friends table: [id, name, activity].
data = [
    [1, 'Jonathan D.', 'Eating'],
    [2, 'Jade W.', 'Singing'],
    [3, 'Victor J.', 'Singing'],
    [4, 'Elvis Q.', 'Eating'],
    [5, 'Daniel A.', 'Eating'],
    [6, 'Bob B.', 'Horse Riding'],
]
friends = pd.DataFrame(data, columns=['id', 'name', 'activity']).astype(
    {'id': 'Int64', 'name': 'object', 'activity': 'object'}
)

# Sample rows for the Activities table: [id, name].
data = [
    [1, 'Eating'],
    [2, 'Singing'],
    [3, 'Horse Riding'],
]
activities = pd.DataFrame(data, columns=['id', 'name']).astype(
    {'id': 'Int64', 'name': 'object'}
)
-
pandas 分析:思路与 SQL 解法相同:按活动分组计数,用 rank(method='min') 分别计算升序和降序排名,再排除任一排名为 1 的活动。
-
pandas 实现:
-
import pandas as pd


def activity_participants(friends: pd.DataFrame, activities: pd.DataFrame) -> pd.DataFrame:
    """Return the activities whose participant count is neither the highest nor the lowest.

    Args:
        friends: One row per friend; the columns ``id`` and ``activity`` are read.
        activities: Accepted to keep the expected signature; not read by this
            implementation.

    Returns:
        A single-column DataFrame named ``activity``. It is empty when every
        activity is tied for the lowest or highest count.
    """
    # Number of friends per activity (count of non-null ids in each group).
    counts = friends.groupby('activity')['id'].count().reset_index(name='participants')

    # method='min' gives tied counts the same rank, so every activity sharing
    # the lowest (or highest) count is ranked 1 in that direction.
    rank_asc = counts['participants'].rank(method='min')
    rank_desc = counts['participants'].rank(method='min', ascending=False)

    # Keep only activities that are ranked first in neither direction.
    return counts.loc[(rank_asc != 1) & (rank_desc != 1), ['activity']]