赞
踩
解析
- #方法一:嵌套查询
- select player_id,device_id from Activity
- where (player_id,event_date) in (
- select player_id,min(event_date)
- from Activity
- group by player_id
- )
-
- #方法二:时间窗口查询
- select player_id,device_id from
- (
- select
- player_id,
- device_id,
- row_number() over(partition by player_id order by event_date) as rn
- #按照player_id分组,组内按照event_date排序,将组内每条数据重新排序编号为rn
- from Activity
- ) as a
- where rn=1
ROW_NUMBER(): 求行数,结果为1,2,3,4
RANK(): 有间隔的分级,结果为1,2,2,4
DENSE_RANK(): 无间隔的分级,结果为1,2,2,3
解析
- #方法一:自连接
-
- #step1:自连接,得到图中整体的表(t1将设置为现在的时间戳,t2将为了找出比当前时间戳更早的记录)
- select t1.player_id,t1.event_date
- from Activity t1, Activity t2
- where t1.player_id=t2.player_id
-
- #step2:筛选日期在自身之前的记录,如图中灰色部分
- t1.event_date>=t2.event_date
-
- #step3:用group by 对两个主键进行分组并对目标字段进行sum(),如途中棕色虚线圈部分,综合代码为:
- select player_id,
- event_date,
- sum(game_played)as game_played_so_far
- from Activity t1, Activity t2
- where t1.player_id=t2.player_id and t1.event_date>=t2.event_date #拼表+筛选
- group by t1.player_id,t1.event_date #分组后用sum聚合
- #方法二:时间窗口
-
- select
- player_id,
- event_date,
- sum(game_played)over(partition by player_id order by event_date)as game_played_so_far
- #按照player_id分组,组内按照event_date排序,将每条数据event_date之前的game_played累加
- from Activity
- group by t1.player_id,t1.event_date
sum(字段1) over(partition by 字段2 order by 字段3 rows between unbounded preceding and current row) as 新字段名
函数说明:
sum(tota_amount)的求和是针对后面over()窗口的求和,
over中partition by player_id order by event_date 针对player_id这一组按照event_date排序,
rows between unbounded preceding and current row 限定了行是按照在当前行不限定的往前处理(累加范围),通俗就是处理当前以及之前的所有行的sum,即3月时sum(amount)求的时1、2、3月的和,2月时sum(amount)求的是1、2月的和。unbounded意思无限的 preceding在之前的,current row当前行。
这里可以改成:
- SUM(Salary) OVER (PARTITION BY Id ORDER BY Month ROWS 2 PRECEDING)
- #从当前月往前滚动2个月薪水累加(月份连续,如1,2月)
- or
- SUM(Salary) OVER (PARTITION BY Id ORDER BY Month range 2 PRECEDING)
- #从当前月往前找到2个月薪水累加(月份不连续,如1,3月)
解析
- # 方法一:用IN是否存在筛选第二天登录的记录
-
- SELECT
- ROUND(COUNT(DISTINCT player_id)/(SELECT COUNT(distinct player_id) FROM Activity),
- 2) AS fraction #分母是个固定的常数,而分子要根据下面的条件过滤
- FROM
- Activity
- WHERE
- (player_id,event_date)
- IN
- (SELECT player_id,Date(min(event_date)+1)
- #得到每个玩家的次日登陆时间,看原数据中是否存在(存在说明第二天登录过,则被where筛选出来)
- FROM Activity
- Group by player_id) #给每个player_id都计算第二天登陆日期,为date()聚合
- #方法二:JOIN+Avg()
-
- SELECT ROUND (Avg(a.player_id is not null),2) as fraction #筛选的是a表,不符合on条件的为空
- FROM Activity a right join #a:所有登陆记录
- (
- SELECT player_id,min(event_date) as firstDate
- FROM Activity
- Group by player_id
- ) b #b:把每个player_id第一天登录的时间拿出来
-
- on a.player_id=b.player_id and datediff(a.event_date,b.firstDate)=1
- #如果相差1天则有值,不等于1则为空,会被最开始筛选掉
- #方法三:窗口函数
-
- select round(count(distinct t.player_id)/(select count(distinct player_id) from Activity),2) fraction #在临时表中筛选后聚合
- from
- (
- select
- a.player_id,
- first_value(a.event_date) over(partition by a.player_id order by a.event_date)first_date,
- #按player_id进行分组,按event_date进行组内排序,取组内排序后第一个值
- lead(a.event_date,1)over(partition by a.player_id order by a.event_date)next_date
- #使用LEAD()函数,可以从当前行访问下一行的数据或下一行之后的行
- from Activity a) t #建立临时表
-
- where datediff(t.next_date,t.first_date) = 1 #在临时表中进行筛选
解析:
- #方法一:窗口函数用排序和总数判断
- Select id,company,salary from(
- Select
- id,company,salary,
- row_number() over (partition by company order by salary) rk,
- count(*) over(partition by company) cnt
- from Employee e
- ) t
- where t.rk in (cnt/2,cnt/2+1,cnt/2+0.5)
- # 如果是8个数字,中位数分别为第(4,5,4.5)个【偶数个:最中间的两个数字都是中位数】
- # 如果是9个数字,中位数分别为第(4.5,5.5,5)个【奇数个:最中间的一个数字是中位数】
- #方法二:把rank和count拼在一起后用where筛选判断
- Select id,company,salary from
- (
- Select
- id,E1.company,salary,
- row_number() over(partition by company order by salary) rn,
- total
- from Employee E1
- inner join
- (
- Select company,
- count(*) total
- from Employee
- group by company
- )E2 on E1.company=E2.company
- ) T
- where rn BETWEEN total/2 AND total/2 + 1
- # 如果是8个数字,中位数位于第(4,5)个之间【偶数个:最中间的两个数字都是中位数】
- # 如果是9个数字,中位数位于第(4.5,5.5)之间【奇数个:最中间的一个数字是中位数】
解析:
- #方法一:子查询
- select name from Employee
- where id in
- (
- select ManagerId
- from Employee
- group by ManagerID #将每个员工按对应经理分组
- having count(ManagerID)>=5 #找出管理员工数大于5的经理
- )
- # 方法二:自连接
- select e1.name from
- Employee e1 left join Employee e2
- on e1.id=e2.managerId
- #员工ID=经理ID,找出来的是所有经理员工(普通员工的ID不会等于自己经理的ID,经理员工的ID等于普通员工的ID)
- #eg.给id=101的员工匹配managerID=101的人,匹配出来了5个,给id=102的员工匹配managerID=102的人,匹配出来了0个
- group by e1.id #按照员工分组
- having count(*)>=5 #
inner join的结果:
解析
- select avg(num) as median from
- (
- select num,
- sum(frequency) over(order by num asc) as asc_amount,
- #num正向排序,逐行累加frequency,正向累加到哪个数字的时候超过总数的一半
- sum(frequency) over(order by num desc) as desc_amount,
- #num反向排序,逐行累加frequency,反向累加到哪个数字的时候超过总数的一半
- sum(frequency) over() as total_num
- from numbers
- )a
- where asc_amount >= total_num/2 and desc_amount >= total_num / 2
子查询排序的结果为:
解析
- #方法一:having count(#)
- select C.name,V.id from #C是得票者name,V是投票者id
- Candidate C left join Vote V
- on C.id=V.candidateId
- #得到的结果是{[["A", null], ["B", 4], ["B", 1], ["C", 3], ["D", 2], ["E", 5]]}
- group by C.name
- having count(*)>=all(select count(*) from vote group by candidateId)
- #按group by 的C.name来count(*)
- # A>all(......): A大于all()中的所有数,即大于all中的最大值
- #方法二:
- select Name from Candidate
- where id =
-
- (select CandidateId from Vote #找出投票表中得票数最大的id
- group by CandidateId
- order by count(CandidateId) desc #按groupby后的CandidateId来count()聚合
- limit 0,1
- )
解析
- select question_id survey_log from SurveyLog
- group by question_id #按题目分组
- order by avg(action='answer')desc,question_id #回答率,正确率等用avg()
- #先按回答率排序,若回答率有相同则按question_id排序
- limit 1
回答率,正确率等用avg()高效
- SELECT Id, Month, Salary
- FROM (
- SELECT Id, Month,
- SUM(Salary) OVER (PARTITION BY Id ORDER BY Month range 2 PRECEDING) AS Salary,
- #按id分组,组内按月份排序从当前行开始往前滚动,累加每个月的薪水
- rank() OVER (PARTITION BY Id ORDER BY Month DESC) AS r
- #按id分组,组内按月份降序排列,将序号标记为r(对大的月份的序号r=1)
-
- FROM Employee) t
- WHERE r > 1 #筛选出除最近一个月(即最大月)之外的数据
- ORDER BY Id, Month DESC;
子查询结果为(不筛选r>1):
解析
- select round(sum(TIV_2016),2) as TIV_2016 from(
- select *,
- count(PID) over (partition by TIV_2015) num_tiv,
- #按TIV_2015分组一共有几组
- count(PID) over (partition by LAT,LON) num_city
- #按LAT,LON分组一共有几组
- from insurance) h
- where h.num_tiv>1 #TIV_2015组内个数大于1
- and h.num_city=1 #组内个数等于1
- `#方法一:用limit取最大值 or 用count>all()取最大值
- select customer_number from Orders
- group by customer_number
- order by count(Order_number) desc
- #having count(Order_number)>=all(
- # select count(order_number) from Orders group by customer_number)
- #每个customer_number的订单数量
- limit 1
- #方法二:count()=max()
- select customer_number from Orders
- group by customer_number
- having count(order_number) = (
- select max(sum) from(
- select count(order_number) as sum from Orders group by customer_number)t
- )
- #方法三:窗口函数
- select customer_number from(
- select customer_number,
- rank() over ( order by count(order_number) desc) as rk
- from Orders
- group by customer_number
- )t
- where rk=1
-
- #若为rank() over (partition by customer_number order by count(order_number) desc) as rk,则为在customer_number组内排序,而我们要的customer_number的组的排序,因此用group by 分组后按rank()聚合得到每个组按count(order_number)的排名
- select
- round(
- ifnull(
- count(distinct requester_id, accepter_id)/
- (select count(distinct sender_id, send_to_id) accept_rate from FriendRequest)
- ,0)
- ,2) accept_rate
-
- from RequestAccepted
- select id,sum(num1) as num
-
- from (
-
- (select requester_id as id,count(accepter_id) as num1 from RequestAccepted R1
- group by requester_id)
- union all
- (select accepter_id as id,count(requester_id) as num1 from RequestAccepted R2
- group by accepter_id)
- )t
- group by id
- order by num desc
- limit 1
- #找出下第一个和下第二个,再判断下第一个和下第二个是否空闲
- select distinct(c1.seat_id)
- from cinema c1 join cinema c2
- on abs(c2.seat_id-c1.seat_id)=1 #编号连续可采用abs(A.id-B.id)=1
- where c1.free=1 and c2.free=1
- order by c1.seat_id
- #方法一:not in 子查询+左连接
- select name from SalesPerson S
- where sales_id not in (
- select O.sales_id from Orders O
- left join Company C on O.com_id=C.com_id
- where C.name ='RED'
- )
- #方法二:全left join
- select s.name, count(C.name) from SalesPerson S
- left join Orders O on S.sales_id=O.sales_id
- left join Company C on O.com_id=C.com_id and c.name='RED'
- #符合筛选条件的才会被匹配出来
- group by S.name
- having count(C.name)=0#找到没有被匹配出来的
- #结果是:[["John", 1], ["Amy", 0], ["Mark", 0], ["Pam", 1], ["Alex", 0]]}
- select id ,
-
- Case
- When p_id is null Then "Root"
- When id in (select distinct p_id from tree) Then "Inner"
- #是别人的父节点的点,
- Else "Leaf"
- End as Type
-
- from tree
- #每一个关注者follower,作为被关注者followee拥有的关注者follower数量
- select followee as follower,count(distinct follower) num
- #被关注者followee拥有的关注者follower数量:[["Alice", 1], ["Bob", 2], ["Donald", 1]]
- from follow
- where followee in (select distinct follower from follow)
- #对每一个关注者follower,作为被关注者followee的身份进行筛选
- group by followee
- #1.拼表,2.按日期和部门聚合计算平均金额和总金额,3.对比
- select
- pay_month,
- department_id,
- case #3.对比
- when department_avg/company_avg>1 then 'higher'
- when department_avg/company_avg=1 then 'same'
- else 'lower'
- end as comparison
-
- from(
- select #2.按日期和部门聚合计算平均金额和总金额
- date_format(pay_date,'%Y-%m') as pay_month,
- department_id,
- avg(amount) over (partition by pay_date) as company_avg,
- avg(amount) over (partition by department_id,pay_date) as department_avg
-
- from salary s
- left join employee e on s.employee_id=e.employee_id #1.拼表
- )t
- group by department_id,pay_month
- order by pay_month desc, department_id
-
- select
- max(case when continent="America" then name else null end) as America
- #3. 按rk=1的三个(Jack,Xi,Pascal)聚合max(),case when得到(Jack,null,null)=jack
- max(case when continent="Asia" then name else null end) as Asia
- max(case when continent="Europe" then name else null end) as Europe
- from
- (
- select
- name,
- continent,
- row_number()over (partition by continent order by name) as rk
- #按照continent分组,按照name排序为rk
- from Student
- )t
- #1. t表结果是:[["Jack", "America", 1], ["Jane", "America", 2], ["Xi", "Asia", 1], ["Pascal", "Europe", 1]]
- #2. rk是每个name在洲内排第几
-
- group by rk #按照rk聚合上面的max()
-
- #窗口函数中对continent进行分区排名 group by cur_rank 能把同一个洲的人分成不同的组 这样上面的max就能取出所有人 如果不加group by 的话 每个洲就只能取出一个人
-
- #max()函数的功效:(‘jack’, null, null)中得出‘jack',(null,null)中得出null。 min()函数也可以。
- #问题:哪些客户买了所有产品
- #思路:1. 找到每个客户购买的产品的个数,2. 找到购买个数=产品个数的客户
-
- #方法一:利用join 筛选相同产品个数的
- select c.customer_id
- from
- ( #找到每个客户购买的产品的个数
- select customer_id,count(distinct product_key) as cnt
- #如果不加distinct的话,如果1客户买了两个5产品也会被算进去
- from customer
- group by customer_id
- )c
- join
- ( #总产品个数
- select count(*) as cnt from product
- )p
- on c.cnt=p.cnt #找出客户购买个数=总产品个数
-
-
- #方法二:having筛选
- select customer_id
- from customer
- group by customer_id
- having count(distinct product_key)=(select count(*) from product)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。