赞
踩
- --题目1:找出所有科目成绩都大于某一学科平均成绩的学生
- --表结构:score(uid,subject_id,score)
- --数据例:1001 01 90
- select t.uid
- from (select uid,
- count(1) as cnt,
- count(`if`(tab.score > avg_sco, tab.score, null)) as avg_ant
- from (select uid,
- subject_id,
- score,
- avg(score) over (partition by subject_id) as avg_sco
- from score) tab
- group by uid) t
- where t.cnt = t.avg_ant
-
-
-
- --题目2:统计出每个用户每个月的访问数和累积访问次数
- --表结构:action(userId string,visitDate string,visitCount)
- --数据例:u01 2017/2/22 4
- select userid,
- dt,
- cnt current_cnt,
- sum(cnt) over (partition by userid order by dt) acc_cnt
- from (select userid,
- dt,
- sum(visitcount) as cnt
- from (select userId,
- date_format(regexp_replace(visitdate, '/', '-'), 'yyyy-MM') as dt,
- visitcount
- from action) t
- group by userid, dt) tab
-
-
-
- --题目3:有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计
- --数据例:u1 a
- --1)每个店铺的UV(访客数)
- --2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数数据集
- 1:
- select shop, count(distinct user_id)
- from visit
- group by shop;
- 2:
- select shop,
- user_id,
- cnt,
- rn
- from (select shop,
- user_id,
- cnt,
- rank() over (partition by shop order by t.cnt desc ) as rn
- from (select shop,
- user_id,
- count(1) as cnt
- from visit
- group by shop, user_id) t) temp
- where temp.rn <= 3
-
-
-
- --题目4
- --order_tab(dt,order_id,user_id,amount)
- --数据样例:2017-01-01,10029028,1000003251,33.57
- --1)给出 2017年每个月的订单数、用户数、总成交金额。
- --2)给出2017年11月的新客数(指在11月才有第一笔订单)
- 1:
- select substr(dt, 1, 7),
- count(order_id),
- count(distinct user_id),
- sum(amount)
- from order_tab
- where substr(dt, 1, 4) = '2017'
- group by substr(dt, 1, 7);
- 2.
- select count(1) as cnt
- from (select user_id,
- min(dt)
- from order_tab
- group by user_id
- having substr(min(dt), 1, 7) = '2017-11') t
-
-
-
- --题目5
- --请写出代码求得所有用户和活跃用户的总数及平均年龄,活跃用户指连续两天都有访问记录的用户
- --user_age(dt string,user_id string,age int)
- --数据样例:2019-02-11,test_1,23
- select count(user_id) as user_cnt,
- sum(t.age) / count(user_id) user_avg_age,
- 'total' is_active
- from (select user_id, min(user_age.age) as age
- from user_age
- group by user_id) t
- union all
- select count(user_id) as act_user,
- sum(age) / count(user_id) act_user_avg_age,
- 'active'
- from (select user_id,
- min(age) as age
- from (select user_id,
- dt,
- age,
- datediff(nvl(lead_dt, '9999-12-31'), dt) as diff_num
- from (select user_id,
- dt,
- age,
- lead(dt, 1) over (partition by user_id order by dt) as lead_dt
- from (select user_id,
- dt,
- min(age) as age
- from user_age
- group by user_id, dt) t) t1) t2
- where diff_num = 1
- group by user_id) t3
-
-
-
- --题目6
- --用一条SQL语句查询出每门课都大于80分的学生姓名
- --name kecheng fenshu
- --数据示例:张三 语文 81
- select name from score group by name having min(fenshu)>80
-
-
-
- --题目7
- --怎么把这样一个
- year month amount
- 1991 1 1.1
- 1991 2 1.2
- 1991 3 1.3
- 1991 4 1.4
- 1992 1 2.1
- 1992 2 2.2
- 1992 3 2.3
- 1992 4 2.4
- 查成这样一个结果
- year m1 m2 m3 m4
- 1991 1.1 1.2 1.3 1.4
- 1992 2.1 2.2 2.3 2.4
-
- select year,
- max(case when month = 1 then amount else null end) as m1,
- max(case when month = 2 then amount else null end) as m2,
- max(case when month = 3 then amount else null end) as m3,
- max(case when month = 4 then amount else null end) as m4
- from m_am
- group by year
-
-
-
-
- --题目8 有一个订单表order。已知字段有:order_id(订单ID), user_id(用户ID),amount(金额), pay_datetime(付费时间),channel_id(渠道ID),dt(分区字段)。
- --order_a(order_id int,user_id int,amount double,pay_datatime timestamp,channel_id int)
- --1)查询dt=‘2018-09-01‘里每个渠道的订单数,下单人数(去重),总金额
- select channel_id,
- count(order_id),
- count(distinct user_id),
- sum(amount)
- from order_a
- where dt = '2018-09-01'
- group by channel_id;
- --2)查询dt=‘2018-09-01‘里每个渠道的金额最大3笔订单。
- select channel_id,
- order_id,
- rn
- from (select channel_id,
- order_id,
- rank() over (partition by channel_id order by amount desc ) as rn
- from order_a
- where dt = '2018-09-01') t
- where t.rn <= 3;
- --4) 有一天发现订单数据重复,请分析原因
- 1.首先关系型数据库不会发生重复
- 2.hive ods层接受到数据有重复很正常,前提数据通道是kafka数据源
- 3.那么很有可能后端查询mysql业务数据重复查询,然后重复打到kafka了
- 4.也有可能手动使用datax等工具同步数据的时候,数据拉取限制条件在凌晨12点的情况下,>和>=没写清楚
- --题目1
- --找出连续3天及以上减少碳排放量在100以上的用户
- --id dt lowcarbon
- --1001 2021-12-12 123
- --1002 2021-12-12 45
- --1001 2021-12-13 43
- --1001 2021-12-13 45
- --1001 2021-12-13 23
- --1002 2021-12-14 45
- --1001 2021-12-14 230
- --1002 2021-12-15 45
- --1001 2021-12-15 23
- select id
- from (select id,
- dt,
- cnt,
- date_sub(dt, rn) as tem_dt
- from (select id,
- dt,
- cnt,
- rank() over (partition by id,dt order by cnt) as rn
- from (select id, dt, sum(lowcarbon) as cnt
- from table
- group by id, dt
- having sum(lowcarbon) > 100) t1) t2) t3
- group by id, tem_dt
- having count(1) >= 3
-
-
-
- --题目2
- --计算每个用户最大的连续登录天数,可以间隔一天。解释:如果一个用户在 1,3,5,6 登录游戏,则视为连续 6 天登录
- --id dt
- --1001 2021-12-12
- --1001 2021-12-12
- --1001 2021-12-13
- --1001 2021-12-14
- --1001 2021-12-16
- --1001 2021-12-19
- --1001 2021-12-20
- --1002 2021-12-12
- --1002 2021-12-16
- --1002 2021-12-17
- --第 4 题 打折日期交叉问题
- --如下为平台商品促销数据:字段为品牌,打折开始日期,打折结束日期
- --id stt edt
- --oppo 2021-06-05 2021-06-09
- --oppo 2021-06-11 2021-06-21
- --vivo 2021-06-05 2021-06-15
- --vivo 2021-06-09 2021-06-21
- --redmi 2021-06-05 2021-06-21
- --redmi 2021-06-09 2021-06-15
- --redmi 2021-06-17 2021-06-26
- --huawei 2021-06-05 2021-06-26
- --huawei 2021-06-09 2021-06-15
- --huawei 2021-06-17 2021-06-21
- 计算每个品牌总的打折销售天数,注意其中的交叉日期,比如 vivo 品牌,第一次活动时间为 2021-06-05 到 2021-06-15,第二次活动时间为 2021-06-09 到 2021-06-21 其中 9 号到 15 号为重复天数,只统计一次,即 vivo 总打折天数为 2021-06-05 到 2021-06-21 共计 17 天
- select id,
- sum(if(days>=0,days+1,0)) days
- from
- ( select id,
- datediff(edt,stt) days
- from
- (
- select id,
- if(maxEdt is null,stt,if(stt>maxEdt,stt,date_add(maxEdt,1))) stt,
- edt
- from
- ( select id,
- stt,
- edt,
- max(edt) over(partition by id order by stt rows between UNBOUNDED PRECEDING and 1 PRECEDING) maxEdt
- from test_b
- )t1
- )t2
- )t3
- group by
- id;
- --如下为某直播平台主播开播及关播时间,根据该数据计算出平台最高峰同时在线的主播人数。
- --id stt edt
- --1001 2021-06-14 12:12:12 2021-06-14 18:12:12
- --1003 2021-06-14 13:12:12 2021-06-14 16:12:12
- --1004 2021-06-14 13:15:12 2021-06-14 20:12:12
- --1002 2021-06-14 15:12:12 2021-06-14 16:12:12
- --1005 2021-06-14 15:18:12 2021-06-14 20:12:12
- --1001 2021-06-14 20:12:12 2021-06-14 23:12:12
- --1006 2021-06-14 21:12:12 2021-06-14 23:15:12
- --1007 2021-06-14 22:12:12 2021-06-14 23:10:12
- select max(cnt)
- from (select id,
- t,
- sum(flag) over (order by t) as cnt
- from (select id, stt as t, 1 as flag
- from test_c
- union all
- select id, edt as t, -1 as flag
- from test_c) t1) t2
- --有一个5000万的用户文件(user_id,name,age),一个2亿记录的用户看电影的记录文件(user_id,url),根据年龄段观看电影的次数进行排序?
- select age_phase,
- count(1) as cnt
- from (select u.user_id,
- name,
- age,
- age_phase,
- url
- from (SELECT user_id,
- name,
- age,
- CASE
- WHEN age <= 10 AND age > 0 THEN '0-10'
- WHEN age <= 20 AND age > 10 THEN '10-20'
- WHEN age > 20 AND age <= 30 THEN '20-30'
- WHEN age > 30 AND age <= 40 THEN '30-40'
- WHEN age > 40 AND age <= 50 THEN '40-50'
- WHEN age > 50 AND age <= 60 THEN '50-60'
- WHEN age > 60 AND age <= 70 THEN '60-70'
- ELSE '70以上' END as age_phase
- from test4user) u
- join test4log l on u.user_id = l.user_id) t
- group by age_phase order by count(1) desc;
-
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。