赞
踩
目录
- --创建表scdn_student_score_test 并且初始化三个学生成绩
- create table hdw_tmp_dev.scdn_student_score_test as
- select '张三' as name,'数学' as subject ,'80' as score
- union all
- select '张三' as name,'语文' as subject ,'90' as score
- union all
- select '张三' as name,'英语' as subject ,'90' as score
- union all
- select '李四' as name,'数学' as subject ,'90' as score
- union all
- select '李四' as name,'语文' as subject ,'90' as score
- union all
- select '李四' as name,'英语' as subject ,'70' as score
- union all
- select '王五' as name,'数学' as subject ,'90' as score
- union all
- select '王五' as name,'语文' as subject ,'90' as score
- union all
- select '王五' as name,'英语' as subject ,'50' as score
- --查询结果显示
- | scdn_student_score_test.name | scdn_student_score_test.subject | scdn_student_score_test.score |
- +-------------------------------+----------------------------------+--------------------------------+
- | 张三 | 数学 | 80 |
- | 张三 | 语文 | 90 |
- | 张三 | 英语 | 90 |
- | 李四 | 数学 | 90 |
- | 李四 | 语文 | 90 |
- | 李四 | 英语 | 70 |
- | 王五 | 数学 | 90 |
- | 王五 | 语文 | 90 |
- | 王五 | 英语 | 50 |
- +-------------------------------+----------------------------------+---------------------
-
- --求所有学科成绩都大于等于80分的学生姓名

- select
- t1.name
- ,t1.min_score
- from
- (
- select
- name
- ,min(score) as min_score
- from hdw_tmp_dev.scdn_student_score_test
- group by name
- ) as t1 --求出最小的成绩
- where t1.min_score >=80 --最小成绩大于等于80,则这个学生的所有成绩都会大于等于80
- select
- t1.name
- from
- (
- select
- name
- from hdw_tmp_dev.scdn_student_score_test
- group by name
- ) as t1
- left join
- (
- select
- name
- from hdw_tmp_dev.scdn_student_score_test
- where score <80
- group by name
- ) as t2 on t1.name = t2.name
- where t2.name is null

- create table hdw_tmp_dev.csdn_user_login_test as
- select 'xiaoming' as user_name,'2024-01-01' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-02' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-03' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-04' as login_date
- union all
- select 'xiaoming' as user_name,'2024-01-05' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-02' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-03' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-04' as login_date
- union all
- select 'dahuang' as user_name,'2024-01-05' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-01' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-03' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-04' as login_date
- union all
- select 'lucky_dog' as user_name,'2024-01-05' as login_date

- select
- t2.user_name
- ,t2.date_begin_flag
- ,count(1) as max_login_days
- from
- (
- select
- t1.user_name
- ,t1.login_date
- ,date_sub(date(login_date),t1.rn) as date_begin_flag
- from
- (
- select
- user_name
- ,login_date
- ,row_number()over(partition by user_name order by login_date) as rn
- from hdw_tmp_dev.csdn_user_login_test
- ) as t1
- ) as t2
- group by t2.user_name
- ,t2.date_begin_flag

- | t1.name | t1.subject | t1.score |
- +-------------------------------+----------------------------------+-----------------+
- | 张三 | 数学 | 80 |
- | 张三 | 语文 | 90 |
- | 张三 | 英语 | 90 |
- | 李四 | 数学 | 90 |
- | 李四 | 语文 | 90 |
- | 李四 | 英语 | 70 |
- | 王五 | 数学 | 90 |
- | 王五 | 语文 | 90 |
- | 王五 | 英语 | 50 |
- +-------------------------------+----------------------------------+------------------
- select
- name
- ,max(case when subject = '数学' then score end) as math_score
- ,max(case when subject = '语文' then score end) as china_score
- ,max(case when subject = '英语' then score end) as english_score
- from hdw_tmp_dev.scdn_student_score_test
- group by name
- select
- name
- ,collect_set(subject) as subject_set
- from hdw_tmp_dev.scdn_student_score_test
- group by name
每个学生选课结果(多行变一行):
将上面的结果展开(一行变多行)数据准备:
- create table hdw_tmp_dev.scdn_student_score_test_collect as
- select 'zhangsan' as name ,'"数学","语文","英语"' as subject_list
- union all
- select 'lisi' as name ,'"美术","生物","物理"' as subject_list
- union all
- select 'wangwu' as name ,'"计算机","日语","韩语"' as subject_list
- select
- name
- ,subject_list
- ,subject_name
- from hdw_tmp_dev.scdn_student_score_test_collect
- lateral view explode(split(subject_list,',')) extend_sub as subject_name
结果
- --方案一:利用lead(日期,N)是否等于 当天登录实践+N天
- select
- t1.user_name
- ,t1.logon_date
- ,case when lead1_logon_date = date_add(logon_date,1) then '1天留存' end as 1day_remain
- ,case when lead3_logon_date = date_add(logon_date,3) then '3天留存' end as 3day_remain
- from
- (
- select
- user_name
- ,logon_date
- ,lead(user_name,1)over(partition by user_name order by logon_date) as lead1_user_name
- ,lead(logon_date,1)over(partition by user_name order by logon_date) as lead1_logon_date
- ,lead(user_name,3)over(partition by user_name order by logon_date) as lead3_user_name
- ,lead(logon_date,3)over(partition by user_name order by logon_date) as lead3_logon_date
- from hdw_tmp_dev.csdn_user_logon_test
- ) as t1
- --方案二:
- select
- t2.first_log_date as first_log_date
- ,count(t2.user_id) as new_user_cnt --新用户数
- ,count(t3.user_id) as next_user_id --次日回访用户数
- ,count(t4.user_id) as 30_user_id --30天回访用户数
- ,count(t3.user_id)/count(t2.user_id) as next_back_rate --次日回访率
- ,count(t4.user_id)/count(t2.user_id) as 30_back_rate --30天回访率
- from
- (
- select
- first_log_date
- ,user_id
- ,date_add(first_log_date,1) as next_log_date
- ,date_add(first_log_date,29) as 30_log_date
- from
- (
- select
- user_id
- ,log_time
- ,first_value(date(log_time))over(partition by user_id) as first_log_date
- from user_log
- ) as t1
- group by first_log_date
- ,user_id
- ) as t2
- left join
- (
- select
- user_id
- ,date(log_date) as log_date
- from user_log
- group by user_id
- ,date(log_date) as log_date
- ) as t3 on t2.user_id = t2.user_id and t2.next_log_date = t3.log_date
- left join
- (
- select
- user_id
- ,date(log_date) as log_date
- from user_log
- group by user_id
- ,date(log_date) as log_date
- ) as t4 on t2.user_id = t4.user_id and t2.30_log_date = t4.log_date
- group by t2.first_log_date

- create table hdw_tmp_dev.scdn_student_score_test1 as
- select '张三' as name,'数学' as subject ,'80' as score
- union all
- select '张三' as name,'语文' as subject ,'90' as score
- union all
- select '张三' as name,'英语' as subject ,'90' as score
- union all
- select '李四' as name,'数学' as subject ,'90' as score
- union all
- select '李四' as name,'语文' as subject ,'90' as score
- union all
- select '李四' as name,'英语' as subject ,'70' as score
- union all
- select '王五' as name,'数学' as subject ,'90' as score
- union all
- select '王五' as name,'语文' as subject ,'90' as score
- union all
- select '王五' as name,'英语' as subject ,'50' as score
- union all
- select '小明' as name,'数学' as subject ,'88' as score
- union all
- select '小明' as name,'语文' as subject ,'99' as score
- union all
- select '小明' as name,'英语' as subject ,'77' as score
- union all
- select '小文' as name,'数学' as subject ,'66' as score
- union all
- select '小文' as name,'语文' as subject ,'89' as score
- union all
- select '小文' as name,'英语' as subject ,'90' as score

- select
- *
- from
- (
- select
- name
- ,subject
- ,score
- ,row_number()over(partition by subject order by score desc) as rn
- from hdw_tmp_dev.scdn_student_score_test1
- ) as t1
- where rn<=3
- create table hdw_tmp_dev.user_sale_date as
- select '001' as user_id,'2024-02-01' as sale_date, 100 as amount
- union all
- select '001' as user_id,'2024-02-02' as sale_date, 200 as amount
- union all
- select '001' as user_id,'2024-02-03' as sale_date, 300 as amount
- union all
- select '001' as user_id,'2024-02-04' as sale_date, 400 as amount
- union all
- select '001' as user_id,'2024-02-05' as sale_date, 500 as amount
- union all
- select '001' as user_id,'2024-02-06' as sale_date, 600 as amount
- select
- user_id
- ,sale_date
- ,amount
- ,sum(amount)over(partition by user_id order by sale_date) as accuma_amount --按日期逐渐累加
- ,sum(amount)over(partition by user_id) as total_amount --按人汇总
- ,avg(amount)over(partition by user_id) as avg_amount --安人平均每天
- ,max(amount)over(partition by user_id) as max_amount --单日最大销售
- ,min(amount)over(partition by user_id) as min_amount --单日最小销售
- from hdw_tmp_dev.user_sale_date
- create table hdw_tmp_dev.csdn_user_login_time_detail as
- select '001' as user_id,'2024-02-01 10:00:00' as begin_time,'2024-02-01 12:00:00' as end_time
- union all
- select '002' as user_id,'2024-02-01 11:00:00' as begin_time,'2024-02-01 13:00:00' as end_time
- union all
- select '003' as user_id,'2024-02-01 11:00:00' as begin_time,'2024-02-01 14:00:00' as end_time
- union all
- select '004' as user_id,'2024-02-01 15:00:00' as begin_time,'2024-02-01 16:00:00' as end_time
- select
- t1.user_id
- ,t1.time1
- ,t1.flag
- ,sum(flag)over(order by t1.time1) as user_cnt
- from
- (
- select
- user_id
- ,begin_time as time1
- ,1 as flag
- from hdw_tmp_dev.csdn_user_login_time_detail
- union all
- select
- user_id
- ,end_time
- ,-1 as flag
- from hdw_tmp_dev.csdn_user_login_time_detail
- ) as t1

- create table hdw_tmp_dev.cycle_1 as
- select '1011' as a
- union all
- select '0101' as a
- select
- a,
- concat_ws(",",collect_list(cast(index as string))) as res
- from (
- select
- a,
- index+1 as index,
- chr
- from (
- select
- a,
- concat_ws(",",substr(a,1,1),substr(a,2,1),substr(a,3,1),substr(a,-1)) str
- from hdw_tmp_dev.cycle_1 as t8
- ) tmp1
- lateral view posexplode(split(str,",")) t as index,chr
- where chr = "1"
- ) tmp2
- group by a;

- create table hdw_tmp_dev.user_sale_date as
- select '001' as user_id,'2024-02-01' as sale_date, 100 as amount
- union all
- select '001' as user_id,'2024-02-02' as sale_date, 200 as amount
- union all
- select '001' as user_id,'2024-02-03' as sale_date, 300 as amount
- union all
- select '001' as user_id,'2024-02-04' as sale_date, 400 as amount
- union all
- select '001' as user_id,'2024-02-05' as sale_date, 500 as amount
- union all
- select '001' as user_id,'2024-02-06' as sale_date, 600 as amount
- select
- t1.user_id
- ,t1.sale_date
- ,t1.amount
- ,avg(t1.amount)over(partition by t1.user_id) as zhongwenshu
- from
- (
- select
- user_id
- ,sale_date
- ,amount
- ,row_number()over(partition by user_id order by amount) as rn
- from hdw_tmp_dev.user_sale_date
- ) as t1
- left join
- (
- select
- user_id
- ,count(1) as cnt
- from hdw_tmp_dev.user_sale_date
- group by user_id
- ) as t2 on t1.user_id = t2.user_id
- where t1.rn in (cnt/2,(cnt+1)/2,cnt/2+1)
- --总个数为奇数命中(cnt+1)/2;总个数为偶数命中:cnt/2,cnt/2+1,两数相加求平均值

- --产生1到10的连续数据
- select
- start_id + pos as id
- ,pos
- ,val
- from(
- select
- 1 as start_id,
- 10 as end_id
- ) m lateral view posexplode(split(space(end_id - start_id),'')) t as pos, val
- --方案二
- select
- row_number() over() as id
- from
- (select split(space(99), '') as x) t
- lateral view
- explode(x) ex;

Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。