赞
踩
select id ,concat_ws(',',collect_set(value)) as rows2col from rows2cols group by id;
select id ,concat_ws(',',collect_list(value)) as rows2col from rows2cols group by id;
select * from default.cols2row lateral view explode(split(value,',')) num as tag ;
create table student_info(
id int,
studentId int,
english int,
math int,
className string,
deptName string
);
insert into table student_info (11,211,68,69,90,'计算机科学与技术技术','计算机学院'), (12,212,73,80,96,'计算机科学与技术技术','计算机学院'), (13,213,90,74,75,'计算机科学与技术技术','计算机学院'), (14,214,89,94,93,'计算机科学与技术技术','计算机学院'), (15,215,79,93,89,'计算机科学与技术技术','计算机学院'), (16,221,96,74,79,'软件工程','计算机学院'), (17,222,89,86,85,'软件工程','计算机学院'), (18,223,70,78,61,'软件工程','计算机学院'), (19,224,76,70,76,'软件工程','计算机学院'), (20,311,89,93,60,'计算机科学与技术技术','软件学院'), (21,312,76,83,75,'计算机科学与技术技术','软件学院'), (22,313,71,94,90,'计算机科学与技术技术','软件学院'), (23,314,94,94,66,'计算机科学与技术技术','软件学院'), (24,315,84,82,73,'计算机科学与技术技术','软件学院'), (25,316,85,74,43,'计算机科学与技术技术','软件学院'), (26,321,77,99,61,'软件工程','软件学院'), (27,322,80,76,56,'软件工程','软件学院'), (28,323,79,74,96,'软件工程','软件学院'), (29,324,75,80,78,'软件工程','软件学院'), (30,325,82,85,63,'软件工程','软件学院');
select studentid,math,classname,deptname,count(math) over(partition by classname) as class_math from default.student_info;
select studentid,math,classname,deptname,count(math) over(partition by classname order by math) as class_math from default.student_info;
select studentid,math,classname,deptname,count(math) over(partition by classname order by math row between 2 preceding and 2 following) as class_math from default.student_info;
select studentid,math,classname,deptname,sum(math) over(partition by classname) as class_math from default.student_info;
select studentid,math,classname,deptname,sum(math) over(partition by classname order by math) as class_math from default.student_info;
select studentid,math,classname,deptname,sum(math) over(partition by classname order by math rows between 2 preceding and 2 following) as class_math from default.student_info where deptname ='计算机学院';
select studentid,math,classname,deptname,max(math) over(partition by classname) as class_math from default.student_info;
select studentid,math,classname,deptname,max(math) over(partition by classname order by math ) as class_math from default.student_info;
select studentid,math,classname,deptname,max(math) over(partition by classname order by math rows between 2 preceding and 2 following) as class_math from default.student_info;
select studentid,math,classname,deptname,min(math) over(partition by classname) as class_math from default.student_info;
select studentid,math,classname,deptname,min(math) over(partition by classname order by math) as class_math from default.student_info;
select studentid,math,classname,deptname,min(math) over(partition by classname order by math rows between 2 preceding and 2 following) as class_math from default.student_info;
select studentid,math,classname,deptname,avg(math) over(partition by classname) as class_math from default.student_info;
select studentid,math,classname,deptname,avg(math) over(partition by classname order by math) as class_math from default.student_info;
select studentid,math,classname,deptname,avg(math) over(partition by classname order by math rows between 2 preceding and 2 following) as class_math from default.student_info;
select studentid,math,classname,deptname,
/不分组情况下去第一个
first_value(math) over() as first_value1,
//按照班级分组后,获取该分组的第一个数学成绩
first_value(math) over(partition by classname) as fv_no_sort,
//按照班级分组后,并排序,获取该分组的第一个数学成绩
first_value(math) over(partition by classname order by math) as fv_sort,
//按照班级分组后,并排序,以当前行及其前后各2行[共5行],获取该分组的第一个数学成绩
first_value(math) over(partition by classname order by math rows between 2 preceding and 2 following) as fv_sort_rows_limit
from default.student_info;
select studentid,math,classname,deptname,
/不分组情况下去最后一个数学成绩值
first_value(math) over() as first_value1,
//按照班级分组后,获取该分组的最后一个数学成绩值
first_value(math) over(partition by classname) as fv_no_sort,
//按照班级分组后,并排序,获取该分组的最后一个数学成绩值
first_value(math) over(partition by classname order by math) as fv_sort,
//按照班级分组后,并排序,以当前行及其前后各2行[共5行],获取该分组的最后一个数学成绩值
first_value(math) over(partition by classname order by math rows between 2 preceding and 2 following) as fv_sort_rows_limit
from default.student_info;
lag(col,n,default) 用于统计窗口内往上第n个值,col:列名,n:第n行,default:往上第n行为NULL,取默认值,否则为NULL
SQL
select studentid,math,classname,deptname,
// 以专业班级坐分组,然后对高数成绩排序,取窗口往上第2个的高数成绩,设置填充值
lag(math,2,-1) over(partition by classname order by math) as lag_with_fill_value,
// 以专业班级坐分组,然后对高数成绩排序,取窗口往上第2个的高数成绩,不设置填充值
lag(math,2) over(partition by classname order by math) as lag_no_fill_value from default.student_info;
select studentid,math,classname,deptname,
lead(math,2,-1) over(partition by classname order by math) as lead_with_fill_value,l
ead(math,2) over(partition by classname order by math) as lead_no_fill_value
from default.student_info;
select studentid,math,classname,deptname,
// 根据成绩排序
rank() over( order by math) as rank_no_sort,
//按照专业班级分区,根据成绩排序
rank() over(partition by classname order by math) as rand_classname_sort ,
//按照院系、专业班级分区,根据成绩排序
rank() over(partition by deptname,classname order by math) as rank_classDept_sort
from default.student_info;
select studentid,math,classname,deptname,
// 根据成绩排序
dense_rank() over( order by math) as dense_rank_no_sort,
//按照专业班级分区,根据成绩排序
dense_rank() over(partition by classname order by math) as dense_rank_classname_sort ,
//按照院系、专业班级分区,根据成绩排序
dense_rank() over(partition by deptname,classname order by math) as dense_rank_classDept_sort
from default.student_info;
select studentid,math,classname,deptname,
row_number() over(partition by deptname,classname order by math) as row_numm,
percent_rank() over(partition by deptname,classname order by math) as percent_rank
from student_info;
类似机器学习的分箱操作把数据按分、比例分成若干份
001,001,math,15 001,002,math,20 001,003,math,35 001,004,math,40 001,005,math,48 001,006,math,60 001,007,math,69 001,008,math,80 001,009,math,89 001,010,math,100 001,001,english,99 001,002,english,100 001,003,english,87 001,004,english,10 001,005,english,50 001,006,english,30 001,007,english,58 001,008,english,68 001,009,english,78 001,010,english,89 002,001,math,15 002,002,math,20 002,003,math,35 002,004,math,40 002,005,math,48 002,006,math,60 002,007,math,69 002,008,math,80 002,009,math,89 002,010,math,100 002,001,english,99 002,002,english,100 002,003,english,87 002,004,english,10 002,005,english,50 002,006,english,30 002,007,english,58 002,008,english,68 002,009,english,78 002,010,english,89
select stuid,classid,course,score,ntile(5) over (partition by classId order by score) as bucket from student where course = 'math';
//按班级分组、并根据数学成绩排序后,获取钱百20
select stuid,classid,course,score,bucket from (select stuid,classid,course,score,cume_dist() over (partition by classId order by score) as bucket from student where course = 'math') tmp where tmp.bucket <= 0.2
select studentid,math,classname,deptname,row_number() over(partition by deptname,classname order by math) as row_num from student_info;
//分别按照院系所有值分组,院系和专业所有值组合分组进行聚合
select deptname,classname,grouping_id,count(1) from student_info group by deptname,classname with cube;
结果
一般SQL【不建议】
//分别安装院系,院系和专业进行成绩人数统计
//不分组
select 'ALL' classname,'ALL' deptname ,count(1) from student_info
union all
//根据院系分组
select deptname,'ALL' classname,count(1) from student_info group by deptname
union all
//根据院系和专业分组
select deptname,classname,count(1) from student_info group by deptname,classname;
select deptname,classname,GROUPING__ID,count(1) stu_cnt from student_info group by deptname,classname with rollup;
//根据院系和专业维度的列值组合进行聚合
select deptname,classname,count(1) from student_info group by deptname,classname
union all
//根据院系维度的列值组合进行聚合
select deptname,'ALL' classname,count(1) from student_info group by deptname
union all
//不分组进行聚合
select 'ALL' classname,'ALL' deptname ,count(1) from student_info ;
select
deptname,classname,GROUPING__ID,count(1)
from student_info
group by deptname,classname
GROUPING SETS((deptname,classname),deptname,classname);
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。