赞
踩
标准窗口函数语法如下所示。
--中括号表示非必要的
function (expression) OVER ([ PARTITION BY expr_list ][ ORDER BY order_list [ frame_clause ]])
基于 OVER 子句中的 ORDER BY 表达式确定一组行中当前行的序号(从 1 开始计数)。如果存在可选的 PARTITION BY 子句,则为每组行重置序号。ORDER BY 表达式中具有相同值的行以非确定性的方式接收不同的行号。
RANK 窗口函数基于 OVER 子句中的 ORDER BY 表达式确定一组值中的一个值的排名。如果存在可选的 PARTITION BY 子句,则为每个行组重置排名。带符合排名标准的相同值的行接收相同的排名。Amazon Redshift 将关联行的数目添加到关联排名以计算下一个排名,因此排名可能不是连续数。例如,如果两个行的排名为 1,则下一个排名则为 3。
drop table if exists examination_info,exam_record;
-- 试卷信息表
CREATE TABLE examination_info (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
exam_id int UNIQUE NOT NULL COMMENT '试卷ID',
tag varchar(32) COMMENT '类别标签',
difficulty varchar(8) COMMENT '难度',
duration int NOT NULL COMMENT '时长',
release_time datetime COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
-- 试卷作答记录
CREATE TABLE exam_record (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int NOT NULL COMMENT '用户ID',
exam_id int NOT NULL COMMENT '试卷ID',
start_time datetime NOT NULL COMMENT '开始时间',
submit_time datetime COMMENT '提交时间',
score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO examination_info(exam_id,tag,difficulty,duration,release_time) VALUES
(9001, 'SQL', 'hard', 60, '2020-01-01 10:00:00'),
(9002, 'C++', 'hard', 80, '2020-01-01 10:00:00'),
(9003, '算法', 'hard', 80, '2020-01-01 10:00:00'),
(9004, 'PYTHON', 'medium', 70, '2020-01-01 10:00:00');
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2020-08-02 10:01:01', '2020-08-02 10:31:01', 89),
(1002, 9001, '2020-04-01 18:01:01', '2020-04-01 18:59:02', 90),
(1001, 9001, '2020-04-01 09:01:01', '2020-04-01 09:21:59', 80),
(1002, 9003, '2021-01-20 10:01:01', '2021-01-20 10:10:01', 81),
(1002, 9001, '2021-03-02 19:01:01', '2021-03-02 19:32:00', 20),
(1001, 9003, '2021-04-02 19:01:01', '2021-04-02 19:40:01', 89),
(1004, 9004, '2020-05-02 12:01:01', '2020-05-02 12:20:01', 99),
(1003, 9001, '2021-05-02 12:01:01', '2021-05-02 12:31:01', 98),
(1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
(1002, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
(1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
(1001, 9004, '2021-09-02 12:11:01', null, null),
(1003, 9001, '2020-01-02 10:01:01', '2020-01-02 10:31:01', 89),
(1002, 9004, '2020-01-01 12:11:01', '2020-01-01 12:31:01', 83),
(1002, 9003, '2021-01-01 18:01:01', '2021-01-01 18:59:02', 90),
(1002, 9002, '2020-02-02 12:01:01', null, null),
(1002, 9002, '2020-03-02 12:11:01', null, null),
(1001, 9002, '2021-05-05 18:01:01', null, null);
SELECT e1.uid,DATE_FORMAT(e1.start_time,'%Y%m') start_month
,COUNT(e1.exam_id) total_cnt
,COUNT(IF(e1.submit_time IS NOT NULL,1,NULL)) complete_cnt
FROM exam_record e1
JOIN user_info u ON u.uid = e1.uid
JOIN (
SELECT e.uid
,PERCENT_RANK()OVER(ORDER BY COUNT(IF(e.submit_time IS NULL,1,NULL))/COUNT(e.exam_id) DESC) num
FROM exam_record e LEFT JOIN examination_info ei ON e.exam_id = ei.exam_id
WHERE ei.tag = 'SQL'
GROUP BY e.uid
) tmp ON tmp.uid = u.uid
WHERE tmp.num <= 0.5
AND u.level IN (6,7)
AND e1.start_time >= '2020-02-01 00:00:00'
GROUP BY e1.uid,DATE_FORMAT(e1.start_time,'%Y%m')
ORDER BY e1.uid,start_month
DENSE_RANK 窗口函数基于 OVER 子句中的 ORDER BY 表达式确定一组值中的一个值的排名。如果存在可选的 PARTITION BY 子句,则为每个行组重置排名。带符合排名标准的相同值的行接收相同的排名。DENSE_RANK 函数与 RANK 存在以下一点不同:如果两个或两个以上的行结合,则一系列排名的值之间没有间隔。例如,如果两个行的排名为 1,则下一个排名则为 2。
计算当前行在窗口内的百分比排名。使用以下公式确定百分比排名:(x - 1) / (所在窗口行数 - 1)
drop table if exists examination_info,user_info,exam_record;
--试卷信息表
CREATE TABLE examination_info (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
exam_id int UNIQUE NOT NULL COMMENT '试卷ID',
tag varchar(32) COMMENT '类别标签',
difficulty varchar(8) COMMENT '难度',
duration int NOT NULL COMMENT '时长',
release_time datetime COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
-- 用户信息表
CREATE TABLE user_info (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int UNIQUE NOT NULL COMMENT '用户ID',
`nick_name` varchar(64) COMMENT '昵称',
achievement int COMMENT '成就值',
level int COMMENT '用户等级',
job varchar(32) COMMENT '职业方向',
register_time datetime COMMENT '注册时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
--试卷作答记录表
CREATE TABLE exam_record (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int NOT NULL COMMENT '用户ID',
exam_id int NOT NULL COMMENT '试卷ID',
start_time datetime NOT NULL COMMENT '开始时间',
submit_time datetime COMMENT '提交时间',
score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO user_info(uid,`nick_name`,achievement,`level`,job,register_time) VALUES
(1001, '小丑1号', 3200, 7, '算法', '2020-01-01 10:00:00'),
(1002, '小丑2号', 2500, 6, '算法', '2020-01-01 10:00:00'),
(1003, '小丑3号', 2200, 5, '算法', '2020-01-01 10:00:00');
INSERT INTO examination_info(exam_id,tag,difficulty,duration,release_time) VALUES
(9001, 'SQL', 'hard', 60, '2020-01-01 10:00:00'),
(9002, 'SQL', 'hard', 80, '2020-01-01 10:00:00'),
(9003, '算法', 'hard', 80, '2020-01-01 10:00:00'),
(9004, 'PYTHON', 'medium', 70, '2020-01-01 10:00:00');
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
(1002, 9001, '2020-01-20 10:01:01', null, null),
(1002, 9001, '2020-02-01 12:11:01', null, null),
(1003, 9001, '2020-03-01 19:01:01', null, null),
(1001, 9001, '2020-03-01 12:01:01', null, null),
(1002, 9001, '2020-03-01 12:01:01', '2020-03-01 12:41:01', 90),
(1002, 9001, '2020-05-02 19:01:01', '2020-05-02 19:32:00', 90),
(1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
(1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
(1002, 9002, '2020-02-02 12:01:01', null, null),
(1002, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
(1002, 9002, '2020-03-02 12:11:01', null, null),
(1001, 9001, '2020-01-02 10:01:01', '2020-01-02 10:31:01', 89),
(1001, 9002, '2020-01-01 12:11:01', null, null),
(1002, 9001, '2020-01-01 18:01:01', '2020-01-01 18:59:02', 90),
(1002, 9003, '2020-05-06 12:01:01', null, null),
(1001, 9002, '2020-05-05 18:01:01', null, null);
SELECT e1.uid,DATE_FORMAT(e1.start_time,'%Y%m') start_month
,COUNT(e1.exam_id) total_cnt
,COUNT(IF(e1.submit_time IS NOT NULL,1,NULL)) complete_cnt
FROM exam_record e1
JOIN user_info u ON u.uid = e1.uid
JOIN (
SELECT e.uid
,PERCENT_RANK()OVER(ORDER BY COUNT(IF(e.submit_time IS NULL,1,NULL))/COUNT(e.exam_id) DESC) num
FROM exam_record e LEFT JOIN examination_info ei ON e.exam_id = ei.exam_id
WHERE ei.tag = 'SQL'
GROUP BY e.uid
) tmp ON tmp.uid = u.uid
WHERE tmp.num <= 0.5
AND u.level IN (6,7)
AND e1.start_time >= '2020-02-01 00:00:00'
GROUP BY e1.uid,DATE_FORMAT(e1.start_time,'%Y%m')
ORDER BY e1.uid,start_month
返回相对于窗口的第一行的窗口框架的指定行的表达式值。
NTH_VALUE (expr, offset) OVER ([ PARTITION BY expr_list ][ ORDER BY order_list [ frame_clause ]])
-- expr 对其执行函数的目标列或表达式
-- offset 确定相对于要为其返回表达式的窗口中的第一行的行号。offset 可以是常数或表达式,且必须为大于 0 的正整数。
drop table if exists examination_info,exam_record;
-- 试卷信息表
CREATE TABLE examination_info (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
exam_id int UNIQUE NOT NULL COMMENT '试卷ID',
tag varchar(32) COMMENT '类别标签',
difficulty varchar(8) COMMENT '难度',
duration int NOT NULL COMMENT '时长',
release_time datetime COMMENT '发布时间'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
-- 试卷作答记录表
CREATE TABLE exam_record (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int NOT NULL COMMENT '用户ID',
exam_id int NOT NULL COMMENT '试卷ID',
start_time datetime NOT NULL COMMENT '开始时间',
submit_time datetime COMMENT '提交时间',
score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO examination_info(exam_id,tag,difficulty,duration,release_time) VALUES
(9001, 'SQL', 'hard', 60, '2021-09-01 06:00:00'),
(9002, 'C++', 'hard', 60, '2021-09-01 06:00:00'),
(9003, '算法', 'medium', 80, '2021-09-01 10:00:00');
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2021-09-01 09:01:01', '2021-09-01 09:51:01', 78),
(1001, 9002, '2021-09-01 09:01:01', '2021-09-01 09:31:00', 81),
(1002, 9002, '2021-09-01 12:01:01', '2021-09-01 12:31:01', 81),
(1003, 9001, '2021-09-01 19:01:01', '2021-09-01 19:59:01', 86),
(1003, 9002, '2021-09-01 12:01:01', '2021-09-01 12:31:51', 89),
(1004, 9002, '2021-09-01 19:01:01', '2021-09-01 19:30:01', 85),
(1005, 9001, '2021-09-01 12:01:01', '2021-09-01 12:31:02', 85),
(1006, 9001, '2021-09-07 10:01:01', '2021-09-07 10:12:01', 84),
(1003, 9001, '2021-09-08 12:01:01', '2021-09-08 12:11:01', 40),
(1003, 9002, '2021-09-01 14:01:01', null, null),
(1005, 9001, '2021-09-01 14:01:01', null, null),
(1003, 9003, '2021-09-08 15:01:01', null, null);
SELECT
distinct t1.exam_id,
duration,
release_time
FROM
(
SELECT er.exam_id,duration,release_time,
NTH_VALUE(TIMESTAMPDIFF(minute,start_time,submit_time),2) over(PARTITION BY exam_id ORDER BY TIMESTAMPDIFF(minute,start_time,submit_time) DESC) AS head_2,
NTH_VALUE(TIMESTAMPDIFF(minute,start_time,submit_time),2) over(PARTITION BY exam_id ORDER BY TIMESTAMPDIFF(minute,start_time,submit_time) ASC) AS end_2
FROM examination_info ei
LEFT JOIN exam_record er
USING(exam_id)
) t1
WHERE head_2-end_2 > duration*0.5
ORDER BY exam_id desc
计算窗口内该字段或表达式的最大值,语法与MAX()函数一致
计算窗口内该字段或表达式的最小值,语法与MIN()函数一致
计算窗口内该字段或表达式的平均值,语法与AVG()函数一致
计算窗口内该字段或表达式的和,语法与SUM()函数一致
SELECT start_month, COUNT(DISTINCT uid),
COUNT(DISTINCT IF(start_month=last_month, uid, NULL)),
MAX(COUNT(DISTINCT IF(start_month=last_month, uid, NULL))) OVER (ORDER BY start_month) ,
SUM(COUNT(DISTINCT IF(start_month=last_month, uid, NULL))) OVER (ORDER BY start_month)
FROM
(SELECT DATE_FORMAT(start_time, '%Y%m') AS start_month, uid,
MIN(DATE_FORMAT(start_time, '%Y%m')) OVER (PARTITION BY uid ORDER BY DATE_FORMAT(start_time, '%Y%m')) AS last_month
FROM exam_record) t
GROUP BY start_month
计算窗口内该字段或表达式的数量,语法与COUNT()函数一致
drop table if exists exam_record;
-- 试卷作答记录表
CREATE TABLE exam_record (
id int PRIMARY KEY AUTO_INCREMENT COMMENT '自增ID',
uid int NOT NULL COMMENT '用户ID',
exam_id int NOT NULL COMMENT '试卷ID',
start_time datetime NOT NULL COMMENT '开始时间',
submit_time datetime COMMENT '提交时间',
score tinyint COMMENT '得分'
)CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO exam_record(uid,exam_id,start_time,submit_time,score) VALUES
(1001, 9001, '2020-01-01 09:01:01', '2020-01-01 09:21:59', 90),
(1002, 9001, '2020-01-20 10:01:01', '2020-01-20 10:10:01', 89),
(1002, 9001, '2020-02-01 12:11:01', '2020-02-01 12:31:01', 83),
(1003, 9001, '2020-03-01 19:01:01', '2020-03-01 19:30:01', 75),
(1004, 9001, '2020-03-01 12:01:01', '2020-03-01 12:11:01', 60),
(1003, 9001, '2020-03-01 12:01:01', '2020-03-01 12:41:01', 90),
(1002, 9001, '2020-05-02 19:01:01', '2020-05-02 19:32:00', 90),
(1001, 9002, '2020-01-02 19:01:01', '2020-01-02 19:59:01', 69),
(1004, 9002, '2020-02-02 12:01:01', '2020-02-02 12:20:01', 99),
(1003, 9002, '2020-02-02 12:01:01', '2020-02-02 12:31:01', 68),
(1001, 9002, '2020-02-02 12:01:01', '2020-02-02 12:43:01', 81),
(1001, 9002, '2020-03-02 12:11:01', null, null);
SELECT DISTINCT exam_id,
DATE_FORMAT(start_time,'%Y%m') AS start_month,
COUNT(start_time) OVER(PARTITION BY exam_id,DATE_FORMAT(start_time,'%Y%m')) as month_cnt,
COUNT(start_time) OVER(PARTITION BY exam_id ORDER BY DATE_FORMAT(start_time,'%Y%m')) as cum_exam_cnt
FROM exam_record
ORDER BY exam_id, start_month
计算某个窗口中某个值的累积分布。使用以下公式确定百分比排名:(x) / (所在窗口行数)
-- 根据部门统计小于等于当前工资的人数占部门总人数的比例
SELECT
name,
dept,
salary,
CUME_DIST() OVER (PARTITION BY dept ORDER BY salary) as cume_dist
FROM user_salary
获取该字段或表达式在窗口内排行第一,或排行末位的值,针对获取单个值的可以跳过排名类窗口函数直接使用这两个函数
向上窗口函数,取出字段列名所在的列,向上N行的数据,作为独立的列,该函数可代替表的自联接,且效率更高
LAG (value_expr [, offset ],[, defaultvalue] ) OVER ([ PARTITION BY expr_list ][ ORDER BY order_list [ frame_clause ]])
-- field 对其执行函数的目标列或表达式
-- offset 往上查找的offset行的数据 默认1,偏移量为 0 表示当前行
-- defaultvalue: 没有符合条件的默认值
向下窗口函数,取出字段列名所在的列,向下N行的数据,作为独立的列,该函数可代替表的自联接,且效率更高
LAG (value_expr [, offset ],[, defaultvalue] ) OVER ([ PARTITION BY expr_list ][ ORDER BY order_list [ frame_clause ]])
-- field 对其执行函数的目标列或表达式
-- offset 往下查找的offset行的数据 默认1,偏移量为 0 表示当前行
-- defaultvalue: 没有符合条件的默认值
哇,你居然看完了,感谢感谢
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。