当前位置:   article > 正文

SQL面试题:拉链表实现_sql 生成拉链表

sql 生成拉链表

题目

  • 拉链表有什么用途?如何实现拉链表?

实现

第一步:建表和初始化数据

  1. CREATE TABLE IF NOT EXISTS test_dim_douyin_users
  2. (
  3. user_id INT COMMENT '用户ID',
  4. username STRING COMMENT '用户名',
  5. fans_cnt INT COMMENT '粉丝数',
  6. follow_cnt INT COMMENT '关注数',
  7. video_cnt INT COMMENT '视频数量',
  8. create_time STRING COMMENT '创建时间',
  9. update_time STRING COMMENT '修改时间',
  10. start_date STRING COMMENT '用户表同步日期和失效日期',
  11. end_date STRING COMMENT '最新用户表日期和更新日期'
  12. )
  13. COMMENT '抖音用户表'
  14. PARTITIONED BY (part_date STRING)
  15. STORED AS ORC;
  16. INSERT OVERWRITE TABLE test_dim_douyin_users PARTITION (part_date = '9999-12-31')
  17. VALUES (1, '大狼狗夫妇', 12, 5, 1, '2023-09-25 10:00:00', '2023-09-26 11:00:00', '2023-09-25', '9999-12-31');
  18. --创建增量表
  19. CREATE TABLE IF NOT EXISTS test_dim_douyin_users_incr
  20. (
  21. user_id INT COMMENT '用户ID',
  22. username STRING COMMENT '用户名',
  23. fans_cnt INT COMMENT '粉丝数',
  24. follow_cnt INT COMMENT '关注数',
  25. video_cnt INT COMMENT '视频数量',
  26. create_time STRING COMMENT '创建时间',
  27. update_time STRING COMMENT '修改时间'
  28. )
  29. COMMENT '抖音用户表-每日增量表'
  30. STORED AS ORC;
  31. INSERT OVERWRITE TABLE test_dim_douyin_users_incr
  32. VALUES (1, '大狼狗夫妇', 300, 5, 11, '2023-09-25 10:00:00', '2023-09-27 15:36:00');

第二步:插入新增数据

  1. --可以将'2023-09-28'替换为${part_date},然后使用动态分区
  2. --SET hive.exec.dynamic.partition.mode=nonstrict;
  3. WITH temp1 AS (
  4. SELECT
  5. user_id,
  6. username,
  7. fans_cnt,
  8. follow_cnt,
  9. video_cnt,
  10. create_time,
  11. update_time,
  12. start_date,
  13. end_date
  14. FROM
  15. test_dim_douyin_users
  16. WHERE
  17. part_date = '9999-12-31'
  18. UNION ALL
  19. SELECT
  20. user_id,
  21. username,
  22. fans_cnt,
  23. follow_cnt,
  24. video_cnt,
  25. create_time,
  26. update_time,
  27. '2023-09-27' start_date,
  28. '9999-12-31' end_date
  29. FROM
  30. (
  31. SELECT
  32. user_id,
  33. username,
  34. fans_cnt,
  35. follow_cnt,
  36. video_cnt,
  37. create_time,
  38. update_time,
  39. row_number() over (partition by user_id order by update_time desc) as rn
  40. FROM
  41. test_dim_douyin_users_incr
  42. WHERE
  43. date_format(create_time, 'yyyy-MM-dd') = '2023-09-27'
  44. OR date_format(update_time, 'yyyy-MM-dd') = '2023-09-27'
  45. ) t1
  46. WHERE
  47. rn = 1
  48. ), temp2 AS (
  49. SELECT
  50. user_id,
  51. username,
  52. fans_cnt,
  53. follow_cnt,
  54. video_cnt,
  55. create_time,
  56. update_time,
  57. start_date,
  58. end_date,
  59. row_number() over (partition by user_id order by update_time desc) rn
  60. FROM
  61. temp1
  62. )
  63. INSERT OVERWRITE TABLE test_dim_douyin_users PARTITION (part_date)
  64. SELECT
  65. user_id,
  66. username,
  67. fans_cnt,
  68. follow_cnt,
  69. video_cnt,
  70. create_time,
  71. update_time,
  72. start_date,
  73. if(rn = 1, '9999-12-31', date_sub('2023-09-27', 1)) end_date,
  74. if(rn = 1, '9999-12-31', date_sub('2023-09-27', 1)) part_date
  75. FROM
  76. temp2;

结果

第三步:再次插入新增数据

  1. INSERT OVERWRITE TABLE test_dim_douyin_users_incr
  2. VALUES (1, '大狼狗夫妇', 1000, 5, 14, '2023-09-25 10:00:00', '2023-09-28 15:37:00');
  3. WITH temp1 AS (
  4. SELECT
  5. user_id,
  6. username,
  7. fans_cnt,
  8. follow_cnt,
  9. video_cnt,
  10. create_time,
  11. update_time,
  12. start_date,
  13. end_date
  14. FROM
  15. test_dim_douyin_users
  16. WHERE
  17. part_date = '9999-12-31'
  18. UNION ALL
  19. SELECT
  20. user_id,
  21. username,
  22. fans_cnt,
  23. follow_cnt,
  24. video_cnt,
  25. create_time,
  26. update_time,
  27. '2023-09-28' start_date,
  28. '9999-12-31' end_date
  29. FROM
  30. (
  31. SELECT
  32. user_id,
  33. username,
  34. fans_cnt,
  35. follow_cnt,
  36. video_cnt,
  37. create_time,
  38. update_time,
  39. row_number() over (partition by user_id order by update_time desc) as rn
  40. FROM
  41. test_dim_douyin_users_incr
  42. WHERE
  43. date_format(create_time, 'yyyy-MM-dd') = '2023-09-28'
  44. OR date_format(update_time, 'yyyy-MM-dd') = '2023-09-28'
  45. ) t1
  46. WHERE
  47. rn = 1
  48. ), temp2 AS (
  49. SELECT
  50. user_id,
  51. username,
  52. fans_cnt,
  53. follow_cnt,
  54. video_cnt,
  55. create_time,
  56. update_time,
  57. start_date,
  58. end_date,
  59. row_number() over (partition by user_id order by update_time desc) rn
  60. FROM
  61. temp1
  62. )
  63. INSERT OVERWRITE TABLE test_dim_douyin_users PARTITION (part_date)
  64. SELECT
  65. user_id,
  66. username,
  67. fans_cnt,
  68. follow_cnt,
  69. video_cnt,
  70. create_time,
  71. update_time,
  72. start_date,
  73. if(rn = 1, '9999-12-31', date_sub('2023-09-28', 1)) end_date,
  74. if(rn = 1, '9999-12-31', date_sub('2023-09-28', 1)) part_date
  75. FROM
  76. temp2;

最终结果

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/喵喵爱编程/article/detail/867957
推荐阅读
相关标签
  

闽ICP备14008679号