当前位置:   article > 正文

hive元数据库rds查询_hive3通过元数据库查询表数据量

hive3通过元数据库查询表数据量

一、概念参考

Hive的元数据存储与元数据表

二、查询库信息

  1. -- 简单查询
  2. select
  3. `DB_ID`
  4. ,`NAME`
  5. ,`DB_LOCATION_URI`
  6. ,`DESC`
  7. from
  8. DBS
  9. where
  10. NAME = 'db_name'
  11. ;

三、查询表信息

  1. -- 查询表信息
  2. select
  3. *
  4. from
  5. TBLS
  6. where
  7. TBL_NAME = 'eu-west-1'
  8. limit 2
  9. ;
  10. -- 查询某些表的ID
  11. select
  12. *
  13. from (
  14. select
  15. TBL_ID
  16. ,concat(NAME,'.',TBL_NAME) as name
  17. from ( -- 表信息
  18. select
  19. DB_ID
  20. ,TBL_ID
  21. ,TBL_NAME
  22. from
  23. TBLS
  24. where
  25. REGION = 'eu-west-1'
  26. ) aa
  27. join ( -- 库信息
  28. select
  29. DB_ID
  30. ,NAME
  31. from
  32. DBS
  33. where
  34. REGION = 'eu-west-1'
  35. ) bb
  36. on aa.DB_ID = bb.DB_ID
  37. ) aa
  38. where
  39. name in ('ssjt.shy', 'ssjt.shy2')
  40. ;
  41. -- 选择某个库下的一批表
  42. select
  43. aa.TBL_ID
  44. ,bb.NAME as db_name
  45. ,aa.TBL_NAME as tb_name
  46. ,concat(bb.NAME,'.',aa.TBL_NAME) as name
  47. from (
  48. select
  49. DB_ID
  50. ,TBL_ID
  51. ,TBL_NAME
  52. from
  53. TBLS
  54. where
  55. REGION = 'eu-west-1'
  56. AND DELETE_TIME = 0
  57. ) aa
  58. join (
  59. select
  60. DB_ID
  61. ,NAME
  62. from
  63. DBS
  64. where
  65. REGION = 'eu-west-1'
  66. and NAME = 'tranods'
  67. ) bb
  68. on aa.DB_ID = bb.DB_ID
  69. limit 50
  70. ;
  71. -- 每个库下有多少张表
  72. select
  73. bb.NAME
  74. ,aa.cnt
  75. ,aa.cnt2
  76. ,aa.error
  77. from (
  78. select
  79. DB_ID
  80. ,COUNT(TBL_NAME) cnt
  81. ,COUNT( DISTINCT TBL_NAME) cnt2
  82. ,COUNT(TBL_NAME) - COUNT( DISTINCT TBL_NAME) error
  83. from
  84. TBLS
  85. where
  86. REGION = 'eu-west-1'
  87. AND DELETE_TIME = 0
  88. AND TBL_NAME NOT LIKE 'values__tmp__table__%' -- 指定表的限制条件
  89. group by
  90. DB_ID
  91. ) aa
  92. join (
  93. select
  94. DB_ID
  95. ,NAME
  96. from
  97. DBS
  98. where
  99. REGION = 'eu-west-1'
  100. ) bb
  101. on aa.DB_ID = bb.DB_ID
  102. order by
  103. aa.cnt desc
  104. ;
  105. -- 查询表的路径
  106. select
  107. concat(bb.NAME, '.', aa.TBL_NAME) as name
  108. ,bb.NAME db_name
  109. ,aa.TBL_NAME tb_name
  110. ,cc.LOCATION as location
  111. from
  112. TBLS aa
  113. join
  114. DBS bb
  115. on aa.DB_ID = bb.DB_ID
  116. join
  117. SDS cc
  118. on aa.SD_ID = cc.SD_ID
  119. where
  120. aa.DELETE_TIME = 0
  121. AND aa.REGION = 'eu-west-1'
  122. AND bb.REGION = 'eu-west-1'
  123. AND cc.DELETE_TIME = 0
  124. AND cc.LOCATION like '%-ind%'
  125. ;
  126. -- 表分区的最新修改时间
  127. select
  128. concat(dd.NAME, '.', cc.TBL_NAME) as name
  129. ,dd.NAME db_name
  130. ,cc.TBL_NAME tb_name
  131. ,aa.last_time
  132. ,bb.PARAM_VALUE last_modified
  133. from ( -- 分区最新更新时间
  134. select
  135. TBL_ID
  136. ,max(UPDATE_TIME) last_time
  137. from
  138. HIVE_PARTITIONS
  139. where
  140. DELETE_TIME = 0
  141. group by
  142. TBL_ID
  143. ) aa
  144. join -- 最后修改者
  145. HIVE_TABLE_PARAMS bb
  146. on aa.TBL_ID = bb.TBL_ID
  147. join -- 表名信息
  148. HIVE_TBLS cc
  149. on aa.TBL_ID = cc.TBL_ID
  150. join -- 库名信息
  151. HIVE_DBS dd
  152. on cc.DB_ID = dd.DB_ID
  153. where
  154. aa.last_time < 1659283200000 -- 过滤2022-08-01后没再更新的表
  155. AND bb.PARAM_KEY = 'last_modified_by'
  156. ;

四、查询分区信息

  1. -- 查询某些表的分区数
  2. -- 限定表统计分区,然后再关联出库名、表名
  3. select
  4. concat(cc.NAME, '.', bb.TBL_NAME) as name
  5. ,aa.cnt
  6. from (
  7. select
  8. TBL_ID
  9. ,count(1) as cnt
  10. from
  11. HIVE_PARTITIONS
  12. where
  13. DELETE_TIME = 0
  14. AND TBL_ID IN ( -- 指定要查询的表
  15. select
  16. TBL_ID
  17. from (
  18. select
  19. TBL_ID
  20. ,concat(NAME,'.',TBL_NAME) as name
  21. from (
  22. select
  23. DB_ID
  24. ,TBL_ID
  25. ,TBL_NAME
  26. from
  27. HIVE_TBLS
  28. where
  29. REGION = 'eu-west-1'
  30. AND DELETE_TIME = 0
  31. ) aa
  32. join (
  33. select
  34. DB_ID
  35. ,NAME
  36. from
  37. HIVE_DBS
  38. where
  39. REGION = 'eu-west-1'
  40. ) bb
  41. on aa.DB_ID = bb.DB_ID
  42. ) aa
  43. where
  44. name in ('ssjt.shy', 'ssjt.shy')
  45. )
  46. group by
  47. TBL_ID
  48. ) aa
  49. join
  50. HIVE_TBLS bb
  51. on aa.TBL_ID = bb.TBL_ID
  52. join
  53. HIVE_DBS cc
  54. on bb.DB_ID = cc.DB_ID
  55. ;
  56. -- 不限定表查询分区数
  57. select
  58. concat(cc.NAME, '.', bb.TBL_NAME) as name
  59. ,aa.cnt
  60. from (
  61. select
  62. TBL_ID
  63. ,count(1) as cnt
  64. from
  65. HIVE_PARTITIONS
  66. where
  67. DELETE_TIME = 0
  68. group by
  69. TBL_ID
  70. ) aa
  71. join
  72. HIVE_TBLS bb
  73. on aa.TBL_ID = bb.TBL_ID
  74. join
  75. HIVE_DBS cc
  76. on bb.DB_ID = cc.DB_ID
  77. where
  78. bb.REGION = 'eu-west-1'
  79. AND bb.DELETE_TIME = 0
  80. AND cc.REGION = 'eu-west-1'
  81. order by
  82. aa.cnt desc
  83. limit 100
  84. ;

五、查询函数信息

  1. -- 查询函数
  2. select
  3. *
  4. from
  5. funcs aa
  6. where
  7. FUNC_NAME='getwordandtranslate'
  8. ;
  9. -- 查询资源
  10. select
  11. *
  12. from
  13. func_ru
  14. where
  15. FUNC_ID = 28
  16. ;
  17. -- 查询详细信息
  18. select
  19. bb.NAME
  20. ,aa.FUNC_NAME
  21. ,aa.CLASS_NAME
  22. ,cc.RESOURCE_URI
  23. from
  24. funcs aa
  25. join
  26. dbs bb
  27. on aa.DB_ID = bb.DB_ID
  28. join
  29. func_ru cc
  30. on aa.FUNC_ID = cc.FUNC_ID
  31. ;

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/木道寻08/article/detail/969467
推荐阅读
相关标签
  

闽ICP备14008679号