赞
踩
编写 SQL 处理使用 GROUP BY 产生的数据倾斜问题:
- import java.util.Random
-
- import org.apache.spark.sql.SparkSession
-
- object TestUDF {
- def main(args: Array[String]): Unit = {
- val spark =
- SparkSession.builder()
- .appName("TestUDF")
- .enableHiveSupport()
- .getOrCreate()
-
-
- spark.udf.register("random_prefix", (value: Int, num: Int) => randomPrefixUDF(value, num))
- spark.udf.register("remove_random_prefix", (value: String) => removeRandomPrefixUDF(value))
-
- // 加随机前缀
- val sql1 =
- s"""
- |select
- | random_prefix(name, 6) product,
- | id
- |from
- | ggg.test
- """.stripMargin
-
- // 分组求和
- val sql2 =
- s"""
- |select
- | product,
- | sum(id) click
- |from
- | (
- | select
- | random_prefix(name, 6) product,
-
![](https://csdnimg.cn/release/blogv2/dist/pc/img/newCodeMoreWhite.png)
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。