当前位置:   article > 正文

hive udf去掉map中的一个或者多个key

hive udf去掉map中的一个或者多个key

实现一个hive udf,可以将Map中的某一个或者多个key去掉,这里要继承GenericUDF 这个抽象类,然后Override evaluate这个函数即可,可以把执行这个udf前初始化的一些内容放在initialize方法内,比如参数的判断,函数的返回值类型等等。

代码写好之后,可以用如下方法创建这个函数:

add jar hive-udf-1.0-SNAPSHOT.jar;
create temporary function map_remove as 'com.xxx.hive.udf.MapRemove';
  • 1
  • 2

测试效果:
在这里插入图片描述
功能正常。

下面是关键代码:

pom.xml 文件:

 <?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.xxx</groupId>
    <artifactId>hive-udf</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <hive.version>2.3.5</hive.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>${hive.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>hadoop-common</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.pentaho</groupId>
                    <artifactId>pentaho-aggdesigner-algorithm</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.5</version>
        </dependency>
    </dependencies>

</project>
  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40

关键代码:

package com.xxx.hive.udf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;

import java.util.HashMap;
import java.util.Map;


@Description(name = "map_remove", value = "_FUNC_(map,key1,key2.....) - "
        + "Remove the keys from the input map.")
public class MapRemove extends GenericUDF {

    private transient MapObjectInspector mapOI;
    private transient HashMap<?, ?> ret;

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length < 2) {
            throw new UDFArgumentLengthException("The function map_remove(map,key1,key2.....) needs at least two arguments.");
        } else if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentTypeException(0, "Argument 1"
                    + " of function map_remove must be \"" + Category.MAP.toString().toLowerCase()
                    + "\", but \"" + arguments[0].getTypeName() + "\" was found.");
        }
        mapOI = (MapObjectInspector) arguments[0];
        return ObjectInspectorFactory.getStandardMapObjectInspector(mapOI.getMapKeyObjectInspector(), mapOI.getMapValueObjectInspector());
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        Object mapObj = arguments[0].get();
        Map<?, ?> mapVal = mapOI.getMap(mapObj);
        if (mapVal != null) {
            ret = new HashMap<>(mapVal);
            for (int i = 1; i < arguments.length; i++) {
                ret.remove(arguments[i].get());
            }
        }
        return ret;
    }

    @Override
    public String getDisplayString(String[] strings) {
        return null;
    }
}

  • 1
  • 2
  • 3
  • 4
  • 5
  • 6
  • 7
  • 8
  • 9
  • 10
  • 11
  • 12
  • 13
  • 14
  • 15
  • 16
  • 17
  • 18
  • 19
  • 20
  • 21
  • 22
  • 23
  • 24
  • 25
  • 26
  • 27
  • 28
  • 29
  • 30
  • 31
  • 32
  • 33
  • 34
  • 35
  • 36
  • 37
  • 38
  • 39
  • 40
  • 41
  • 42
  • 43
  • 44
  • 45
  • 46
  • 47
  • 48
  • 49
  • 50
  • 51
  • 52
  • 53
  • 54
  • 55
  • 56
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/人工智能uu/article/detail/942412
推荐阅读
相关标签
  

闽ICP备14008679号