赞
踩
实现一个 Hive UDF,可以将 Map 中的某一个或者多个 key 去掉。这里要继承 GenericUDF 这个抽象类,并实现它的三个抽象方法:initialize、evaluate 和 getDisplayString。执行 UDF 前的初始化工作(比如参数个数和类型的校验、函数返回值类型的声明等)放在 initialize 方法内;每一行数据的实际处理逻辑放在 evaluate 方法内;getDisplayString 用于返回该函数调用的可读描述。
代码写好之后,可以用如下方法创建这个函数:
add jar hive-udf-1.0-SNAPSHOT.jar;
create temporary function map_remove as 'com.xxx.hive.udf.MapRemove';
测试效果:
功能正常。
下面是关键代码:
pom.xml 文件:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the hive-udf artifact (com.xxx:hive-udf:1.0-SNAPSHOT). -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.xxx</groupId>
<artifactId>hive-udf</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<!-- Compile for Java 8 source and bytecode level. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<!-- Single place to set the Hive version used by the hive-exec dependency below. -->
<hive.version>2.3.5</hive.version>
</properties>
<dependencies>
<!-- hive-exec supplies the GenericUDF and ObjectInspector APIs the UDF is written against. -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
<exclusions>
<!-- Drop the transitive hadoop-common; it is declared explicitly below with a pinned version. -->
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
<!-- NOTE(review): presumably excluded because this artifact is often not resolvable
     from the default repositories; confirm before removing the exclusion. -->
<exclusion>
<groupId>org.pentaho</groupId>
<artifactId>pentaho-aggdesigner-algorithm</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Explicit hadoop-common, replacing the one excluded from hive-exec above. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.8.5</version>
</dependency>
</dependencies>
</project>
关键代码:
package com.xxx.hive.udf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * Hive generic UDF {@code map_remove(map, key1, key2, ...)}.
 *
 * <p>Returns a copy of the input map with every listed key removed. The input map itself is
 * never modified. A NULL input map yields NULL.
 */
@Description(name = "map_remove", value = "_FUNC_(map,key1,key2.....) - "
        + "Remove the keys from the input map.")
public class MapRemove extends GenericUDF {

    /** Inspector for the first argument (the map); set in {@link #initialize}. */
    private transient MapObjectInspector mapOI;

    /**
     * Validates the call signature and declares the return type.
     *
     * @param arguments inspectors of the call arguments; the first must describe a map and at
     *                  least one key argument must follow it
     * @return a standard map inspector built from the input map's key and value inspectors
     * @throws UDFArgumentException if fewer than two arguments are given or the first argument
     *                              is not a map
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length < 2) {
            throw new UDFArgumentLengthException("The function map_remove(map,key1,key2.....) needs at least two arguments.");
        }
        if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentTypeException(0, "Argument 1"
                    + " of function map_remove must be \"" + Category.MAP.toString().toLowerCase()
                    + "\", but \"" + arguments[0].getTypeName() + "\" was found.");
        }
        mapOI = (MapObjectInspector) arguments[0];
        return ObjectInspectorFactory.getStandardMapObjectInspector(
                mapOI.getMapKeyObjectInspector(), mapOI.getMapValueObjectInspector());
    }

    /**
     * Builds the reduced map for one row.
     *
     * @param arguments the map followed by the keys to remove
     * @return a new map without the given keys, or {@code null} when the input map is NULL
     * @throws HiveException if an argument cannot be evaluated
     */
    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        Map<?, ?> input = mapOI.getMap(arguments[0].get());
        if (input == null) {
            // The result is a local, not a field, so a NULL map can never leak the
            // result computed for a previous row.
            return null;
        }
        // Copy first so the caller-owned map is left untouched.
        Map<Object, Object> result = new HashMap<>(input);
        for (int i = 1; i < arguments.length; i++) {
            // NOTE(review): removal relies on equals()/hashCode() between the argument object
            // and the map's key objects; presumably both share the same representation.
            // Confirm for the SerDes in use.
            result.remove(arguments[i].get());
        }
        return result;
    }

    /**
     * Renders the call as {@code map_remove(arg1, arg2, ...)}.
     *
     * @param strings display strings of the call arguments
     * @return the readable form of this function call
     */
    @Override
    public String getDisplayString(String[] strings) {
        return "map_remove(" + String.join(", ", strings) + ")";
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。