赞
踩
这里需要继承 GenericUDTF 这个抽象类，直接上代码：
package com.xxx.hive.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
/**
 * Hive UDTF that explodes an encoded "details" string into one output row per step.
 *
 * <p>Registration:
 * {@code CREATE temporary function details as 'com.xxx.hive.udf.Details' USING JAR '';}
 *
 * <p>Input layout, as parsed by {@link #process(Object[])}:
 * <ul>
 *   <li>records are separated by {@code ';'};</li>
 *   <li>each record is {@code header-steps}; only the first two {@code '-'}-separated
 *       parts are read;</li>
 *   <li>the header is a {@code '_'}-separated list with at least 10 fields;</li>
 *   <li>the steps part is a {@code '_'}-separated list, each step being a
 *       {@code '~'}-separated list with at least 5 fields.</li>
 * </ul>
 *
 * <p>Example input:
 * <pre>
 * 0_0_0_0_0~0_0~0_0_0_0_25-1800~0~19~0~19_4800~0~80~0~80_21200~1~0~0~0;
 * 1_1_1_0_9~8_9~9_0_0_27750_24-1800~0~19~0~19_4800~0~80~0~80_21200~1~0~0~0
 * </pre>
 *
 * <p>Output columns: {@code moves} (header field 1), {@code left_moves} (header field 9),
 * {@code mubiao} (step field 0) and {@code left_num} (step field 4 minus step field 3).
 *
 * @author xxx
 */
@Description(name = "details", value = "_FUNC_(expr) - ")
public class Details extends GenericUDTF {

    /** Minimum number of header fields; {@code header[9]} is the highest index read. */
    private static final int MIN_HEADER_FIELDS = 10;

    /** Minimum number of fields in a step; {@code step[4]} is the highest index read. */
    private static final int MIN_STEP_FIELDS = 5;

    /** Records whose {@code header[3]} equals this value are not emitted. */
    private static final int SKIPPED_HEADER_FLAG = 2;

    /** Nothing to release: this function holds no resources. */
    @Override
    public void close() throws HiveException {
    }

    /**
     * Parses the single argument and forwards one row per well-formed step.
     *
     * <p>Records that are malformed (missing the steps part, too few header fields, a header
     * ending in {@code '_'}, or a non-numeric {@code header[3]}) are skipped, as are steps
     * with fewer than 5 fields.
     *
     * @param objects the call arguments; only {@code objects[0]} is read, via {@code toString()}
     * @throws HiveException if a step's counters (fields 3 and 4) are not integers, or if
     *                       forwarding a row fails
     */
    @Override
    public void process(Object[] objects) throws HiveException {
        // Null must be tested before the array is dereferenced; a null first
        // element (SQL NULL argument) produces no rows.
        if (objects == null || objects.length == 0 || objects[0] == null) {
            return;
        }
        String input = objects[0].toString();
        if (StringUtils.isEmpty(input)) {
            return;
        }

        for (String line : input.split(";")) {
            String[] arrs = line.split("-");
            if (arrs.length < 2) {
                System.out.println("+++++" + input);
                continue;
            }

            String[] header = arrs[0].split("_");
            // String.split drops trailing empty fields, so a header ending in '_'
            // has lost its last field; skip it along with any header too short to
            // contain header[9].
            if (arrs[0].endsWith("_") || header.length < MIN_HEADER_FIELDS) {
                continue;
            }

            // 2024-04-02: header[3] may hold a value such as "0~0" that is not a
            // number; such records are skipped.
            int headerFlag;
            try {
                headerFlag = Integer.parseInt(header[3]);
            } catch (NumberFormatException ignored) {
                continue;
            }
            if (headerFlag == SKIPPED_HEADER_FLAG) {
                continue;
            }

            for (String rawStep : arrs[1].split("_")) {
                String[] step = rawStep.split("~");
                if (step.length < MIN_STEP_FIELDS) {
                    continue;
                }
                int leftNum;
                try {
                    leftNum = Integer.parseInt(step[4]) - Integer.parseInt(step[3]);
                } catch (NumberFormatException e) {
                    // Still fails the call, but now names the offending data.
                    throw new HiveException(
                            "Non-numeric step counters in step '" + rawStep + "' of input: " + input, e);
                }
                // Column order must match the struct declared in initialize().
                Object result = new Object[]{header[1], header[9], step[0], leftNum};
                forward(result);
            }
        }
    }

    /**
     * Validates the argument list and declares the output row schema:
     * three string columns ({@code moves}, {@code left_moves}, {@code mubiao}) followed by
     * one int column ({@code left_num}).
     *
     * <p>Overriding this method avoids hitting the exception
     * "Error in query: No handler for UDF/UDAF/UDTF".
     *
     * @param args object inspectors of the call arguments
     * @return the struct inspector describing each forwarded row
     * @throws UDFArgumentException if the argument count is not exactly one or the argument
     *                              is not of a primitive category
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 1) {
            throw new UDFArgumentLengthException("xxx takes only one argument");
        }
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("xxx takes string as a parameter");
        }

        ArrayList<String> fieldNames = new ArrayList<String>();
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();

        fieldNames.add("moves");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("left_moves");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("mubiao");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        fieldNames.add("left_num");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }
}
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。