赞
踩
目录
大家好,我是老六。今天和大家一起来看下hive中内置函数concat的源码。
hive函数官方文档:LanguageManual UDF - Apache Hive - Apache Software Foundation
函数使用比较简单
结果
- /**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- package org.apache.hadoop.hive.ql.udf.generic;
-
- import org.apache.hadoop.hive.common.type.HiveChar;
- import org.apache.hadoop.hive.common.type.HiveVarchar;
- import org.apache.hadoop.hive.ql.exec.Description;
- import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
- import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol;
- import org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol;
- import org.apache.hadoop.hive.ql.metadata.HiveException;
- import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
- import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
- import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
- import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
- import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
- import org.apache.hadoop.io.BytesWritable;
-
/**
 * GenericUDFConcat — Hive's built-in CONCAT(arg1, arg2, ... argN) UDF.
 *
 * <p>Concatenates an arbitrary number of primitive arguments. When every
 * argument is BINARY, the raw bytes are concatenated and a BINARY result is
 * returned. Otherwise every argument is converted to a string and the result
 * type is CHAR, VARCHAR, or STRING, as negotiated in {@link #initialize}.
 * Per the extended description below, a NULL in any argument yields NULL.
 */
@Description(name = "concat",
    value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or "+
    "_FUNC_(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data " +
    " bin1, bin2, ... binN",
    extended = "Returns NULL if any argument is NULL.\n"
    + "Example:\n"
    + "  > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n"
    + "  'abcdef'")
@VectorizedExpressions({StringGroupConcatColCol.class,
    StringGroupColConcatStringScalar.class,
    StringGroupColConcatCharScalar.class, StringGroupColConcatVarCharScalar.class,
    StringScalarConcatStringGroupCol.class,
    CharScalarConcatStringGroupCol.class, VarCharScalarConcatStringGroupCol.class})
public class GenericUDFConcat extends GenericUDF {
  // Object inspectors for the raw arguments, captured in initialize().
  private transient ObjectInspector[] argumentOIs;
  // Per-argument to-String converters; populated only on the non-BINARY path.
  private transient StringConverter[] stringConverters;
  // Negotiated result category; defaults to STRING, refined in initialize().
  private transient PrimitiveCategory returnType = PrimitiveCategory.STRING;
  // Scratch array holding each argument's bytes; used only on the BINARY path.
  private transient BytesWritable[] bw;
  // Wraps the concatenated string into the proper writable (STRING/CHAR/VARCHAR).
  private transient GenericUDFUtils.StringHelper returnHelper;

  /**
   * Validates that all arguments are primitive and determines the return
   * type (and, for CHAR/VARCHAR, the return length), then prepares the
   * per-argument converters or scratch buffers used by evaluate().
   *
   * @param arguments object inspectors for the call's arguments
   * @return the object inspector describing the result type
   * @throws UDFArgumentException if any argument is non-primitive, or the
   *         negotiated return type is unexpected
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {

    // Loop through all the inputs to determine the appropriate return type/length.
    // Return type:
    //  All CHAR inputs: return CHAR
    //  All VARCHAR inputs: return VARCHAR
    //  All CHAR/VARCHAR inputs: return VARCHAR
    //  All BINARY inputs: return BINARY
    //  Otherwise return STRING
    argumentOIs = arguments;

    PrimitiveCategory currentCategory;
    PrimitiveObjectInspector poi;
    // True while every argument seen so far has a statically known max length
    // (CHAR/VARCHAR); cleared the moment any other category appears.
    boolean fixedLengthReturnValue = true;
    int returnLength = 0;  // Only for char/varchar return types
    for (int idx = 0; idx < arguments.length; ++idx) {
      if (arguments[idx].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentException("CONCAT only takes primitive arguments");
      }
      poi = (PrimitiveObjectInspector)arguments[idx];
      currentCategory = poi.getPrimitiveCategory();
      if (idx == 0) {
        // Seed the return type from the first argument; later iterations may
        // demote it (e.g. to STRING on a type mix).
        returnType = currentCategory;
      }
      switch (currentCategory) {
        case BINARY:
          fixedLengthReturnValue = false;
          if (returnType != currentCategory) {
            // mix of binary/non-binary args
            returnType = PrimitiveCategory.STRING;
          }
          break;
        case CHAR:
        case VARCHAR:
          if (!fixedLengthReturnValue) {
            // A non-char-family argument appeared earlier; fall back to STRING.
            returnType = PrimitiveCategory.STRING;
          }
          if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
            // Any VARCHAR in an otherwise CHAR/VARCHAR mix makes the result VARCHAR.
            returnType = PrimitiveCategory.VARCHAR;
          }
          break;
        default:
          returnType = PrimitiveCategory.STRING;
          fixedLengthReturnValue = false;
          break;
      }

      // If all arguments are of known length then we can keep track of the max
      // length of the return type. However if the return length exceeds the
      // max length for the char/varchar, then the return type reverts to string.
      if (fixedLengthReturnValue) {
        returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
        if ((returnType == PrimitiveCategory.VARCHAR
                && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH)
            || (returnType == PrimitiveCategory.CHAR
                && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
          returnType = PrimitiveCategory.STRING;
          fixedLengthReturnValue = false;
        }
      }
    }

    if (returnType == PrimitiveCategory.BINARY) {
      // All-BINARY call: allocate the per-argument scratch array once here,
      // so binaryEvaluate() can reuse it on every row.
      bw = new BytesWritable[arguments.length];
      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    } else {
      // treat all inputs as string, the return value will be converted to the appropriate type.
      createStringConverters();
      returnHelper = new GenericUDFUtils.StringHelper(returnType);
      BaseCharTypeInfo typeInfo;
      switch (returnType) {
        case STRING:
          return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        case CHAR:
          typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
        case VARCHAR:
          typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
        default:
          // Defensive: the loop above only ever leaves STRING/CHAR/VARCHAR here.
          throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
      }
    }
  }

  /**
   * Builds one StringConverter per argument so each input can be coerced to
   * a Java String on the non-BINARY evaluation path.
   */
  private void createStringConverters() {
    stringConverters = new StringConverter[argumentOIs.length];
    for (int idx = 0; idx < argumentOIs.length; ++idx) {
      stringConverters[idx] = new StringConverter((PrimitiveObjectInspector) argumentOIs[idx]);
    }
  }

  /**
   * Dispatches a row to the byte-wise or string-wise concatenation routine
   * based on the return type negotiated in initialize().
   *
   * @param arguments the deferred argument values for the current row
   * @return a BytesWritable for the BINARY path, a writable string-family
   *         value otherwise, or null if any argument is null
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (returnType == PrimitiveCategory.BINARY) {
      return binaryEvaluate(arguments);
    } else {
      return returnHelper.setReturnValue(stringEvaluate(arguments));
    }
  }

  /**
   * Concatenates the raw bytes of all (BINARY) arguments.
   *
   * @return a new BytesWritable with the concatenated bytes, or null if any
   *         argument is null
   */
  public Object binaryEvaluate(DeferredObject[] arguments) throws HiveException {
    int len = 0;
    // First pass: fetch each argument's bytes and total their lengths.
    for (int idx = 0; idx < arguments.length; ++idx) {
      bw[idx] = ((BinaryObjectInspector)argumentOIs[idx])
          .getPrimitiveWritableObject(arguments[idx].get());
      if (bw[idx] == null){
        return null;
      }
      len += bw[idx].getLength();
    }

    byte[] out = new byte[len];
    int curLen = 0;
    // Need to iterate twice since BytesWritable doesn't support append.
    for (BytesWritable bytes : bw){
      // Use getLength(), not getBytes().length: the backing array may be
      // larger than the valid region.
      System.arraycopy(bytes.getBytes(), 0, out, curLen, bytes.getLength());
      curLen += bytes.getLength();
    }
    return new BytesWritable(out);
  }

  /**
   * Converts every argument to a String and concatenates them in order.
   *
   * @return the concatenated string, or null if any argument (or its
   *         converted value) is null
   */
  public String stringEvaluate(DeferredObject[] arguments) throws HiveException {
    StringBuilder sb = new StringBuilder();
    for (int idx = 0; idx < arguments.length; ++idx) {
      String val = null;
      if (arguments[idx] != null) {
        val = (String) stringConverters[idx].convert(arguments[idx].get());
      }
      // NULL-in, NULL-out: a single null argument nullifies the whole result.
      if (val == null) {
        return null;
      }
      sb.append(val);
    }
    return sb.toString();
  }

}
需关注的函数有initialize()、evaluate()、binaryEvaluate()、stringEvaluate()。其中initialize()函数进行初始化,判断参数类型、进行类型转换以及设定返回类型,为下一阶段做准备;evaluate()内含拼接的分发逻辑,根据返回类型的不同,分别调用binaryEvaluate()或stringEvaluate(),这两个函数才是真正实现拼接逻辑的函数,比较简单,大家可以自己看。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。