当前位置:   article > 正文

hive源码之concat函数_hiveconcat函数

hiveconcat函数

目录

一、函数使用

二、使用案例

三、源码分析

四、学习总结


大家好,我是老六。今天和大家一起来看下hive中内置函数concat的源码。

一、函数使用

hive函数官方文档:LanguageManual UDF - Apache Hive - Apache Software Foundation

二、使用案例

 函数使用比较简单

结果

三、源码分析 

  1. /**
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing, software
  13. * distributed under the License is distributed on an "AS IS" BASIS,
  14. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. * See the License for the specific language governing permissions and
  16. * limitations under the License.
  17. */
  18. package org.apache.hadoop.hive.ql.udf.generic;
  19. import org.apache.hadoop.hive.common.type.HiveChar;
  20. import org.apache.hadoop.hive.common.type.HiveVarchar;
  21. import org.apache.hadoop.hive.ql.exec.Description;
  22. import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
  23. import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
  24. import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol;
  25. import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar;
  26. import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar;
  27. import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar;
  28. import org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol;
  29. import org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol;
  30. import org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol;
  31. import org.apache.hadoop.hive.ql.metadata.HiveException;
  32. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
  33. import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
  34. import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
  35. import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
  36. import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
  37. import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
  38. import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
  39. import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
  40. import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
  41. import org.apache.hadoop.io.BytesWritable;
/**
 * GenericUDFConcat.
 *
 * Implementation of Hive's built-in CONCAT() function. Concatenates an
 * arbitrary number of primitive arguments into one value:
 *   - if every argument is BINARY, the raw bytes are concatenated and a
 *     BINARY result is returned (see binaryEvaluate);
 *   - otherwise every argument is converted to string form and the pieces
 *     are appended in order (see stringEvaluate), with the result typed as
 *     CHAR/VARCHAR/STRING according to the rules in initialize().
 * Returns NULL as soon as any single argument is NULL.
 */
@Description(name = "concat",
value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or "+
"_FUNC_(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data " +
" bin1, bin2, ... binN",
extended = "Returns NULL if any argument is NULL.\n"
+ "Example:\n"
+ " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n"
+ " 'abcdef'")
// Maps this UDF onto its vectorized column/scalar implementations so the
// vectorized execution engine can be used instead of row-by-row evaluation.
@VectorizedExpressions({StringGroupConcatColCol.class,
StringGroupColConcatStringScalar.class,
StringGroupColConcatCharScalar.class, StringGroupColConcatVarCharScalar.class,
StringScalarConcatStringGroupCol.class,
CharScalarConcatStringGroupCol.class, VarCharScalarConcatStringGroupCol.class})
public class GenericUDFConcat extends GenericUDF {
  // All fields are transient: they are rebuilt by initialize() on each task
  // and are intentionally excluded from plan serialization.
  private transient ObjectInspector[] argumentOIs;          // inspectors for the raw arguments, saved in initialize()
  private transient StringConverter[] stringConverters;     // per-argument converters to String (non-binary path only)
  private transient PrimitiveCategory returnType = PrimitiveCategory.STRING;  // negotiated result category
  private transient BytesWritable[] bw;                     // scratch array for the all-BINARY path (allocated once)
  private transient GenericUDFUtils.StringHelper returnHelper;  // wraps the String result into the proper writable type

  /**
   * Inspects the argument types once per query and decides the return type.
   *
   * @param arguments object inspectors for each CONCAT argument; all must be PRIMITIVE
   * @return the object inspector describing the concatenated result
   * @throws UDFArgumentException if a non-primitive argument is passed or the
   *         negotiated return type is unexpected
   */
  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Loop through all the inputs to determine the appropriate return type/length.
    // Return type:
    //  All CHAR inputs: return CHAR
    //  All VARCHAR inputs: return VARCHAR
    //  All CHAR/VARCHAR inputs: return VARCHAR
    //  All BINARY inputs: return BINARY
    //  Otherwise return STRING
    argumentOIs = arguments;

    PrimitiveCategory currentCategory;
    PrimitiveObjectInspector poi;
    boolean fixedLengthReturnValue = true; // stays true only while every arg is CHAR/VARCHAR
    int returnLength = 0;  // Only for char/varchar return types
    for (int idx = 0; idx < arguments.length; ++idx) {
      if (arguments[idx].getCategory() != Category.PRIMITIVE) {
        throw new UDFArgumentException("CONCAT only takes primitive arguments");
      }
      poi = (PrimitiveObjectInspector)arguments[idx];
      currentCategory = poi.getPrimitiveCategory();
      if (idx == 0) {
        // Seed the return type with the first argument's category.
        returnType = currentCategory;
      }
      switch (currentCategory) {
        case BINARY:
          fixedLengthReturnValue = false;
          if (returnType != currentCategory) {
            // mix of binary/non-binary args
            returnType = PrimitiveCategory.STRING;
          }
          break;
        case CHAR:
        case VARCHAR:
          if (!fixedLengthReturnValue) {
            // A previous arg already forced a variable-length result.
            returnType = PrimitiveCategory.STRING;
          }
          if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) {
            // Any VARCHAR in an otherwise CHAR mix promotes the result to VARCHAR.
            returnType = PrimitiveCategory.VARCHAR;
          }
          break;
        default:
          // Any other primitive (int, string, date, ...) yields a plain STRING result.
          returnType = PrimitiveCategory.STRING;
          fixedLengthReturnValue = false;
          break;
      }

      // If all arguments are of known length then we can keep track of the max
      // length of the return type. However if the return length exceeds the
      // max length for the char/varchar, then the return type reverts to string.
      if (fixedLengthReturnValue) {
        returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi);
        if ((returnType == PrimitiveCategory.VARCHAR
                && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH)
            || (returnType == PrimitiveCategory.CHAR
                && returnLength > HiveChar.MAX_CHAR_LENGTH)) {
          returnType = PrimitiveCategory.STRING;
          fixedLengthReturnValue = false;
        }
      }
    }

    if (returnType == PrimitiveCategory.BINARY) {
      // All-binary path: pre-allocate the per-argument scratch array once.
      bw = new BytesWritable[arguments.length];
      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
    } else {
      // treat all inputs as string, the return value will be converted to the appropriate type.
      createStringConverters();
      returnHelper = new GenericUDFUtils.StringHelper(returnType);
      BaseCharTypeInfo typeInfo;
      switch (returnType) {
        case STRING:
          return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
        case CHAR:
          typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength);
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
        case VARCHAR:
          typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength);
          return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
        default:
          throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType);
      }
    }
  }

  // Builds one StringConverter per argument so each value can be coerced to
  // a Java String in stringEvaluate(). Only used on the non-binary path.
  private void createStringConverters() {
    stringConverters = new StringConverter[argumentOIs.length];
    for (int idx = 0; idx < argumentOIs.length; ++idx) {
      stringConverters[idx] = new StringConverter((PrimitiveObjectInspector) argumentOIs[idx]);
    }
  }

  /**
   * Dispatches per row to the binary or the string concatenation path chosen
   * during initialize(); string results are wrapped into the negotiated
   * CHAR/VARCHAR/STRING writable by returnHelper.
   */
  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (returnType == PrimitiveCategory.BINARY) {
      return binaryEvaluate(arguments);
    } else {
      return returnHelper.setReturnValue(stringEvaluate(arguments));
    }
  }

  /**
   * Concatenates the raw bytes of all BINARY arguments.
   * Returns null if any argument is null.
   */
  public Object binaryEvaluate(DeferredObject[] arguments) throws HiveException {
    int len = 0;
    // First pass: fetch each BytesWritable and total up the output length.
    for (int idx = 0; idx < arguments.length; ++idx) {
      bw[idx] = ((BinaryObjectInspector)argumentOIs[idx])
          .getPrimitiveWritableObject(arguments[idx].get());
      if (bw[idx] == null){
        return null;
      }
      len += bw[idx].getLength();
    }
    byte[] out = new byte[len];
    int curLen = 0;
    // Need to iterate twice since BytesWritable doesn't support append.
    for (BytesWritable bytes : bw){
      System.arraycopy(bytes.getBytes(), 0, out, curLen, bytes.getLength());
      curLen += bytes.getLength();
    }
    return new BytesWritable(out);
  }

  /**
   * Converts every argument to String and appends them in order.
   * Returns null if any argument (or its converted value) is null.
   */
  public String stringEvaluate(DeferredObject[] arguments) throws HiveException {
    StringBuilder sb = new StringBuilder();
    for (int idx = 0; idx < arguments.length; ++idx) {
      String val = null;
      if (arguments[idx] != null) {
        val = (String) stringConverters[idx].convert(arguments[idx].get());
      }
      if (val == null) {
        // NULL propagation: a single NULL argument makes the whole result NULL.
        return null;
      }
      sb.append(val);
    }
    return sb.toString();
  }

}

四、学习总结

需关注的函数有initialize()、evaluate()、binaryEvaluate()、stringEvaluate()。其中initialize()函数进行初始化:判断参数类型、进行类型转换以及设定返回类型,为下一阶段做准备;evaluate()包含拼接逻辑,根据参数类型不同,分别调用binaryEvaluate()或stringEvaluate(),这两个函数是真正实现拼接逻辑的函数,逻辑比较简单,大家可以自己阅读。

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/空白诗007/article/detail/883117
推荐阅读
相关标签
  

闽ICP备14008679号