赞
踩
create table t_afan_test
(
info1 array<int>,
info2 array<string>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
insert into t_afan_test values (array(12,23,23,34),array("what","are","this"));
insert into t_afan_test values (array(12,23,23,34,56,32),array("what","are","this","aaa"));
drop table if exists t_afan_test; create table t_afan_test ( info1 string ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ','; insert into t_afan_test values ('[12,23,23,34]'); insert into t_afan_test values ('[22,33,43,54]'); select * from t_afan_test; +--------------------+ | t_afan_test.info1 | +--------------------+ | [12,23,23,34] | | [22,33,43,54] | +--------------------+ drop table if exists tmp_xxx; create table tmp_xxx as select split(regexp_extract(`info1`,'^\\[(.*)]$',1),',') as key_word_label from t_afan_test where `info1` is not null limit 10; select * from tmp_xxx; +-------------------------+ | tmp_xxx.key_word_label | +-------------------------+ | ["22","33","43","54"] | | ["12","23","23","34"] | +-------------------------+ select collect_list(cast(array_element as int)) int_array from (select explode(key_word_label) array_element from tmp_xxx) s; +----------------------------+ | int_array | +----------------------------+ | [22,33,43,54,12,23,23,34] | +----------------------------+ select explode(key_word_label) array_element from tmp_xxx; +----------------+ | array_element | +----------------+ | 22 | | 33 | | 43 | | 54 | | 12 | | 23 | | 23 | | 34 | +----------------+
转成array编写UDF,代码如下:
package com.xxx.stringtoarray; import org.apache.hadoop.hive.ql.exec.UDF; import java.util.ArrayList; import java.util.List; /** * 将数组格式的字符串 转成 整型数组 * @author tzq */ public final class StringToArray extends UDF { private static final String NULL_STRING = "null"; /** * 如果想最后hive的数据格式是struct<>, 返回值是Integer[]的。 * @param sourceText :源字符串 * @return */ public List<Integer> evaluate(String sourceText) { if (isBlank(sourceText)) { return null; } if (NULL_STRING.equalsIgnoreCase(sourceText)) { return null; } String[] arr1 = sourceText.replace("[","").replace("]","").split(","); //Integer[] arr2 = new Integer[arr1.length]; List<Integer> list = new ArrayList<>(); for(int i = 0; i < arr1.length; i++) { list.add(Integer.parseInt(arr1[i])); } return list; } public static boolean isBlank(String str) { int strLen; if (str != null && (strLen = str.length()) != 0) { for(int i = 0; i < strLen; ++i) { if (!Character.isWhitespace(str.charAt(i))) { return false; } } return true; } else { return true; } } }
hive中创建临时函数:
hive> add jar /xxx/xxx/xxx/xx.jar;
hive> create temporary function stringToArray as 'com.xxx.stringtoarray.StringToArray';
使用的时候,类似如下:
drop table if exists t_afan_test; create table t_afan_test ( info1 string ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ','; insert into t_afan_test values ('[12,23,23,34]'); insert into t_afan_test values ('[22,33,43,54]'); drop table if exists tmp_xxx; create table tmp_xxx as select stringToArray(`info1`) as key_word_label from t_afan_test;
查看结果:
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。