赞
踩
cd /data/import/
sudo vi test_explode_map_array.txt
添加以下文件内容
小明 产品1,产品2,产品3 性别:男,年龄:24
小花 产品4,产品5,产品6 性别:女,年龄:22
- -- 开启智能本地模式
- -- set hive.exec.mode.local.auto=true;
-
- -- 创建表
- create table test.test_explode_map_array(
- name string,
- prod_arr array<string>,
- info_map map<string, string>)
- row format delimited
- -- 字段分隔符为'\t'
- fields terminated by '\t'
- collection items terminated by ','
- map keys terminated by ':'
- stored as textfile;
-
- -- 加载数据(方法一)
- load data local inpath '/data/import/test_explode_map_array.txt'
- into table test.test_explode_map_array;
-
- -- 加载数据(方法二)
- insert into test.test_explode_map_array
- select '小明', array('产品1', '产品2', '产品3'), str_to_map('性别:男,年龄:24');
- insert into test.test_explode_map_array
- select '小花', array('产品4', '产品5', '产品6'), str_to_map('性别:女,年龄:22');
map_key | map_value |
---|---|
年龄 | 24 |
性别 | 男 |
年龄 | 22 |
性别 | 女 |
- select
- -- explode拆分后必须加括号
- explode(info_map) as (map_key, map_value)
- from test.test_explode_map_array;
-
-
-
- -- 查询其他字段, lateral view(侧视图, 虚拟表)
- select
- name, map_key, map_value
- from test.test_explode_map_array
- -- 必须去掉括号map_key, map_value
- lateral view explode(info_map) tmp_table as map_key, map_value;
prod_arr_new |
---|
产品1 |
产品2 |
产品3 |
产品4 |
产品5 |
产品6 |
- select
- -- explode拆分数组
- explode(prod_arr) as prod_arr_new
- from test.test_explode_map_array;
name | prod_arr_new |
---|---|
小明 | 产品1 |
小明 | 产品2 |
小明 | 产品3 |
小花 | 产品4 |
小花 | 产品5 |
小花 | 产品6 |
-
- -- 查询其他字段,lateral view(侧视图, 虚拟表)
- select
- name, prod_arr_new
- from test.test_explode_map_array
- lateral view explode(prod_arr) tmp_table as prod_arr_new;
UDF:用户自定义函数(user-defined function),输入一个值,返回一个值,一进一出
UDAF:用户自定义聚合函数(user-defined aggregate function),输入多个值,返回一个值,多进一出
UDTF:用户自定义表生成函数(user-defined table-generating function),输入一个值,返回多个值,一进多出
-- UDF:length\year\month\day ...
-- UDAF:sum\count\max ...
-- UDTF:explode
cd /data/import/
sudo vi test_explode_json.txt
添加以下文件内容
a:shandong,b:beijing,c:hebei|1,2,3,4,5,6,7,8,9|[{"source":"7fresh","monthSales":4900,"userCount":1900,"score":"9.9"},{"source":"jd","monthSales":2090,"userCount":78981,"score":"9.8"},{"source":"jdmart","monthSales":6987,"userCount":1600,"score":"9.0"}]
- -- 开启智能本地模式
- -- set hive.exec.mode.local.auto=true;
-
- -- 创建表
- create table test.test_explode_json(
- area string,
- goods_id string,
- sale_info string)
- row format delimited
- -- 字段分隔符为'|'
- fields terminated by '|'
- stored as textfile;
-
- -- 加载数据(方法一)
- load data local inpath '/data/import/test_explode_json.txt'
- overwrite into table test.test_explode_json;
-
- -- 加载数据(方法二)
- insert into test.test_explode_json
- values('a:shandong,b:beijing,c:hebei',
- '1,2,3,4,5,6,7,8,9',
- '[{"source":"7fresh","monthSales":4900,"userCount":1900,"score":"9.9"},{"source":"jd","monthSales":2090,"userCount":78981,"score":"9.8"},{"source":"jdmart","monthSales":6987,"userCount":1600,"score":"9.0"}]')
goods_id |
---|
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
-
- select
- explode(split(goods_id,',')) as goods_id
- from test.test_explode_json;
code | area |
---|---|
a | shandong |
b | beijing |
c | hebei |
-
- select
- regexp_extract(area_new, '(.*):(.*)',1) as code
- , regexp_extract(area_new, '(.*):(.*)',2) as arec
- from test.test_explode_json
- lateral view explode(split(area,',')) tmp_table as area_new
-
lateral view + explode + split
结果集的行数是如何生成的?
查询1个字段
- -- 9个元素= 9行
- select
- goods_id_new
- from test.test_explode_json
- lateral view explode(split(goods_id,',')) tmp_table as goods_id_new;
查询2个字段
- -- 9个元素*1个元素 = 9行
- select
- goods_id_new,area
- from test.test_explode_json
- lateral view explode(split(goods_id,',')) tmp_table as goods_id_new;
查询3个字段
- -- 9个元素*3个元素*1个元素=27行
- select
- goods_id_new,area_new,sale_info
- from test.test_explode_json
- lateral view explode(split(goods_id,',')) tmp_table as goods_id_new
- lateral view explode(split(area,',')) tmp_table as area_new;
source | monthsales | usercount | score |
---|---|---|---|
7fresh | 4900 | 1900 | 9.9 |
jd | 2090 | 78981 | 9.8 |
jdmart | 6987 | 1600 | 9.0 |
第一步:将字段sale_info的"[{"和"}]"替换为空字符串
- "[{"source":"7fresh","monthSales":4900,"userCount":1900,"score":"9.9"},{"source":"jd","monthSales":2090,"userCount":78981,"score":"9.8"},{"source":"jdmart","monthSales":6987,"userCount":1600,"score":"9.0"}]"
-
- select
- regexp_replace(sale_info, '\\[\\{|\\}\\]','')
- from test.test_explode_json
第二步:以"},{"拆分为数组
- select
- explode(split(regexp_replace(sale_info, '\\[\\{|\\}\\]',''), '\\},\\{'))
- from test.test_explode_json
第三步:将数组拆分为多行
- select
- sale_info_new
- from test.test_explode_json
- lateral view explode(split(regexp_replace(sale_info, '\\[\\{|\\}\\]',''), '\\},\\{')) tmp_table as sale_info_new
第四步:转换为json字符串
- select
- concat('{', sale_info_new, '}')
- from test.test_explode_json
- lateral view explode(split(regexp_replace(sale_info, '\\[\\{|\\}\\]',''), '\\},\\{')) tmp_table as sale_info_new
第五步:将json字符串转换为二维表
- select
- get_json_object(concat('{', sale_info_new, '}'), '$.source') as source
- , get_json_object(concat('{', sale_info_new, '}'), '$.monthSales') as monthSales
- , get_json_object(concat('{', sale_info_new, '}'), '$.userCount') as userCount
- , get_json_object(concat('{', sale_info_new, '}'), '$.score') as score
- from test.test_explode_json
- lateral view explode(split(regexp_replace(sale_info, '\\[\\{|\\}\\]',''), '\\},\\{')) tmp_table as sale_info_new
region_category | subclass |
---|---|
东北-办公 | 系固件,纸张,收纳具,信封,器具,美术,用品,装订机,标签 |
东北-家具 | 用具,椅子,桌子,书架 |
东北-技术 | 电话,配件,复印机,设备 |
- -- concat(str1|col1, str2|col2, …)
- -- 字符串合并,支持任意个字符串;
-
- -- concat_ws(sep, str1, str2, ...)
- -- 以sep为分隔符合并str1, str2, ...;如分隔符为null,则返回null;跳过合并值为null或空字符串的参数;
-
- -- collect_set(col)去重汇总
- -- 只支持基本数据类型(不支持复合数据类型),将某字段的项去重汇总,返回array数据类型;
- -- collect_list(col)不去重汇总
-
-
-
- select
- CONCAT(region, '-',category) as region_category
- , concat_ws(',', collect_set(subclass)) as `产品子类`
- from sm.sm_order_total
- group by CONCAT(region, '-',category)
- -- 通过以上查询语句来创建表
- create table test.test_explode_split as
- select
- CONCAT(region, '-',category) as region_category
- , concat_ws(',', collect_set(subclass)) as `产品子类`
- from sm.sm_order_total
- group by CONCAT(region, '-',category)
region | category | subclass_new |
---|---|---|
东北 | 办公 | 系固件 |
东北 | 办公 | 纸张 |
东北 | 办公 | 收纳具 |
东北 | 办公 | 信封 |
东北 | 办公 | 器具 |
东北 | 办公 | 美术 |
东北 | 办公 | 用品 |
东北 | 办公 | 装订机 |
东北 | 办公 | 标签 |
- select
- regexp_extract(t.region_category, '(.*)-(.*)', 1) as region
- , regexp_extract(t.region_category, '(.*)-(.*)', 2) as category
- , subclass_new
- from test.test_explode_split t
- lateral view explode(split(t.subclass, ',')) tmp_table as subclass_new;
支持调用java函数
cd /data/import/
sudo vi test_reflect.txt
添加以下文件内容
11 配饰 7 7
12 配饰 9 5
13 配饰 5 7
14 服饰 9 5
15 服饰 9 4
16 配饰 7 5
17 服饰 8 3
18 配饰 6 5
19 服饰 5 4
20 配饰 9 4
- use test;
- -- 创建表
- create table test.test_reflect(
- order_id int comment '订单编号',
- product string comment '产品',
- quality int comment '质量',
- service int comment '服务')
- row format delimited
- fields terminated by '\t';
- -- 加载数据
- load data local inpath '/data/import/test_reflect.txt'
- into table test.test_reflect;
-
-
-
- select
- *
- , reflect('java.lang.Math','max',quality, service) as max_score
- from test.test_reflect
配饰:求最大值
服饰:求最小值
-
- select
- t.order_id
- , t.product
- , t.quality
- , t.service
- , reflect('java.lang.Math', method_name, t.quality, t.service) as score
- from
- (
- select
- order_id
- , product
- , quality
- , service
- , case product when '配饰' then 'max'
- when '服饰' then 'min' end as method_name
- from test.test_reflect
- ) as t
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。