赞
踩
行和列的互转
1.列转行 (对某列拆分,一列拆多行)
使用函数:lateral view explode(split(column, ‘,’)) num
eg: 如表:t_row_to_column_tmp 数据如下,对tag列进行拆分
SQL代码:
select
id,
tag,
tag_new
from
t_row_to_column_tmp
lateral view explode(split(tag, ‘,’)) num as tag_new where id=212022894 group by id;
2.行转列 (根据主键,进行多行合并一列)
使用函数:concat_ws(’,’ , collect_set(column)) --分割号是,
说明:collect_list 不去重,collect_set 去重。 column 的数据类型要求是 string
eg:如表:t_column_to_row ,根据id,对tag_new 进行合并
SQL代码2:
select
id,
concat_ws(’,’,collect_list(tag_new)) as tag_col
from t_column_to_row group by id;
3.多行数据转化成map类型
另外一个样例的地址:https://blog.csdn.net/huobumingbai1234/article/details/80559944
样例二:
1)原始数据格式
![](https://img-blog.csdnimg.cn/20181126102836182.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3NoZWVwODUyMQ==,size_16,color_FFFFFF,t_70)2)最后结果格式
代码实现start
1)为了统计用户的观看的电影的top5的内容。
首先根据用户的观看节目和得分对用户播放内容相关的二级分类进行top5
select
udid,
content_id,
content_name,
con_class_1_name,
con_class_2_name,
tag_class_1,
total_score,
row_number() over(partition by udid,con_class_2_name order by total_score desc,content_id,content_name,con_class_1_name,tag_class_1) rn
from
tmp.dws_kesheng_user_total_score_1m_delta_daily_20181123103021_${DT} --这个临时表是已经筛选了一级分类是“电影”的用户得分临时表
where con_class_2_name in ('导演','编剧','主演','内容类型','播出年代','出品方')
)t1 where t1.rn<=5
2)把用户的观看节目的属性进行统计,按照一级分类、二级分类的对枚举值tag_class_1进行合并。
对某个列做合并
concat_ws(';',collect_set(标签值)) as col1。主意这里使用的是分号;
select con_class_1_name, con_class_2_name, concat_ws('\u003B',collect_set(tag_class_1)) as col1 from ( select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'喜剧' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'爱情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'武侠' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'言情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'战争' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'张艺谋' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王晶' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王家卫' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'毕赣' as tag_class_1 ) t group by con_class_1_name, con_class_2_name
3)实现行转列
把字段“二级分类”的枚举值(导演,主演,内容类型)最终变成导演,主演,内容类型等字段。
collect_set : 如果想让key-value不重复的话。
这里使用的还是二级和标签组成map,二级作为key,对应的标签值作为value。
select 字段1,字段2,str_to_map(cast( concat_ws(",",collect_list(concat_ws(':',table1.二级,cast(table1.标签 as string)))) as string)) as kv
from table11
select con_class_1_name, concat_ws(",",collect_list(concat_ws(':',t2.con_class_2_name,cast(t2.col1 as string)))) from (select con_class_1_name, con_class_2_name, concat_ws('\u003B',collect_set(tag_class_1)) as col1 from ( select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'喜剧' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'爱情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'武侠' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'言情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'战争' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'张艺谋' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王晶' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王家卫' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'毕赣' as tag_class_1 ) t1 group by con_class_1_name, con_class_2_name ) t2 group by con_class_1_name
4)最后包装成map格式
select con_class_1_name, str_to_map(cast(concat_ws(",",collect_list(concat_ws(':',t2.con_class_2_name,cast(t2.col1 as string)))) as string )) kv from (select con_class_1_name, con_class_2_name, concat_ws('\u003B',collect_set(tag_class_1)) as col1 from ( select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'喜剧' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'爱情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'武侠' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'言情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'战争' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'张艺谋' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王晶' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王家卫' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'毕赣' as tag_class_1 ) t1 group by con_class_1_name, con_class_2_name ) t2 group by con_class_1_name
select con_class_1_name, kv['导演'] as director, kv['内容类型'] content_type from (select con_class_1_name, str_to_map(cast(concat_ws(",",collect_list(concat_ws(':',t2.con_class_2_name,cast(t2.col1 as string)))) as string )) kv from (select con_class_1_name, con_class_2_name, concat_ws('\u003B',collect_set(tag_class_1)) as col1 --'\u003B' 这个其实就是; from ( select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'喜剧' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'爱情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'武侠' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'言情' as tag_class_1 union all select '电影' as con_class_1_name,'内容类型' as con_class_2_name,'战争' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'张艺谋' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王晶' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'王家卫' as tag_class_1 union all select '电影' as con_class_1_name,'导演' as con_class_2_name,'毕赣' as tag_class_1 ) t1 group by con_class_1_name, con_class_2_name ) t2 group by con_class_1_name ) t3
1、多行合并一行的,这里需要使用 分号的分割符 ‘;‘会报错的
需要使用到另外的’’\u003B’’
2、str_to_map
英语翻译如下:
使用两个分隔符将文本拆分为键值对。 Delimiter1将文本分成K-V对,Delimiter2分割每个K-V对。
对于delimiter1默认分隔符是’,’ 。
对于delimiter2默认分隔符是’=’。虽然默认是"=" 连接的kv,但是使用冒号":"连接的kv也是可以转化为map形式的。
对于正常的value是一个数值得话直接使用就行了,直接会把字符串装成k-v的形式。见下面的例子
select
udid,
str_to_map(concat_ws(',',collect_set(concat_ws(':',tag_class_1,nums)))) kv
from
(select 'A' udid,'资讯' tag_class_1,'30' nums
union all
select 'A' udid,'冒险' tag_class_1,'50' nums
) t group by udid
但是对于的本案例的value是多个值组合的,如果concat_ws(’,’ tag_class_1) 是用的分割符逗号连接且使用了str_to_map的默认分割符逗号的话,那么使用这个函数的时候会把value也进行分割,最后个格式就不正确,后面换成了一种concat_ws的连接value的分割符号分号;然后就成功了。
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。