1. create table
create table tmp_2 (resource_id bigint ,v int) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' LINES TERMINATED BY '\n' STORED AS TEXTFILE
//ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,'---这里设置字段间以逗号分隔; //LINES TERMINATED BY '\n' ---这里设置行与行之间以换行分隔 //STORED AS TEXTFILE ---指定文件以text形式存储;Hive 中默认有三个文件格式 TextFile,SequenceFile 以及 RCFile
//在建表的时候还可以通过"PARTITIONED BY(file STRING)"指定分区字段
alter table hive_tb set serdeproperties('serialization.null.format' = '');
drop table if exists sa_base_order; create table IF NOT EXISTS sa_base_order...
使用上面两个判断能够,避免创建表已经存在的error
CREATE EXTERNAL TABLE page_view(viewTime INT, userid BIGINT, page_url STRING, referrer_url STRING, ip STRING COMMENT 'IP Address of the User', country STRING COMMENT 'country of origination' ) COMMENT 'This is the staging page view table' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\054' STORED AS TEXTFILE LOCATION '<hdfs_location>';
alter table external_wp add partition (dt='2016-05-20') location '/user/upload/wp/2016-05-20';
2. 将分析结果导入目标表
3. ODPS(阿里大数据平台) & HIVE
dateadd(to_date('@@{yyyy-MM-dd}','yyyy-mm-dd'),-400,'dd'),' ',1)
DATE_SUB(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),400)
split_part(CAST(to_date('@@{yyyy-MM-dd}','yyyy-mm-dd') AS STRING), '-',1) split(CAST(FROM_UNIXTIME(UNIX_TIMESTAMP() ,'yyyy-MM-dd')AS STRING), '-')[0]
--odps SELECT '@@{yyyy-MM-dd}', -- time(2015-12-02) DATEADD(to_date('@@{yyyy-MM-dd}','yyyy-mm-dd'),-400,'dd'), -- 2014-10-28 split_part(CAST(to_date('@@{yyyy-MM-dd}','yyyy-mm-dd') AS STRING), '-',1), -- 2015 split_part(CAST(to_date('@@{yyyy-MM-dd}','yyyy-mm-dd') AS STRING), '-',2), -- 12 split_part(CAST(to_date('@@{yyyy-MM-dd}','yyyy-mm-dd') AS STRING), '-',3) -- 02 FROM DUAL; --hive USE test; SELECT UNIX_TIMESTAMP(), -- unix time(2015-12-02), 秒为单位 DATE_SUB(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),400), -- 2014-10-28 split(CAST(FROM_UNIXTIME(UNIX_TIMESTAMP() ,'yyyy-MM-dd')AS STRING), '-')[0], -- 2015 split(CAST(FROM_UNIXTIME(UNIX_TIMESTAMP() ,'yyyy-MM-dd')AS STRING), '-')[1], -- 12 split(CAST(FROM_UNIXTIME(UNIX_TIMESTAMP() ,'yyyy-MM-dd')AS STRING), '-')[2] -- 02 FROM DUAL;
4. lateral view 使用- 把横表转纵表参考
和split, explode等UDTF一起使用,将一行数据拆成多行数据(UDTF),在此基础上可以对拆分后的数据进行聚合(虚拟表)
lateral view首先为原始表的每行调用UDTF,UTDF会把一行拆分成一或者多行,lateral view再把结果组合,产生一个支持别名表的虚拟表.
test
cids ,1000004525,215937867,1015096900, | dt 2014-12-12 |
,1,9, | 2015-12-12 |
使用 lateral view explode(split(cids,',')) t as cid 将原表的cids拆分 生成新的虚拟表,字段为cid.
SELECT cid,dt FROM test LATERAL VIEW explode(cids) test_new AS cid;
1000004525 | 2014-12-12 |
215937867 | 2014-12-12 |
1015096900 | 2014-12-12 |
1 | 2015-12-12 |
9 | 2015-12-12 |
一个FROM语句后可以跟多个lateral view语句,后面的lateral view语句能够引用它前面的所有表和列名。
select wire.thedate ,time_stamp1 ,access_url1 from (select thedate ,time_stamp ,access_url from external_weblog_wireless where dt='2016-08-10' )wire lateral view explode(split(time_stamp,';')) t as time_stamp1 lateral view explode(split(access_url,';')) t1 as access_url1 limit 100;
5. Hive 函数没有的功能
取某一天是星期几?
pmod(datediff(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'), '1920-01-01') - 3, 7)
对日期按月份进行加减?
date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-01-01'),31), -- 1月的上个月月初 date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-01-01'),1), -- 1月的上个月月尾 date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-01-01'),61), -- 1月的两个月前的月初 date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-01-01'),32), -- 1月的两个月前的月尾 date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-01-01'),31), -- 2月的两个月前的月初 date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-01-01'),1), -- 2月的两个月前的月尾
当前月的月初时间获得:
date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd')-1)
上月的月初:
date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(FROM_UNIXTIME(UNIX_TIMESTAMP()))))+day(FROM_UNIXTIME(UNIX_TIMESTAMP()))-1)
上月的月末:
date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(FROM_UNIXTIME(UNIX_TIMESTAMP())))
两个月前的月初:
date_sub(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(FROM_UNIXTIME(UNIX_TIMESTAMP()))))+day(FROM_UNIXTIME(UNIX_TIMESTAMP()))),day(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(FROM_UNIXTIME(UNIX_TIMESTAMP()))))+day(FROM_UNIXTIME(UNIX_TIMESTAMP()))))-1)
两个月前的月尾:
date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(date_sub(FROM_UNIXTIME(UNIX_TIMESTAMP(),'yyyy-MM-dd'),day(FROM_UNIXTIME(UNIX_TIMESTAMP()))))+day(FROM_UNIXTIME(UNIX_TIMESTAMP())))