赞
踩
-- Create the table and pin down its schema and storage formats.
-- NOTE(review): the table is not declared EXTERNAL, so DROP TABLE is expected to
-- remove the files under LOCATION as well - confirm this is intended.
DROP TABLE IF EXISTS shufang.students;

CREATE TABLE IF NOT EXISTS shufang.students (
    id          int,
    name        string,
    create_time string
)
-- Partitioned table, one partition per dt value.
PARTITIONED BY (dt string)
-- Fields inside a row are separated by a tab character.
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
-- INPUTFORMAT is the class used to read the stored files (LZO-compressed text here),
-- OUTPUTFORMAT is the class used when Hive writes files for this table.
STORED AS
    INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
-- HDFS directory that backs the table.
LOCATION '/user/hive/warehouse/shufang.db/students';

-- Load the data. No MapReduce job runs: the files in the given directory are
-- simply moved into the partition directory.
LOAD DATA INPATH '/origin_data/db/shufang/students/2021-01-18'
INTO TABLE shufang.students
PARTITION (dt = '2021-01-18');

-- Log data delivered by Flume is only compressed and cannot be split yet, so after
-- the load an LZO index must be built for the partition to enable input splits.
-- The command below is a shell command, not HiveQL: run it from a terminal or a
-- wrapper script once the LOAD above has finished.
-- hadoop jar /opt/module/hadoop-2.7.7/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar \
--     com.hadoop.compression.lzo.DistributedLzoIndexer \
--     /user/hive/warehouse/shufang.db/students/dt=2021-01-18
-- Parquet-backed copy of shufang.students, partitioned by dt.
-- Table names are qualified with the shufang database so the script does not
-- depend on the session's current database (the LOCATION is under shufang.db).
CREATE TABLE IF NOT EXISTS shufang.student1 (
    id          int,
    name        string,
    create_time string
)
COMMENT 'parquet store table,parquet is born to support split'
-- Partition key.
PARTITIONED BY (dt string)
-- Storage format. Underneath this still resolves to an input format and an output format.
STORED AS PARQUET
-- HDFS directory that backs the table.
LOCATION '/user/hive/warehouse/shufang.db/student1'
-- Table property that selects the compression codec for the Parquet files.
TBLPROPERTIES ('parquet.compression' = 'lzo');

-- Rebuild the 2021-01-18 partition from the matching partition of the text table.
-- INSERT OVERWRITE replaces whatever the target partition currently holds.
INSERT OVERWRITE TABLE shufang.student1 PARTITION (dt = '2021-01-18')
SELECT
    id,
    name,
    create_time
FROM shufang.students
WHERE dt = '2021-01-18';
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。