赞
踩
INSERT INTO TABLE tablename1
[PARTITION (partcol1=val1, partcol2=val2 ...)]
select_statement1 FROM from_statement;
INSERT OVERWRITE TABLE tablename1
[PARTITION (partcol1=val1, partcol2=val2 ...)
[IF NOT EXISTS]]
select_statement1 FROM from_statement;
INSERT INTO
用于附加数据到配置单元表和分区表和,INSERT OVERWRITE
用来从表中删除现有的数据并插入新的数据。
出入新增值
INSERT INTO employee VALUES
(13,'Mari',51,'M'),
(14,'Pat',34,'F');
从查询语句中插入
INSERT INTO employee_tmp SELECT * FROM employee;
INSERT INTO zipcodes PARTITION(state='FL') VALUES
(891,'US','TAMPA',33605);
在插入前,需要开启hive的动态插入功能,
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
在这里,必须将partition列保留为最后一列。
动态分区插入主要是由于如果分区太多,手动写入分区很麻烦,因此可以基于查询参数推测除需要创建的分区。
insert overwrite table sport2 partition(year) select split(n1,'\t')[0],split(n1,'\t')[2],split(n1,'\t')[3],split(n1,'\t')[4],split(n1,'\t')[5],split(n1,'\t')[6],split(n1,'\t')[7],split(n1,'\t')[1] from sport;
在上面的语句中,partition(year)
和在查询中和分区匹配的split(n1,'\t')[1]
名称是不一样的,这也说明了在查询插入过程中,是基于位置插入,而不是名称插入。
如果同时使用静态和动态,这个必须静态在前,动态在后。
INSERT OVERWRITE TABLE order_partition
partition (year='2019',month)
SELECT order_id, order_date, order_status,
substr(order_date,1,4) ye, substr(order_date,5,2) mon
FROM orders
WHERE substr(order_date,1,4) ="2019";
create table 2012_sport as select n1,n2,n3 from sport2 where year=2012;
describe formatted 2012_sport; OK 2021-05-18 21:05:02,303 INFO [9b64abd8-c822-4803-ab03-dd4a27670202 main] mapred.FileInputFormat: Total input files to process : 1 # col_name data_type comment n1 string n2 string n3 string # Detailed Table Information Database: default Owner: root CreateTime: Tue May 18 21:03:22 CST 2021 LastAccessTime: UNKNOWN Retention: 0 Location: hdfs://hacluster/user/hive/warehouse/2012_sport Table Type: MANAGED_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} numFiles 1 numRows 74 rawDataSize 1768 totalSize 1842 transient_lastDdlTime 1621343003 # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 Time taken: 0.385 seconds, Fetched: 32 row(s)
这个功能适合从一个宽表中导出一个小表
insert overwrite local directory '/tmp/ca_eemm'
select name from sport2
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。