赞
踩
在大量数据需要查询时,虽然可以通过where进行筛选,但也是检索整个数据表后得到的结果。
而将一个大的数据集根据实际需要分割成各个小型数据集,再通过where选择需要查询的分区,
故而效率大大提高。
Hive中的分区是将一个文件分割成各个目录(文件)。
create table 分区表名称(
字段名称1 数据类型,
字段名称2 数据类型,
。。。
字段名称n 数据类型
)
partitioned by(分区字段名称1 数据类型)
row format delimited
fields terminated by '分割符';
collection items terminated by '分割符'
map keys terminated by '分割符'
lines terminated by '分割符'
- create table song1(
- id int,
- name string,
- num int
- )
- partitioned by (month string)
- row format delimited fields terminated by '\t';
create table 分区表名称(
字段名称1 数据类型,
字段名称2 数据类型,
。。。
字段名称n 数据类型
)
partitioned by(分区字段名称1 数据类型,分区字段名称2 数据类型)
row format delimited
fields terminated by '分割符';
- create table singpartition(
- id int, name string, num int
- )
- partitioned by (month string, day string)
- row format delimited fields terminated by '\t';
load data local inpath '本地路径文件' into table 分区表名 partition(分区字段名称1=字段值);
hive> load data local inpath '/opt/songs.txt' into table song1 partition(month=20220108);
hive> load data local inpath '/opt/songs2.txt' into table song1 partition(month=20220109);
hive> load data local inpath '/opt/songs3.txt' into table song1 partition(month=20220110);
load data local inpath '本地路径文件' into table 分区表名 partition(分区字段名称1=字段值,分区字段名称2=字段值);
- load data local inpath '/opt/songs.txt' into table
- singpartition partition(month='20220111',day=13);
select 需查询字段 from 表名 where 分区字段名称=字段值
- hive> select * from song1 where month=20220110;
- OK
- 1 never3 3 20220110
- 2 over3 2 20220110
- hive> select name from song1 where month=20220109;
- OK
- never2
- over2
①:通过union
select 需查询字段 from 表名 where 分区字段名称=字段值1
union
select 需查询字段 from 表名 where 分区字段名称=字段值2 ;
- hive> select * from song1 where month='20220109'
- > union
- > select * from song1 where month='20220110';
- WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
- Query ID = root_20220110232623_3d981c7c-6cee-460d-a564-038775507413
- Total jobs = 1
- Launching Job 1 out of 1
- Number of reduce tasks not specified. Estimated from input data size: 1
- In order to change the average load for a reducer (in bytes):
- set hive.exec.reducers.bytes.per.reducer=<number>
- In order to limit the maximum number of reducers:
- set hive.exec.reducers.max=<number>
- In order to set a constant number of reducers:
- set mapreduce.job.reduces=<number>
- Starting Job = job_1641870198202_0002, Tracking URL = http://master:8088/proxy/application_1641870198202_0002/
- Kill Command = /opt/software/hadoop-2.7.1/bin/hadoop job -kill job_1641870198202_0002
- Hadoop job information for Stage-1: number of mappers: 2; number of reducers: 1
- 2022-01-10 23:26:52,299 Stage-1 map = 0%, reduce = 0%
- 2022-01-10 23:27:15,111 Stage-1 map = 100%, reduce = 0%, Cumulative CPU 5.97 sec
- 2022-01-10 23:27:24,753 Stage-1 map = 100%, reduce = 100%, Cumulative CPU 8.3 sec
- MapReduce Total cumulative CPU time: 8 seconds 300 msec
- Ended Job = job_1641870198202_0002
- MapReduce Jobs Launched:
- Stage-Stage-1: Map: 2 Reduce: 1 Cumulative CPU: 8.3 sec HDFS Read: 16922 HDFS Write: 213 SUCCESS
- Total MapReduce CPU Time Spent: 8 seconds 300 msec
- OK
- 1 never2 3 20220109
- 1 never3 3 20220110
- 2 over2 2 20220109
- 2 over3 2 20220110
②通过 or
select 需查询字段 from 表名 where 分区字段名称=字段值1 or 分区字段名称=字段值2
- hive> select * from song1 where month=20220110 or month=20220108;
- OK
- 1 never 3 20220108
- 2 over 2 20220108
- 1 never3 3 20220110
- 2 over3 2 20220110
语法:select 查询字段 from 表名 where 分区字段1=分区字段1的值
and 分区字段2=分区字段2的值;
select * from singpartition where month='20220111' and day=13;
①增加单个分区
语法:alter table 表名 add partition (分区字段='分区字段值');
hive> alter table song1 add partition(month='20220112');
②同时创建多个分区 add partition (分区字段='分区字段值1') partition (分区字段='分区字段值2');
语法:alter table 表名
hive> alter table song1 add partition(month='20220111') partition(month='20220106');
①删除单个分区
语法:alter table 表名 drop partition(分区字段='分区字段值');
- hive> alter table song1 drop partition(month='20220112');
- Dropped the partition month=20220112
- OK
- Time taken: 0.777 seconds
①删除多个分区
语法:alter table 表名 drop partition(分区字段='分区字段值'),partition(分区字段='分区字段值2');
- alter table song1 drop partition(month='20220111'),partition(month='20220106');
- Dropped the partition month=20220106
- Dropped the partition month=20220111
- OK
- Time taken: 0.407 seconds
语法:show partitions 表名
- hive> show partitions song1;
- OK
- month=20220108
- month=20220109
- month=20220110
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。