7.hive数据仓库Data Warehouse_datawarehouse与hive的关系

作者：很楠不爱3 | 2024-05-07 22:49:13

踩

datawarehouse与hive的关系

7.hive数据仓库Data Warehouse

7.1hive启动方式

#Beeline Client
#在hive运行的服务器上，首先启动metastore服务，然后启动hiveserver2服务。
nohup /export/servers/hive-2.1.0/bin/hive --service metastore &
nohup /export/servers/hive-2.1.0/bin/hive --service hiveserver2 &
#nohup 和 & 表示后台启动
#在node3上使用beeline客户端进行连接访问
/export/servers/hive-2.1.0/bin/beeline

#例子
[root@node3 ~]# /export/servers/hive-2.1.0/bin/beeline
which: no hbase in (:/export/servers/hive-2.1.0/bin::/export/servers/hadoop-2.7.5/bin:/export/servers/hadoop-2.7.5/sbin::/export/servers/jdk1.8.0_241/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/export/servers/mysql-5.7.29/bin:/root/bin)
Beeline version 2.1.0 by Apache Hive
beeline> !connect jdbc:hive2://node3:10000
Connecting to jdbc:hive2://node3:10000
Enter username for jdbc:hive2://node3:10000: root
Enter password for jdbc:hive2://node3:10000:123456
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

7.2hive数据库和表的操作

1#建数据库
create database if not exists myhive;
use  myhive;
2#创建数据库指定hdfs存储位置
create database myhive2 location '/myhive2';

3#查看数据库详细信息
desc  database  myhive;

4#删除一个空数据库，如果数据库下面有数据表，那么就会报错
drop  database  myhive;
5#强制删除数据库，包含数据库下面的表一起删除
drop  database  myhive2  cascade; 

6#数据库建立表的语法
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name 
   [(col_name data_type [COMMENT col_comment], ...)] 
   [COMMENT table_comment] 
   [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] 
   [CLUSTERED BY (col_name, col_name, ...) 
   [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] 
   [ROW FORMAT row_format] 
   [STORED AS file_format] 
   [LOCATION hdfs_path]
7#例子
create database myhive;
use myhive;
create table stu(id int,name string);
insert into stu values (1,"zhangsan");
select * from stu;
8#创建表并指定字段分隔符（制表符）、存储格式（文本文件）和存储路径
-- Create stu2 with tab-delimited fields, stored as plain text at an explicit HDFS path.
create table if not exists stu2(
    id int,
    name string)
row format delimited fields terminated by '\t'
stored as textfile
location '/user/stu2';
9#查询表的类型
desc formatted  stu2;
10#删除表
drop table stu2;
11#根据查询结果来创建表
-- CTAS: creates stu3 with the schema and the rows returned by the query on stu2.
create table stu3 as select * from stu2;
12#根据存在的表创建表
-- Creates stu4 with the same table definition as stu2; no rows are copied.
create table stu4 like stu2;

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46

说明：

1、CREATE TABLE 创建一个指定名字的表。如果相同名字的表已经存在，则抛出异常；用户可以用 IF NOT EXISTS 选项来忽略这个异常。

2、EXTERNAL关键字可以让用户创建一个外部表，在建表的同时指定一个指向实际数据的路径（LOCATION），Hive 创建内部表时，会将数据移动到数据仓库指向的路径；若创建外部表，仅记录数据所在的路径，不对数据的位置做任何改变。在删除表的时候，内部表的元数据和数据会被一起删除，而外部表只删除元数据，不删除数据。

7.2.1外部表

在创建表的时候可以指定 external 关键字创建外部表，外部表对应的文件存储在 location 指定的 hdfs 目录下，向该目录添加新文件的同时，该表也会读取到该文件（当然文件格式必须跟表定义的一致）。

外部表因为是指定其他的hdfs路径的数据加载到表当中来，所以hive表会认为自己不完全独占这份数据，所以删除hive外部表的时候，数据仍然存放在hdfs当中，不会删掉。

#数据装载（load）
load data [local] inpath '/export/servers/datas/student.txt' [overwrite] into table student [partition (partcol1=val1,…)];
	1、load data:表示加载数据
	2、local:表示从本地加载数据到hive表；否则从HDFS加载数据到hive表
	3、inpath:表示加载数据的路径
	4、overwrite:表示覆盖表中已有数据，否则表示追加
	5、into table:表示加载到哪张表（追加）
	6、student:表示具体的表
	7、partition:表示上传到指定分区
	
#举例
-- External table for teacher records; fields in the data file are tab-separated.
create external table teacher(
    t_id string,
    t_name string
)
row format delimited fields terminated by '\t';

-- External table for student records; fields in the data file are tab-separated.
create external table student(
    s_id string,
    s_name string,
    s_birth string,
    s_sex string
)
row format delimited fields terminated by '\t';
#从本地文件系统向表中加载数据
load data local inpath '/export/servers/hivedatas/student.txt' into table student;
#加载数据并覆盖已有数据
load data local inpath '/export/servers/hivedatas/student.txt' overwrite  into table student;

#从hdfs文件系统向表中加载数据
cd /export/servers/hivedatas
hadoop fs -mkdir -p /hivedatas
hadoop fs -put teacher.txt /hivedatas/
load data inpath '/hivedatas/teacher.txt' into table teacher;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

7.3复杂类型的操作

7.3.1 array类型

#源数据: 
说明:name与locations之间制表符分隔，locations中元素之间逗号分隔
zhangsan	  beijing,shanghai,tianjin,hangzhou
wangwu   	changchun,chengdu,wuhan,beijin

-- name and work_locations are tab-separated; array elements are comma-separated.
-- The column is named work_locations to match the queries that follow.
create table hive_array(
    name string,
    work_locations array<string>
)
row format delimited fields terminated by '\t'
collection items terminated by ',';

#导入数据
load data local inpath '/export/servers/hivedatas/work_locations.txt' overwrite into table hive_array;

#查询数据
-- All rows
select * from hive_array;
-- First element of the work_locations array
select name, work_locations[0] as location from hive_array;
-- Number of elements in the work_locations array
select name, size(work_locations) as location from hive_array;
-- Rows whose work_locations array contains 'tianjin'
select * from hive_array where array_contains(work_locations, 'tianjin');


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25

7.3.2 map类型

说明：字段与字段分隔符: “,”；需要map字段之间的分隔符："#"；map内部k-v分隔符：":"

#源数据
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28

2,lisi,father:mayun#mother:huangyi#brother:guanyu,22

3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29

4,mayun,father:mayongzhen#mother:angelababy,26

-- Fields are comma-separated, map entries are '#'-separated,
-- and each key/value pair inside the map is split by ':'.
create table hive_map(
    id int,
    name string,
    members map<string,string>,
    age int
)
row format delimited
fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';

#数据导入
load data local inpath '/export/servers/hivedatas/hive_map.txt' overwrite into table hive_map;

-- All rows
select * from hive_map;
-- Look up individual map values by key
select id, name, members['father'] as father, members['mother'] as mother, age from hive_map;
-- All keys of the map, returned as an array
select id, name, map_keys(members) as relation from hive_map;
-- All values of the map, returned as an array
select id, name, map_values(members) as relation from hive_map;
-- Number of key/value pairs in the map
select id, name, size(members) as num from hive_map;
-- Rows whose map has a 'brother' key
select * from hive_map where array_contains(map_keys(members), 'brother');
-- The brother's name, only for rows whose map has a 'brother' key
select id, name, members['brother'] as brother from hive_map where array_contains(map_keys(members), 'brother');


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

7.3.3 struct类型

说明：字段之间用#分隔，第二个字段（struct）内部的成员之间用冒号分隔

192.168.1.1#zhangsan:40
192.168.1.2#lisi:50
192.168.1.3#wangwu:60
192.168.1.4#zhaoliu:70

-- ip and info are separated by '#'; the struct members (name, age) are separated by ':'.
create table hive_struct(
    ip string,
    info struct<name:string,age:int>
)
row format delimited
fields terminated by '#'
collection items terminated by ':';


load data local inpath '/export/servers/hivedatas/hive_struct.txt' into table hive_struct;

select * from hive_struct;
select ip, info.name from hive_struct;
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18

7.4内部表和外部表的转换

1、查询表的类型
desc formatted student;
Table Type:             MANAGED_TABLE

2、修改内部表student为外部表
alter table student set tblproperties('EXTERNAL'='TRUE');

3、查询表的类型
desc formatted student;
Table Type:             EXTERNAL_TABLE

4、修改外部表student为内部表
alter table student set tblproperties('EXTERNAL'='FALSE');

5、查询表的类型
desc formatted student;
Table Type:             MANAGED_TABLE

注意：('EXTERNAL'='TRUE')和('EXTERNAL'='FALSE')为固定写法，区分大小写！
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19

声明：本文内容由网友自发贡献，不代表【wpsshop博客】立场，版权归原作者所有，本站不承担相应法律责任。如您发现有侵权的内容，请联系我们。转载请注明出处：https://www.wpsshop.cn/w/很楠不爱3/article/detail/551679