Hive is a data warehouse tool built on Hadoop that provides SQL-like query capabilities.
## 1. Create the installation directory
mkdir -p /usr/local/hive
## 2. Copy the archive to the server and extract it
tar zxvf apache-hive-4.0.0-alpha-2-bin.tar.gz -C /usr/local/hive/
## 3. Add environment variables and reload the profile
echo 'export HIVE_HOME=/usr/local/hive/apache-hive-4.0.0-alpha-2-bin' >> /etc/profile
echo 'export PATH=${HIVE_HOME}/bin:${PATH}' >> /etc/profile
source /etc/profile
## 4. Enter the configuration directory
cd $HIVE_HOME/conf
## 5. Copy hive-env.sh.template and edit the configuration
cp hive-env.sh.template hive-env.sh
echo 'export JAVA_HOME=/usr/local/java/jdk-11.0.19' >> hive-env.sh
echo 'export HADOOP_HOME=/usr/local/hadoop/hadoop-3.3.6' >> hive-env.sh
echo 'export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop' >> hive-env.sh
echo 'export HIVE_HOME=/usr/local/hive/apache-hive-4.0.0-alpha-2-bin' >> hive-env.sh
echo 'export HIVE_CONF_DIR=${HIVE_HOME}/conf' >> hive-env.sh
echo 'export HIVE_AUX_JARS_PATH=${HIVE_HOME}/lib' >> hive-env.sh
## 6. Copy mysql-connector-j-8.0.33.jar into ${HIVE_HOME}/lib
tar zxvf mysql-connector-j-8.0.33.tar.gz
cp mysql-connector-j-8.0.33/mysql-connector-j-8.0.33.jar ${HIVE_HOME}/lib/
## 7. Copy hive-default.xml.template to hive-site.xml and edit the configuration
cp hive-default.xml.template hive-site.xml
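After step 3, a quick sanity check that Hive is on the PATH (a sketch; this assumes Hadoop is already installed as described in the earlier articles, since the hive launcher needs it):

# Should print the Hive 4.0.0-alpha-2 version string
hive --version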
Connect to the VM with MobaXterm and open hive-site.xml in a text editor to make the following changes:
Comment out the default properties with the same names, or edit their default values directly based on the settings below.
Also globally replace the following two variables, otherwise hiveserver2 fails to start (a sed sketch follows this list):
1. ${system:java.io.tmpdir} => /tmp (the default temporary directory on Linux)
2. ${system:user.name} => root (the current operating user on this system)
3. Set hive.server2.thrift.client.user to root, and add proxy-user settings for root in Hadoop's core-site.xml (shown further below)
4. For setting up the Hadoop cluster, see the earlier articles
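A minimal sketch of the two global replacements from items 1 and 2, assuming GNU sed and the hive-site.xml created above:

cd $HIVE_HOME/conf
# 1. ${system:java.io.tmpdir} => /tmp
sed -i 's|\${system:java.io.tmpdir}|/tmp|g' hive-site.xml
# 2. ${system:user.name} => root
sed -i 's|\${system:user.name}|root|g' hive-site.xml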
<!-- Configure the Hive metastore; MySQL is used here, and '&' in the URL must be escaped as '&amp;' -->
<property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://192.168.1.201:3306/hive?characterEncoding=UTF8&amp;createDatabaseIfNotExist=true&amp;serverTimezone=GMT%2B8&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.cj.jdbc.Driver</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>admin</value>
</property>
<property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>12345678</value>
</property>
<property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
</property>
<!-- nn is the host address; configure host / user / password -->
<property>
    <name>hive.server2.thrift.bind.host</name>
    <value>nn</value>
</property>
<property>
    <name>hive.server2.thrift.client.user</name>
    <value>root</value>
    <description>Username to use against thrift client. default is 'anonymous'</description>
</property>
<property>
    <name>hive.server2.thrift.client.password</name>
    <value>123456</value>
    <description>Password to use against thrift client. default is 'anonymous'</description>
</property>
<property>
    <name>hive.metastore.event.db.notification.api.auth</name>
    <value>false</value>
</property>
<property>
    <name>hive.server2.active.passive.ha.enable</name>
    <value>true</value>
</property>
<property>
    <name>hive.metastore.warehouse.dir</name>
    <value>hdfs://nn:9000/user/hive/warehouse</value>
    <description>HDFS address</description>
</property>
<property>
    <name>hive.metastore.schema.verification</name>
    <value>true</value>
</property>
<property>
    <name>hive.metastore.uris</name>
    <value>thrift://nn:9083</value>
</property>
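hive-default.xml.template is several thousand lines long, so it is easy to break the XML while editing; if xmllint is installed, well-formedness can be checked before starting anything (a sketch):

xmllint --noout $HIVE_HOME/conf/hive-site.xml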
Initialize the metadata, then view the resulting hive database in MySQL. Before initializing, configure the proxy user for root in Hadoop's core-site.xml:
## 1. Enter the configuration directory
cd $HADOOP_HOME/etc/hadoop
## 2. Edit core-site.xml
vim core-site.xml
## 3. Add the following content
<property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
</property>
<property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
</property>
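Distribute this change to every node of the cluster. If Hadoop is already running, the proxy-user settings can be reloaded without a full restart (a sketch; restarting the cluster also works):

hdfs dfsadmin -refreshSuperUserGroupsConfiguration
yarn rmadmin -refreshSuperUserGroupsConfiguration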
## 1. Initialize the metastore schema
schematool -initSchema -dbType mysql
## 2. Start the Hadoop cluster
cd $HADOOP_HOME/sbin && start-all.sh
## 3. Start Hive (note the redirect order: stderr only follows stdout into the log with '>> file 2>&1')
mkdir -p /var/log/hive
cd ${HIVE_HOME}/bin
nohup hive --service metastore >> /var/log/hive/metastore.log 2>&1 &
nohup hive --service hiveserver2 >> /var/log/hive/hiveserver2.log 2>&1 &
## 4. Kill the Hive processes
kill -9 `ps aux | grep hiveserver2 | grep -v grep | awk '{print $2}'`
kill -9 `ps aux | grep metastore | grep -v grep | awk '{print $2}'`
Check Java processes: jps
Check listening ports: lsof -i -P -n | grep LISTEN
Check the default log: tail -200f /tmp/root/hive.log
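Once both services are up, the metastore (9083, from hive.metastore.uris), hiveserver2 (10000), and its web UI (10002) should all be listening (a sketch):

ss -lntp | grep -E ':9083|:10000|:10002'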
The log shows an error: java.sql.SQLException: Referencing column 'ACTIVE_EXECUTION_ID' and referenced column 'SCHEDULED_EXECUTION_ID' in foreign key constraint 'SCHEDULED_EXECUTIONS_SCHQ_ACTIVE' are incompatible.
This is likely because MySQL 8.0.33 requires a foreign key's referencing and referenced columns to have compatible types.
The table and column pairs created by the initialization are:
SCHEDULED_EXECUTIONS - SCHEDULED_EXECUTION_ID
SCHEDULED_QUERIES - ACTIVE_EXECUTION_ID
One of these columns was int and the other bigint; after unifying both as bigint, the error disappeared. A sketch of the fix follows.
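A minimal sketch of the fix, assuming the metastore database is named hive and reusing the MySQL credentials from hive-site.xml above; inspect the actual types first and alter whichever column is still int:

# Inspect the two columns
mysql -h 192.168.1.201 -u admin -p hive -e 'show columns from SCHEDULED_QUERIES like "ACTIVE_EXECUTION_ID"; show columns from SCHEDULED_EXECUTIONS like "SCHEDULED_EXECUTION_ID";'
# Unify to bigint (assuming here that ACTIVE_EXECUTION_ID is the int column)
mysql -h 192.168.1.201 -u admin -p hive -e 'ALTER TABLE SCHEDULED_QUERIES MODIFY COLUMN ACTIVE_EXECUTION_ID bigint;'

Then restart the metastore service and check the log again.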
## 1. Connect from the console
hive
## 2. Connect, then enter the username and password
!connect jdbc:hive2://nn:10000
## 3. List databases
show databases;
## 4. Quit
!quit
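The same session can be run non-interactively with beeline (which the hive launcher wraps in Hive 4); a sketch using the thrift user and password configured above:

beeline -u jdbc:hive2://nn:10000 -n root -p 123456 -e 'show databases;'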
Web UI: http://192.168.1.6:10002/
Browse HDFS: http://192.168.1.6:9870/explorer.html#/tmp/hive/root
## 1. Create a database and set its storage location
create database if not exists animal_db comment "This is animal database" location '/hive_database/animal_db';
## 2. List databases
show databases;
## 3. Switch database
use animal_db;
## 4. Create a table
create table if not exists dog_tb(name string,breed string,area string,feature string) row format delimited fields terminated by ';';
## 5. Load data from a local file
load data local inpath '/home/dog_tb.txt' into table dog_tb;
## 6. Query the data
select * from dog_tb;
## 7. Drop the table
drop table dog_tb;
## 8. Force-drop the database
drop database animal_db cascade;
dog_tb.txt
bomei;small;germany;white,small
bianmu;big;scotland;clever
tugou;all;china;clever,loyalty
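After loading, the table's files can be inspected directly in HDFS (a sketch, using the database location chosen above; LOAD DATA keeps the original file name):

hdfs dfs -ls -R /hive_database/animal_db
hdfs dfs -cat /hive_database/animal_db/dog_tb/dog_tb.txt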
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>hive-demo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <spring.version>2.7.8</spring.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.28</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.32</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>4.0.0-alpha-2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>3.3.6</version>
        </dependency>
    </dependencies>
</project>
config:
hivedriverClassName: org.apache.hive.jdbc.HiveDriver
hiveurl: jdbc:hive2://192.168.1.6:10000/animal_db
hiveusername: root
hivepassword: 123456
package org.example;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * @author Administrator
 */
@SpringBootApplication
public class HiveApp {
    public static void main(String[] args) {
        // Bootstrap the Spring application
        SpringApplication.run(HiveApp.class, args);
    }
}
package org.example.config;

import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.stereotype.Component;

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Properties;

/**
 * Hive JDBC connection configuration.
 *
 * @author Administrator
 * @create 2023-08-02 21:42
 */
@Component
public class HiveConfig {

    /**
     * Bind the "config" section of application.yml to a Properties bean.
     */
    @Bean("hiveProperties")
    @ConfigurationProperties(prefix = "config")
    public Properties getConfig() {
        return new Properties();
    }

    /**
     * Open a JDBC connection to hiveserver2 using the bound properties.
     */
    @Bean
    public Connection start(@Qualifier("hiveProperties") Properties properties) {
        try {
            String url = (String) properties.get("hiveurl");
            String user = (String) properties.get("hiveusername");
            String password = (String) properties.get("hivepassword");
            Connection conn = DriverManager.getConnection(url, user, password);
            conn.setAutoCommit(true);
            return conn;
        } catch (Exception e) {
            System.out.println(e);
        }
        return null;
    }
}
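Note that hivedriverClassName from application.yml is bound but never used: the hive-jdbc jar registers org.apache.hive.jdbc.HiveDriver with DriverManager through the JDBC ServiceLoader mechanism, so an explicit Class.forName call should not be needed here.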
package org.example.controller;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang.StringUtils;
import org.apache.hive.jdbc.HivePreparedStatement;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * @author Administrator
 * @create 2023-08-02 21:42
 */
@Slf4j
@RestController
@RequestMapping("/hive")
public class HiveController {

    /**
     * Injected JDBC connection.
     */
    @Autowired
    Connection conn;

    /**
     * Column names of dog_tb.
     */
    List<String> columns = Arrays.asList("name", "breed", "area", "feature");

    /**
     * Insert one row.
     */
    @GetMapping("/insert")
    public void insert() throws SQLException {
        String sql = "insert into dog_tb values (?,?,?,?)";
        HivePreparedStatement pStSm = (HivePreparedStatement) conn.prepareStatement(sql);
        pStSm.setString(1, "keji");
        pStSm.setString(2, "small");
        pStSm.setString(3, "welsh");
        pStSm.setString(4, "friendly");
        pStSm.executeUpdate();
    }

    /**
     * Query rows; when index > 0 and value is non-empty, filter by name.
     */
    @GetMapping("/query")
    public List<String> query(int index, String value) throws SQLException {
        List<String> list = new ArrayList<>();
        String sql = "select * from dog_tb";
        boolean filtered = index > 0 && StringUtils.isNotEmpty(value);
        if (filtered) {
            sql = "select * from dog_tb where name = ?";
        }
        HivePreparedStatement pStSm = (HivePreparedStatement) conn.prepareStatement(sql);
        if (filtered) {
            // JDBC parameter indexes are 1-based
            pStSm.setString(index, value);
        }
        ResultSet resultSet = pStSm.executeQuery();
        StringBuilder builder = new StringBuilder();
        while (resultSet.next()) {
            builder.setLength(0);
            for (String col : columns) {
                builder.append(resultSet.getString(col)).append(";");
            }
            String result = builder.substring(0, builder.length() - 1);
            list.add(result);
            log.info("row: {}", result);
        }
        return list;
    }
}
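Once the application is running, the two endpoints can be exercised with curl (a sketch, assuming Spring Boot's default port 8080):

curl 'http://localhost:8080/hive/insert'
curl 'http://localhost:8080/hive/query?index=1&value=keji'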
If an insert or query fails, the error details can be looked up in the logs shown earlier (/tmp/root/hive.log, or the files under /var/log/hive/).