当前位置:   article > 正文

八十二、尚硅谷kylin单机版环境——安装Hadoop3.1.3_hadoop-common-3.1.3.jar

hadoop-common-3.1.3.jar

安装Hadoop

(一)打开安装包所在地
[root@hurys22 etc]# cd /opt/install/
[root@hurys22 install]# ls
hadoop-3.1.3.tar.gz

(二)解压安装包
[root@hurys22 install]# tar -zxf /opt/install/hadoop-3.1.3.tar.gz  -C /opt/soft/
[root@hurys22 install]# cd /opt/soft
[root@hurys22 soft]# ls
hadoop-3.1.3  

(三)重命名
[root@hurys22 soft]# mv hadoop-3.1.3  hadoop313
[root@hurys22 soft]# ls
hadoop313  

(四)查看路径
[root@hurys22 ~]# cd /opt/soft/hadoop313/
[root@hurys22 hadoop313]# pwd
/opt/soft/hadoop313

(五)配置环境
[root@hurys22 soft]# vi /etc/profile
#hadoop
export HADOOP_HOME=/opt/soft/hadoop313
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin

export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root

(六)环境配置好后,source一下
[root@hurys22 soft]# source /etc/profile

(七)查看Hadoop安装版本
[root@hurys22 hadoop]# hadoop version 
Hadoop 3.1.3
Source code repository https://gitbox.apache.org/repos/asf/hadoop.git -r ba631c436b806728f8ec2f54ab1e289526c90579
Compiled by ztang on 2019-09-12T02:47Z
Compiled with protoc 2.5.0
From source with checksum ec785077c385118ac91aadde5ec9799
This command was run using /opt/soft/hadoop313/share/hadoop/common/hadoop-common-3.1.3.jar

(八)查看Java安装路径
[root@hurys22 hadoop]# echo $JAVA_HOME
/usr/local/java

(九)查看java版本
[root@hurys22 hadoop]# java -version
java version "1.8.0_311"
Java(TM) SE Runtime Environment (build 1.8.0_311-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.311-b11, mixed mode)

(十)查看虚拟机名称
[root@hurys22 hadoop]# hostname
hurys22

(十一)查看虚拟机地址
[root@hurys22 hadoop]# ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
    link/ether 00:50:56:26:48:4f brd ff:ff:ff:ff:ff:ff
    inet 192.168.0.22/22 brd 192.168.59.255 scope global noprefixroute ens33
    
(十二)修改Windows系统的 C:\Windows\System32\drivers\etc\hosts 文件,添加一行"ip hostname"映射
192.168.0.22  hurys22
    
(十三)配置Hadoop文件
[root@hurys22 soft]# cd /opt/soft/hadoop313/etc/hadoop/

1.配置文件hadoop-env.sh
[root@hurys22 hadoop]# vi hadoop-env.sh
54 export JAVA_HOME=/usr/local/java

103 # export HADOOP_CLIENT_OPTS=""
104 export HADOOP_CLIENT_OPTS="-Xmx1022m $HADOOP_CLIENT_OPTS"


2.配置文件core-site.xml
先新建文件夹 hadooptmp  路径/opt/soft/hadoop313/hadooptmp
[root@hurys22 hadoop]# mkdir -p /opt/soft/hadoop313/hadooptmp

[root@hurys22 hadoop]# vi  core-site.xml 
<configuration>
<property>
    <!-- HDFS namenode地址 -->
    <name>fs.defaultFS</name>
    <value>hdfs://hurys22:8020</value>
  </property>
<property>
    <!-- HADOOP 运行时存储路径 -->
    <name>hadoop.tmp.dir</name>
    <value>/opt/soft/hadoop313/hadooptmp</value>
  </property>
  <!-- 配置HDFS网页登录使用的静态用户为 root -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>root</value>
    </property>
  <property>
   <!-- 配置该root(superUser)允许通过代理访问的主机节点 -->
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <!-- 配置该root(superUser)允许通过代理用户所属组 -->
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
  <!-- 配置该root(superUser)允许通过代理的用户 -->
    <property>
        <name>hadoop.proxyuser.root.users</name>
        <value>*</value>
    </property>
   <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
   </property>
</configuration>


3.配置文件  hdfs-site.xml
[root@hurys22 hadoop]# vi hdfs-site.xml
<configuration>
<!-- 设置HDFS存储文件的副本数,默认3份;单机环境只有一个DataNode,可改为1,否则数据块会一直处于副本不足状态 -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  
<!-- namenode web端访问地址 -->
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hurys22:9870</value>
  </property>
  
<!-- SecondaryNameNode web端访问地址 -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>hurys22:9868</value>
    </property>
</configuration>

注意dfs.namenode.http-address在hadoop-3.1.0版本上的默认值是 0.0.0.0:9870 ,在hadoop-2.7.7版本上的默认值是0.0.0.0:50070,所以不同版本可以通过不同端口访问NameNode。

4.配置文件  mapred-site.xml
[root@hurys22 hadoop]# vi ./mapred-site.xml
<configuration>

<property>
    <!-- mapreduce执行计算时,使用的资源调度框架为Yarn -->
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  
   <!-- 历史服务器端地址 -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hurys22:10020</value>
  </property>
  
   <!-- 历史服务器web端地址 -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hurys22:19888</value>
  </property>

</configuration>

5.配置文件  yarn-site.xml
[root@hurys22 hadoop]# vi ./yarn-site.xml
<configuration>
 <!-- 指定MR走shuffle -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    
    <!-- 指定ResourceManager的地址 -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hurys22</value>
    </property>
    
    <!-- 环境变量的继承 -->
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
    </property>
    
    <!-- yarn容器允许分配的最大最小内存 -->
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>512</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>4096</value>
    </property>
    
    <!-- yarn容器允许管理的物理内存的大小 -->
    <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>4096</value>
    </property>
    
    <!-- 关闭yarn对物理内存和虚拟内存的限制检查 -->
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>

    <!-- 开启日志聚集功能 -->
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    
    <!-- 设置日志聚集服务器地址 -->
    <property>
        <name>yarn.log.server.url</name>
        <value>http://hurys22:19888/jobhistory/logs</value>
    </property>
    
    <!-- 设置日志保留时间为7天 -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
</configuration>


6.配置文件   workers(Hadoop 3.x 中原来的 slaves 文件已更名为 workers)
[root@hurys22 hadoop]# vi ./workers
hurys22

(十四)namenode初始化
[root@hurys22 hadoop]# hadoop namenode -format 

2023-04-08 14:52:38,426 INFO common.Storage: Storage directory /opt/soft/hadoop313/hadooptmp/dfs/name has been successfully formatted.


(十五)挨个启动Hadoop组件
[root@hurys22 hadoop]# hadoop-daemon.sh start namenode
[root@hurys22 hadoop]# hadoop-daemon.sh stop namenode

[root@hurys22 hadoop]# hadoop-daemon.sh start datanode
[root@hurys22 hadoop]# hadoop-daemon.sh stop datanode

[root@hurys22 hadoop]# hadoop-daemon.sh start secondarynamenode
[root@hurys22 hadoop]# hadoop-daemon.sh stop secondarynamenode

启动yarn资源管理器  NodeManager ResourceManager
[root@hurys22 hadoop]# start-yarn.sh
[root@hurys22 hadoop]# stop-yarn.sh

启动hdfs DataNode namenode secondarynamenode
[root@hurys22 hadoop]# start-dfs.sh 
[root@hurys22 hadoop]# stop-dfs.sh 

[root@hurys22 hadoop]# yarn-daemon.sh start nodemanager
[root@hurys22 hadoop]# yarn-daemon.sh stop nodemanager
[root@hurys22 hadoop]# yarn-daemon.sh start resourcemanager
[root@hurys22 hadoop]# yarn-daemon.sh stop resourcemanager

全部启动
[root@hurys22 hadoop]# start-all.sh 
[root@hurys22 hadoop]# stop-all.sh 


http://hurys22:9870/      HDFS管理界面(地址为 http://主机名或IP:9870,3.x版本默认端口为9870)

http://hurys22:8088/      yarn管理界面

http://hurys22:19888/     jobhistory界面

启动历史服务
[root@hurys22 hadoop]# mr-jobhistory-daemon.sh start historyserver

[root@hurys22 hadoop]# jps
6064 NodeManager
6289 JobHistoryServer
5346 NameNode
5783 ResourceManager
6359 Jps
5627 SecondaryNameNode
5469 DataNode

命令方式查看 节点状态
[root@hurys22 hadoop]# yarn node -list -all

在hdfs文件系统中创建input目录
单个目录
[root@hurys22 hadoop]# hdfs dfs -mkdir /input
多级目录
[root@hurys22 hadoop]# hdfs dfs -mkdir -p /input

查看目录下的文件信息
[root@hurys22 hadoop]# hdfs dfs -ls /
[root@hurys22 hadoop]# hdfs dfs -ls /input

上传文件到hdfs指定目录下
[root@hurys22 hadoop]# hdfs dfs -put ./yarn-env.sh /input/

下载
[root@hurys22 hadoop260]# hdfs dfs -get /input/yarn-env.sh ./yarn-env.sh.bak

删除
[root@hurys22 hadoop260]# hdfs dfs -rm -r /input

远程连接(配置SSH免密登录;start-dfs.sh、start-yarn.sh 等脚本通过ssh启动各节点进程,建议在启动集群之前完成)
[root@hurys22 hadoop]# ssh-keygen -t rsa  -P ""
[root@hurys22 hadoop]# cd
[root@hurys22 ~]# cd .ssh/
[root@hurys22 .ssh]# ls
authorized_keys  id_rsa  id_rsa.pub  known_hosts
[root@hurys22 .ssh]# cat id_rsa.pub 
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCf3QxyVdpeunW9AL8+4CpYQLv66E/xcDMO+GpRoObcoJ0QJ99pR2tynbd4joEqaOKRf3j79g5HAhcmPyWhuiif7CFLrqpRJKLeqpERb9etna2njwbcR8pYwuSVIu2Tym0CHMtnddMUvOL+GAZWrwpCXze9QxNnflEOBF63ObnhCD8DnN6+SoNFyYqtChUdmub+SKy3gv2BOX9aFC9pvWvPgjVVFmAVgS2BrTAZX0yJbPCNz9TCvD4C7YZlbBoPb7WFz6Bd+Jp1ilsotHK/I6uzp99y5DFKVBAhvCzQVdJNfbUjlommjzg/+FHyOv8DoOlP6iwcxheXK3ZJfuRmbOzL root@hurys22

[root@hurys22 .ssh]# cd
[root@hurys22 ~]# ssh-copy-id -i .ssh/id_rsa.pub  -p22 root@192.168.0.22     # 密码为root(即之前设置的虚拟机密码)
[root@hurys22 ~]# ssh -p22 root@192.168.0.22
Last login: Tue Apr 26 15:31:21 2022
[root@hurys22 ~]# exit
logout
Connection to 192.168.0.22 closed.
[root@hurys22 ~]# ssh -p22 root@hurys22
Last login: Tue Apr 26 16:46:50 2022 from hurys22

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/不正经/article/detail/455349
推荐阅读
相关标签
  

闽ICP备14008679号