tar -zxvf jdk-8u144-linux-x64.tar.gz -C /opt/module/
pwd # check the JDK installation path
sudo vi /etc/profile
#JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_144
export PATH=$PATH:$JAVA_HOME/bin
:wq
source /etc/profile
java -version
Note: reboot the machine (if java -version already works, a reboot is not needed)
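If the installation succeeded, the output of java -version should report the version you unpacked; the exact build strings may vary, but it will look roughly like this:
java version "1.8.0_144"
Java(TM) SE Runtime Environment (build 1.8.0_144-b01)
Java HotSpot(TM) 64-Bit Server VM (build 25.144-b01, mixed mode)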
sync
sudo reboot
cd /opt/software/
tar -zxvf hadoop-2.7.2.tar.gz -C /opt/module/
ls /opt/module/
pwd
(2) Open the /etc/profile file
sudo vi /etc/profile
Append the Hadoop path at the end of the profile file (Shift+G jumps to the end of the file in vi):
##HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-2.7.2
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
(3) Save and exit
:wq
(4) Make the changes take effect
source /etc/profile
hadoop version
sync
sudo reboot
cd /home/atguigu
mkdir bin
cd bin/
touch xsync
vi xsync
Write the following code in the file:
#!/bin/bash
#1 Get the number of input arguments; exit immediately if there are none
pcount=$#
if ((pcount==0)); then
  echo no args;
  exit;
fi
#2 Get the file name
p1=$1
fname=`basename $p1`
echo fname=$fname
#3 Get the absolute path of the parent directory
pdir=`cd -P $(dirname $p1); pwd`
echo pdir=$pdir
#4 Get the current user name
user=`whoami`
#5 Loop over the target hosts
for ((host=103; host<105; host++)); do
  echo ------------------- hadoop$host --------------
  rsync -rvl $pdir/$fname $user@hadoop$host:$pdir
done
chmod 777 xsync
xsync /home/atguigu/bin
Note: if placing xsync in the /home/atguigu/bin directory still does not make it usable globally, you can move xsync to the /usr/local/bin directory instead.
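A minimal sketch of that fallback (assuming the script currently lives in /home/atguigu/bin):
sudo mv /home/atguigu/bin/xsync /usr/local/bin/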
xsync relies on rsync, so if rsync is missing, install it first (on every node):
sudo yum install rsync
(1) Core configuration file
cd /opt/module/hadoop-2.7.2/etc/hadoop
vi core-site.xml
Add the following inside the <configuration> element:
<!-- Specify the address of the NameNode in HDFS -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop102:9000</value>
</property>
<!-- Specify the storage directory for files generated at Hadoop run time -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-2.7.2/data/tmp</value>
</property>
(2) HDFS configuration files
Configure hadoop-env.sh
vi hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
Configure hdfs-site.xml
vi hdfs-site.xml
The replication factor is set to 1 to reduce overhead on this small test cluster; in production it should normally be 3.
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- Specify the Secondary NameNode host -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop104:50090</value>
</property>
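Once HDFS is up, you can confirm the effective replication factor of any stored file with hdfs dfs -stat (the path below is a hypothetical example):
bin/hdfs dfs -stat "%r" /origin_data/somefile.txt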
(3) YARN configuration files
Configure yarn-env.sh
vi yarn-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
Configure yarn-site.xml
vi yarn-site.xml
Add the following configuration to the file:
<!-- How the Reducer fetches data -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Specify the address of YARN's ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop103</value>
</property>
(4) MapReduce configuration files
Configure mapred-env.sh
vi mapred-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
Configure mapred-site.xml
cp mapred-site.xml.template mapred-site.xml
vi mapred-site.xml
Add the following configuration to the file:
<!-- Specify that MR runs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
(5) slaves file configuration
vi slaves
The file should contain only the worker hostnames, one per line (no extra blank lines or trailing spaces):
hadoop102
hadoop103
hadoop104
Passwordless SSH login must be configured on hadoop102 and hadoop103 (hadoop102 starts HDFS and hadoop103 starts YARN, so each must be able to ssh to every node).
(1) Go to the ~/.ssh directory under the home directory
cd .ssh/
ssh-keygen -t rsa
Press Enter three times at the prompts; two files will be generated: id_rsa (the private key) and id_rsa.pub (the public key).
(2) Copy the public key to each machine you want to reach without a password
ssh-copy-id hadoop102
ssh-copy-id hadoop103
ssh-copy-id hadoop104
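A quick way to verify that passwordless login works (hostname is just an arbitrary test command):
ssh hadoop103 hostname
If this prints hadoop103 without asking for a password, the key was installed correctly.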
(3) If the ssh-copy-id command is unavailable, install the OpenSSH client first
sudo yum -y install openssh-clients
sudo yum -y install wget
wget http://mirrors.aliyun.com/centos/6.10/os/x86_64/
Various mirror addresses:
http://ftp.sjtu.edu.cn/centos/6.10/os/x86_64/
http://mirrors.cqu.edu.cn/CentOS/6.10/os/x86_64/
http://mirrors.163.com/centos/6.10/os/x86_64/
http://mirrors.cn99.com/centos/6.10/os/x86_64/
http://mirror.lzu.edu.cn/centos/6.10/os/x86_64/
http://mirrors.ustc.edu.cn/centos/6.10/os/x86_64/
http://mirrors.aliyun.com/centos/6.10/os/x86_64/
http://mirrors.huaweicloud.com/centos/6.10/os/x86_64/
http://mirrors.bfsu.edu.cn/centos/6.10/os/x86_64/
http://mirrors.neusoft.edu.cn/centos/6.10/os/x86_64/
xsync hadoop-2.7.2/
xsync jdk1.8.0_144/
sudo scp /etc/profile root@hadoop103:/etc/profile
# Copy the /etc/profile file on the current VM to /etc/profile on the hadoop103 VM as the root user
source /etc/profile
java -version
hadoop version
Format the NameNode before the first start:
bin/hdfs namenode -format
sbin/start-dfs.sh # run on hadoop102, where the NameNode is configured
sbin/start-yarn.sh # run on hadoop103, where the ResourceManager is configured
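After both start scripts finish, jps on each node should show the daemons implied by the XML configuration above (a rough sketch of the expected layout):
jps # hadoop102: NameNode, DataNode, NodeManager
jps # hadoop103: ResourceManager, DataNode, NodeManager
jps # hadoop104: SecondaryNameNode, DataNode, NodeManager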
tar -zxvf zookeeper-3.4.10.tar.gz -C /opt/module/
(1) Create zkData under the /opt/module/zookeeper-3.4.10/ directory
mkdir zkData
(2) Create a file named myid in the /opt/module/zookeeper-3.4.10/zkData directory
touch myid
(3) Edit the myid file
vi myid
Add the number matching this server's entry in zoo.cfg: 2
(1) Rename zoo_sample.cfg in the conf directory to zoo.cfg
mv zoo_sample.cfg zoo.cfg
(2) Open the zoo.cfg file
vi zoo.cfg
Modify the data storage path:
dataDir=/opt/module/zookeeper-3.4.10/zkData
Add the following configuration:
#######################cluster##########################
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888
server.4=hadoop104:2888:3888
(3) Go to the module directory and distribute ZooKeeper
xsync zookeeper-3.4.10/
(4) Modify the myid on hadoop103 and hadoop104
cd /opt/module/zookeeper-3.4.10/zkData/
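A minimal sketch for updating both remote files directly from hadoop102, matching the server.3 and server.4 entries in zoo.cfg:
ssh hadoop103 "echo 3 > /opt/module/zookeeper-3.4.10/zkData/myid"
ssh hadoop104 "echo 4 > /opt/module/zookeeper-3.4.10/zkData/myid"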
(1) Start the servers on each node
[atguigu@hadoop102 zookeeper-3.4.10]$ bin/zkServer.sh start
[atguigu@hadoop103 zookeeper-3.4.10]$ bin/zkServer.sh start
[atguigu@hadoop104 zookeeper-3.4.10]$ bin/zkServer.sh start
(2) Check the status
bin/zkServer.sh status
In principle there will be two followers and one leader.
1) Create the script in the /home/atguigu/bin directory on hadoop102
cd /home/atguigu/bin/
vi zk.sh
Write the following content in the script:
#!/bin/bash
case $1 in
"start"){
	for i in hadoop102 hadoop103 hadoop104
	do
		ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh start"
	done
};;
"stop"){
	for i in hadoop102 hadoop103 hadoop104
	do
		ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh stop"
	done
};;
"status"){
	for i in hadoop102 hadoop103 hadoop104
	do
		ssh $i "/opt/module/zookeeper-3.4.10/bin/zkServer.sh status"
	done
};;
esac
2) Add execute permission to the script
chmod 777 zk.sh
3) ZooKeeper cluster start script
[atguigu@hadoop102 module]$ zk.sh start
4) ZooKeeper cluster stop script
[atguigu@hadoop102 module]$ zk.sh stop
5) ZooKeeper cluster status script
[atguigu@hadoop102 module]$ zk.sh status
Note: one more piece of configuration is required before these scripts will work.
Append the environment variables in /etc/profile to the ~/.bashrc file, because commands run over ssh use a non-login shell that reads ~/.bashrc but not /etc/profile.
That is, run the following (on all three virtual machines):
cat /etc/profile >> ~/.bashrc
1) Create the script lg.sh in the /home/atguigu/bin directory
[atguigu@hadoop102 bin]$ vi lg.sh
2) Write the following content in the script:
#! /bin/bash
for i in hadoop102 hadoop103
do
ssh $i "java -classpath /opt/module/log-collector-1.0-SNAPSHOT-jar-with-dependencies.jar com.atguigu.appclient.AppMain $1 $2 >/opt/module/test.log &"
done
3) Add execute permission to the script
chmod 777 lg.sh
4) Run the script
[atguigu@hadoop102 module]$ lg.sh
1) Create the script dt.sh in the /home/atguigu/bin directory
[atguigu@hadoop102 bin]$ vim dt.sh
2) Write the following content in the script:
#!/bin/bash
log_date=$1
for i in hadoop102 hadoop103 hadoop104
do
ssh -t $i "sudo date -s $log_date"
done
An explanation of ssh -t: https://www.cnblogs.com/kevingrace/p/6110842.html
3) Add execute permission to the script
chmod 777 dt.sh
4) Run the script
dt.sh 2019-2-10
1) Create the script xcall.sh in the /home/atguigu/bin directory
[atguigu@hadoop102 bin]$ vim xcall.sh
2) Write the following content in the script:
#! /bin/bash
for i in hadoop102 hadoop103 hadoop104
do
echo --------- $i ----------
ssh $i "$*"
done
3) Add execute permission to the script
chmod 777 xcall.sh
4) Run the script
xcall.sh jps
1) Upload apache-flume-1.7.0-bin.tar.gz to the /opt/software directory on Linux
2) Extract apache-flume-1.7.0-bin.tar.gz to the /opt/module/ directory
tar -zxf apache-flume-1.7.0-bin.tar.gz -C /opt/module/
3) Rename apache-flume-1.7.0-bin to flume
mv apache-flume-1.7.0-bin flume
4) Rename the flume-env.sh.template file under flume/conf to flume-env.sh, and configure flume-env.sh
[atguigu@hadoop102 conf]$ mv flume-env.sh.template flume-env.sh
[atguigu@hadoop102 conf]$ vi flume-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
5) Distribute Flume
Go to the module directory:
xsync flume/
(1) Create the file-flume-kafka.conf file in the /opt/module/flume/conf directory
vi file-flume-kafka.conf
Configure the following content in the file:
a1.sources=r1
a1.channels=c1 c2

# configure source
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /opt/module/flume/test/log_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /tmp/logs/app.+
a1.sources.r1.fileHeader = true
a1.sources.r1.channels = c1 c2

# interceptor
a1.sources.r1.interceptors = i1 i2
a1.sources.r1.interceptors.i1.type = com.atguigu.flume.interceptor.LogETLInterceptor$Builder
a1.sources.r1.interceptors.i2.type = com.atguigu.flume.interceptor.LogTypeInterceptor$Builder

a1.sources.r1.selector.type = multiplexing
a1.sources.r1.selector.header = topic
a1.sources.r1.selector.mapping.topic_start = c1
a1.sources.r1.selector.mapping.topic_event = c2

# configure channel
a1.channels.c1.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.channels.c1.kafka.topic = topic_start
a1.channels.c1.parseAsFlumeEvent = false
a1.channels.c1.kafka.consumer.group.id = flume-consumer

a1.channels.c2.type = org.apache.flume.channel.kafka.KafkaChannel
a1.channels.c2.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.channels.c2.kafka.topic = topic_event
a1.channels.c2.parseAsFlumeEvent = false
a1.channels.c2.kafka.consumer.group.id = flume-consumer
(2) Distribute the file
xsync file-flume-kafka.conf
First place the built interceptor jar into the /opt/module/flume/lib folder on hadoop102.
cd /opt/module/flume/lib/
ls | grep interceptor
xsync flume-interceptor-1.0-SNAPSHOT.jar
[atguigu@hadoop102 flume]$ bin/flume-ng agent --name a1 --conf-file conf/file-flume-kafka.conf &
1) Create the script f1.sh in the /home/atguigu/bin directory
[atguigu@hadoop102 bin]$ vim f1.sh
Fill in the following content in the script:
#!/bin/bash
case $1 in
"start"){
	for i in hadoop102 hadoop103
	do
		echo " -------- starting collection flume on $i -------"
		ssh $i "nohup /opt/module/flume/bin/flume-ng agent --conf-file /opt/module/flume/conf/file-flume-kafka.conf --name a1 -Dflume.root.logger=INFO,LOGFILE >/dev/null 2>&1 &"
	done
};;
"stop"){
	for i in hadoop102 hadoop103
	do
		echo " -------- stopping collection flume on $i -------"
		ssh $i "ps -ef | grep file-flume-kafka | grep -v grep | awk '{print \$2}' | xargs kill"
	done
};;
esac
Note 1: nohup keeps the corresponding process running after you log out or close the terminal; nohup means "no hang up", i.e. keep the command running without interruption.
Note 2: /dev/null is the Linux null device file; everything written to it is discarded, which is why it is nicknamed the "black hole".
Standard input 0: input from the keyboard, /proc/self/fd/0
Standard output 1: output to the screen (the console), /proc/self/fd/1
Standard error 2: error output to the screen (the console), /proc/self/fd/2
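A tiny demonstration of how the redirections in the start command fit together (the command and file name are arbitrary examples):
ls /nonexistent > out.log 2>&1   # send stdout (fd 1) to out.log, then point stderr (fd 2) at the same place
ls /nonexistent > /dev/null 2>&1 # discard both streams, exactly as f1.sh does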
2) Add execute permission to the script
chmod 777 f1.sh
3) f1 cluster start script
[atguigu@hadoop102 module]$ f1.sh start
4) f1 cluster stop script
[atguigu@hadoop102 module]$ f1.sh stop
1) Extract the installation package
tar -zxvf kafka_2.11-0.11.0.2.tgz -C /opt/module/
# this produces the directory kafka_2.11-0.11.0.2/
2) Rename the extracted directory
mv kafka_2.11-0.11.0.2/ kafka
3) Create a logs folder under the /opt/module/kafka directory
mkdir logs
4) Modify the configuration file
cd /opt/module/kafka/config/
vi server.properties
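The original does not show the exact edits; for a Kafka 0.11 broker in this cluster, the server.properties settings usually changed are along these lines (the values are assumptions matching this setup, not a definitive list):
broker.id=0                      # must be unique per broker
delete.topic.enable=true         # allow topics to be deleted
log.dirs=/opt/module/kafka/logs  # use the logs folder created above
zookeeper.connect=hadoop102:2181,hadoop103:2181,hadoop104:2181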
5) Distribute Kafka
Run in the module directory:
xsync kafka/
On hadoop103 and hadoop104, modify broker.id in the configuration file:
cd /opt/module/kafka/config/
vi server.properties
Note: broker.id must not be duplicated; set them to 1 and 2 respectively
6) Start Kafka
Start Kafka on hadoop102, hadoop103, and hadoop104 in turn:
bin/kafka-server-start.sh config/server.properties &
7) Stop the cluster
bin/kafka-server-stop.sh stop
1) Create the script kf.sh in the /home/atguigu/bin directory
vim kf.sh
Fill in the following content in the script:
#!/bin/bash
case $1 in
"start"){
	for i in hadoop102 hadoop103 hadoop104
	do
		echo " -------- starting Kafka on $i -------"
		# the JMX port is used for KafkaManager monitoring
		ssh $i "export JMX_PORT=9988 && /opt/module/kafka/bin/kafka-server-start.sh -daemon /opt/module/kafka/config/server.properties "
	done
};;
"stop"){
	for i in hadoop102 hadoop103 hadoop104
	do
		echo " -------- stopping Kafka on $i -------"
		ssh $i "/opt/module/kafka/bin/kafka-server-stop.sh stop"
	done
};;
esac
Note: the JMX port is opened when starting Kafka so that KafkaManager can monitor the brokers later.
2) Add execute permission to the script
chmod 777 kf.sh
3) kf cluster start script
kf.sh start
4) kf cluster stop script
kf.sh stop
On hadoop102:
cd /opt/module/kafka/
Consume messages with Kafka:
bin/kafka-console-consumer.sh \
--zookeeper hadoop102:2181 --from-beginning --topic topic_start
After the consumer has started, open a second (cloned) session on hadoop102:
cd /opt/module/kafka/
Produce messages with Kafka:
bin/kafka-console-producer.sh \
--broker-list hadoop102:9092 --topic topic_start
Type hello in the cloned session; the original Kafka consumer session will then print hello.
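To confirm which topics now exist on the brokers (topic_start will have been auto-created by the test above if it did not already exist, since topic auto-creation is on by default in Kafka 0.11):
bin/kafka-topics.sh --zookeeper hadoop102:2181 --list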
cd /opt/software/
mv kafka-manager-1.3.3.22.zip /opt/module/
cd /opt/module/
unzip kafka-manager-1.3.3.22.zip
Supplement: install the unzip command if it is missing
yum install -y unzip zip
[atguigu@hadoop102 conf]$ vi application.conf
Modify the kafka-manager.zkhosts setting to:
kafka-manager.zkhosts="hadoop102:2181,hadoop103:2181,hadoop104:2181"
[atguigu@hadoop102 kafka-manager-1.3.3.22]$
nohup bin/kafka-manager -Dhttp.port=7456 >/opt/module/kafka-manager-1.3.3.22/start.log 2>&1 &
http://hadoop102:7456
You should see the KafkaManager UI; choose Add Cluster.
From there you can inspect the state of the whole Kafka cluster, including the status of topics, brokers, and consumers.
The Kafka-Manager logs can be found under the /opt/module/kafka-manager-1.3.3.22/application.home_IS_UNDEFINED directory.
1) Create the script km.sh in the /home/atguigu/bin directory
vi km.sh
Fill in the following content in the script:
#!/bin/bash
case $1 in
"start"){
	echo " -------- starting KafkaManager -------"
	nohup /opt/module/kafka-manager-1.3.3.22/bin/kafka-manager -Dhttp.port=7456 >start.log 2>&1 &
};;
"stop"){
	echo " -------- stopping KafkaManager -------"
	# ProdServerStart is the main class of the KafkaManager (Play framework) process
	ps -ef | grep ProdServerStart | grep -v grep | awk '{print $2}' | xargs kill
};;
esac
2) Add execute permission to the script
[atguigu@hadoop102 bin]$ chmod 777 km.sh
3) km start script
[atguigu@hadoop102 module]$ km.sh start
4) km stop script
[atguigu@hadoop102 module]$ km.sh stop
1) Flume configuration analysis
2) The concrete Flume configuration is as follows:
(1) Create the kafka-flume-hdfs.conf file in the /opt/module/flume/conf directory on hadoop104
[atguigu@hadoop104 bin]$ cd /opt/module/flume/conf/
vi kafka-flume-hdfs.conf
Configure the following content in the file:
## components
a1.sources=r1 r2
a1.channels=c1 c2
a1.sinks=k1 k2

## source1
a1.sources.r1.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r1.batchSize = 5000
a1.sources.r1.batchDurationMillis = 2000
a1.sources.r1.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sources.r1.kafka.topics=topic_start

## source2
a1.sources.r2.type = org.apache.flume.source.kafka.KafkaSource
a1.sources.r2.batchSize = 5000
a1.sources.r2.batchDurationMillis = 2000
a1.sources.r2.kafka.bootstrap.servers = hadoop102:9092,hadoop103:9092,hadoop104:9092
a1.sources.r2.kafka.topics=topic_event

## channel1
a1.channels.c1.type = file
a1.channels.c1.checkpointDir = /opt/module/flume/checkpoint/behavior1
a1.channels.c1.dataDirs = /opt/module/flume/data/behavior1/
a1.channels.c1.maxFileSize = 2146435071
a1.channels.c1.capacity = 1000000
a1.channels.c1.keep-alive = 6

## channel2
a1.channels.c2.type = file
a1.channels.c2.checkpointDir = /opt/module/flume/checkpoint/behavior2
a1.channels.c2.dataDirs = /opt/module/flume/data/behavior2/
a1.channels.c2.maxFileSize = 2146435071
a1.channels.c2.capacity = 1000000
a1.channels.c2.keep-alive = 6

## sink1
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /origin_data/gmall/log/topic_start/%Y-%m-%d
a1.sinks.k1.hdfs.filePrefix = logstart-
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = second

## sink2
a1.sinks.k2.type = hdfs
a1.sinks.k2.hdfs.path = /origin_data/gmall/log/topic_event/%Y-%m-%d
a1.sinks.k2.hdfs.filePrefix = logevent-
a1.sinks.k2.hdfs.round = true
a1.sinks.k2.hdfs.roundValue = 10
a1.sinks.k2.hdfs.roundUnit = second

## avoid producing lots of small files
a1.sinks.k1.hdfs.rollInterval = 10
a1.sinks.k1.hdfs.rollSize = 134217728
a1.sinks.k1.hdfs.rollCount = 0
a1.sinks.k2.hdfs.rollInterval = 10
a1.sinks.k2.hdfs.rollSize = 134217728
a1.sinks.k2.hdfs.rollCount = 0

## output file type: compressed stream with the lzop codec
a1.sinks.k1.hdfs.fileType = CompressedStream
a1.sinks.k2.hdfs.fileType = CompressedStream
a1.sinks.k1.hdfs.codeC = lzop
a1.sinks.k2.hdfs.codeC = lzop

## wiring: bind sources and sinks to channels
a1.sources.r1.channels = c1
a1.sinks.k1.channel= c1
a1.sources.r2.channels = c2
a1.sinks.k2.channel= c2
1) Create the script f2.sh in the /home/atguigu/bin directory
vi f2.sh
Fill in the following content in the script:
#!/bin/bash
case $1 in
"start"){
	for i in hadoop104
	do
		echo " -------- starting consumer flume on $i -------"
		ssh $i "nohup /opt/module/flume/bin/flume-ng agent --conf-file /opt/module/flume/conf/kafka-flume-hdfs.conf --name a1 -Dflume.root.logger=INFO,LOGFILE >/opt/module/flume/log.txt 2>&1 &"
	done
};;
"stop"){
	for i in hadoop104
	do
		echo " -------- stopping consumer flume on $i -------"
		ssh $i "ps -ef | grep kafka-flume-hdfs | grep -v grep | awk '{print \$2}' | xargs kill"
	done
};;
esac
2) Add execute permission to the script
chmod 777 f2.sh
3) f2 cluster start script
[atguigu@hadoop102 module]$ f2.sh start
4) f2 cluster stop script
[atguigu@hadoop102 module]$ f2.sh stop
1) Create the script cluster.sh in the /home/atguigu/bin directory
vi cluster.sh
Fill in the following content in the script:
#!/bin/bash
case $1 in
"start"){
	echo " -------- starting the cluster -------"
	echo " -------- starting the hadoop cluster -------"
	/opt/module/hadoop-2.7.2/sbin/start-dfs.sh
	ssh hadoop103 "/opt/module/hadoop-2.7.2/sbin/start-yarn.sh"

	# start the ZooKeeper cluster
	zk.sh start
	sleep 4s;

	# start the Flume collection agents
	f1.sh start

	# start the Kafka cluster
	kf.sh start
	sleep 6s;

	# start the Flume consumer agent
	f2.sh start

	# start KafkaManager
	km.sh start
};;
"stop"){
	echo " -------- stopping the cluster -------"
	# stop KafkaManager
	km.sh stop

	# stop the Flume consumer agent
	f2.sh stop

	# stop the Kafka cluster
	kf.sh stop
	sleep 6s;

	# stop the Flume collection agents
	f1.sh stop

	# stop the ZooKeeper cluster
	zk.sh stop

	echo " -------- stopping the hadoop cluster -------"
	ssh hadoop103 "/opt/module/hadoop-2.7.2/sbin/stop-yarn.sh"
	/opt/module/hadoop-2.7.2/sbin/stop-dfs.sh
};;
esac
2) Add execute permission to the script
[atguigu@hadoop102 bin]$ chmod 777 cluster.sh
3) cluster start script
[atguigu@hadoop102 module]$ cluster.sh start
4) cluster stop script
[atguigu@hadoop102 module]$ cluster.sh stop
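After cluster.sh start finishes, a quick sanity check across all three nodes can reuse the xcall.sh script created earlier; each node should show the Java processes started above:
xcall.sh jps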