1. Install the JDK [JDK 1.7 is recommended]
(1) (a) Download and install automatically, or (b) download the archive and extract it manually:
# tar -xvf /home/root/spark1/jdk-8u45-linux-x64.tar.gz -C /home/root/spark1
# mv /home/root/spark1/jdk1.8.0 /usr/local/lee/jdk1.8.0
(2) Remove the Java versions bundled with CentOS
List the installed Java packages:
[root@spark1 ~]# rpm -qa | grep java
java-1.8.0-openjdk-devel-1.8.0.45-28.b13.el6_6.x86_64
tzdata-java-2012j-1.el6.noarch
java-1.8.0-openjdk-1.8.0.45-28.b13.el6_6.x86_64
java-1.6.0-openjdk-1.6.0.0-1.50.1.11.5.el6_3.x86_64
java-1.8.0-openjdk-headless-1.8.0.45-28.b13.el6_6.x86_64
java-1.7.0-openjdk-1.7.0.9-2.3.4.1.el6_3.x86_64
Remove the unneeded Java versions:
# yum -y remove java-1.6.0-openjdk-1.6.0.0-1.50.1.11.5.el6_3.x86_64
# yum -y remove java-1.7.0-openjdk-1.7.0.9-2.3.4.1.el6_3.x86_64
# yum -y remove java-1.8.0-openjdk-headless-1.8.0.45-28.b13.el6_6.x86_64
Reload the profile so the new settings take effect:
# source /etc/profile
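The guide sources /etc/profile but never shows what was added to it. A minimal sketch, assuming the JDK was moved to /usr/local/lee/jdk1.8.0 as above, appends something like:
export JAVA_HOME=/usr/local/lee/jdk1.8.0
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
After sourcing, java -version should report the newly installed JDK rather than OpenJDK.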
2. Change the hostname
(1) Check the current hostname
# hostname
(2) Edit the network file
# vim /etc/sysconfig/network
(3) Edit the hosts file
# vim /etc/hosts
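For reference, on a three-node cluster the two files might look roughly like this (the hostnames follow the slaves file in the Hadoop section; the IP addresses are placeholders, not values from the original guide):
/etc/sysconfig/network on the first machine:
NETWORKING=yes
HOSTNAME=spark1
/etc/hosts on every machine (the Hadoop configs below refer to the master as spark, so that name must resolve as well):
192.168.1.101 spark spark1
192.168.1.102 spark2
192.168.1.103 spark3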
The changes take effect after a reboot!
3. Disable the firewall
Disable the firewall permanently:
# chkconfig --level 35 iptables off
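chkconfig only stops iptables from starting at the next boot; to shut down the firewall that is currently running as well, one would typically also run:
# service iptables stop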
4. Configure SSH
# yum install openssh-server
After installation, generate a key pair and, inside ~/.ssh, register it as an authorized key:
# ssh-keygen -t rsa
# cp id_rsa.pub authorized_keys
Distribute the key to the worker nodes:
# scp ~/.ssh/authorized_keys <node-user>@<node-hostname>:~/.ssh
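Before continuing, it is worth tightening the permissions and testing a passwordless login (spark2 is simply one of the node hostnames used in this guide):
# chmod 700 ~/.ssh
# chmod 600 ~/.ssh/authorized_keys
# ssh spark2
If no password prompt appears, the key distribution worked.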
5. Install Hadoop
Configure the following 7 files:
(1) hadoop-env.sh
export JAVA_HOME=/usr/local/lee/jdk1.8.0
export HADOOP_OPTS=-Djava.net.preferIPv4Stack=true
(2) yarn-env.sh
export JAVA_HOME=/usr/local/lee/jdk1.8.0
(3) slaves
spark1
spark2
spark3
(4) core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://spark:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/lee/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>hadoop.proxyuser.hduser.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hduser.groups</name>
<value>*</value>
</property>
</configuration>
(5) hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>spark:9001</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/lee/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/lee/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
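The values above point at local filesystem paths (hadoop.tmp.dir, dfs.namenode.name.dir, dfs.datanode.data.dir). It does no harm to create them in advance on each node; the base path /usr/local/lee/hadoop is taken from the configuration above:
# mkdir -p /usr/local/lee/hadoop/tmp
# mkdir -p /usr/local/lee/hadoop/dfs/name
# mkdir -p /usr/local/lee/hadoop/dfs/data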
(6) yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>spark:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>spark:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>spark:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>spark:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>spark:8088</value>
</property>
</configuration>
(7) mapred-site.xml
# mv mapred-site.xml.template mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>spark:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>spark:19888</value>
</property>
</configuration>
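All seven files are edited on the master, but the same Hadoop directory has to be present on every node. Assuming Hadoop lives at /usr/local/lee/hadoop as in the paths above, one way is simply to copy the whole directory out (node names follow the slaves file):
# scp -r /usr/local/lee/hadoop root@spark2:/usr/local/lee/
# scp -r /usr/local/lee/hadoop root@spark3:/usr/local/lee/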
6. Create a new user: spark
# useradd spark
# passwd spark
Edit the /etc/sudoers file, find the line for root, and add a line for spark directly below it, as shown:
##Allow root to run any commands anywhere
root ALL=(ALL) ALL
spark ALL=(ALL) ALL
Once that is done you can log in with the spark account and run sudo su - to gain root privileges.
Change the ownership of the install directory:
# sudo chown -R spark:spark /usr/local/lee
(The Hadoop files all live under one directory (e.g. lee), so its owner is changed to the spark user so that Hadoop can create dfs/name, dfs/data, and other subdirectories when the filesystem is formatted.)
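The next section calls hadoop and start-all.sh directly, which assumes Hadoop's bin and sbin directories are on the PATH. A minimal sketch of the /etc/profile additions, assuming Hadoop is installed at /usr/local/lee/hadoop:
export HADOOP_HOME=/usr/local/lee/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
Run source /etc/profile again afterwards.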
7. Format Hadoop
# hadoop namenode -format
Start Hadoop and check its status:
# start-all.sh
# jps
# hadoop dfsadmin -report
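If everything came up correctly, jps on the master typically shows something like the following (process IDs will differ; DataNode and NodeManager appear on the master only if its hostname is also listed in slaves):
2345 NameNode
2456 SecondaryNameNode
2567 ResourceManager
2678 DataNode
2789 NodeManager
2890 Jps
On the worker nodes only DataNode, NodeManager and Jps should appear.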
8. Configure Spark
(1) Visit http://spark:8080/ to open the Spark Web UI.
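The original section stops here. For the Web UI at spark:8080 to be reachable, a standalone Spark master and its workers must already be running; a minimal sketch, assuming Spark was unpacked to /usr/local/lee/spark, would edit two files under conf/ and then start the cluster:
conf/spark-env.sh:
export JAVA_HOME=/usr/local/lee/jdk1.8.0
export SPARK_MASTER_IP=spark   # called SPARK_MASTER_HOST on Spark 2.x
conf/slaves:
spark1
spark2
spark3
# /usr/local/lee/spark/sbin/start-all.sh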