Manually Installing Hive 3 (using HDP packages as an example, applicable to CentOS 8)

1 Environment

The following walks through installing and configuring Hive 3 in detail, using the HDP distribution (HDP-3.1.5.0-centos7-rpm.tar.gz) as an example. The OS can be CentOS 7 or CentOS 8 (although the tarball is labeled centos7, the component RPMs used here also work on CentOS 8).

Before installing, the following must already be installed and configured manually (a minimal sketch of these steps follows the list):

  1. JDK8
  2. SSH
  3. Firewall
  4. hostname / network (on CentOS 8, configure with nmcli)
  5. SELinux
  6. NTP (on CentOS 8, install chrony)
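
The exact steps depend on the environment; the following is only a minimal sketch of these prerequisites on CentOS 8, assuming a single node named node01 (the hostname used in the rest of this article) and a JDK unpacked to /usr/local/jdk8 (the JAVA_HOME used later in hadoop-env.sh):

# Firewall and SELinux off (the SELinux change takes full effect after a reboot)
systemctl disable --now firewalld
setenforce 0
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

# Hostname and passwordless SSH (static IP / DNS via nmcli is environment-specific and omitted here)
hostnamectl set-hostname node01
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
ssh-copy-id root@node01

# JDK 8
echo 'export JAVA_HOME=/usr/local/jdk8' >> /etc/profile
echo 'export PATH=$JAVA_HOME/bin:$PATH' >> /etc/profile
source /etc/profile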

Components and versions installed here:

  • Hadoop   |   3.1.1.3.1.5.0-152
  • ZooKeeper   |   3.4.6.3.1.5.0-152
  • Hive   |   3.1.0.3.1.5.0-152
  • Tez   |   0.9.1.3.1.5.0-152
  • Spark   |   2.3.2.3.1.5.0-152

2 Installation

# 0 Unpack the HDP package
tar -zxf HDP-3.1.5.0-centos7-rpm.tar.gz

# 1 Dependencies (optional)
cd HDP/centos7/3.1.5.0-152/bigtop-tomcat
rpm -ivh bigtop-tomcat-7.0.94-1.noarch.rpm
cd ../bigtop-jsvc
rpm -ivh bigtop-jsvc-1.0.15-152.x86_64.rpm
cd ../hdp-select
rpm -ivh hdp-select-3.1.5.0-152.el7.noarch.rpm
yum install -y redhat-lsb

# 2 Time synchronization
yum -y install chrony
## Add: server ntp1.aliyun.com iburst
vim /etc/chrony.conf
## Check
chronyc sourcestats -v
timedatectl

# 3 ZooKeeper (HA and some components, e.g. Hive, depend on ZK)
cd ../zookeeper
rpm -ivh *.rpm
ln -s /usr/hdp/3.1.5.0-152 /usr/hdp/current

# 4 hadoop
cd ../hadoop
rpm -ivh *.rpm --nodeps

# 5 Tez (Hive execution engine)
cd ../tez
rpm -ivh *.rpm

# 6 hive
cd ../hive
rpm -ivh *.rpm --nodeps

# 7 Spark2 (optional)
## Required only if spark2_shuffle is added to yarn.nodemanager.aux-services
cd ../spark2
rpm -ivh spark2_3_1_5_0_152-yarn-shuffle-2.3.2.3.1.5.0-152.noarch.rpm
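
After the RPMs are installed, a quick sanity check (a sketch; the exact output will vary):

# Every component should report version 3.1.5.0-152
hdp-select versions
hdp-select status | grep -E 'hadoop|hive|zookeeper|tez'

# The symlink created in step 3 should point at the versioned directory
ls -l /usr/hdp/current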

3 Configuration

For the settings below that involve memory, adjust the values to your own server's resources and make them reasonably generous. After each configuration, a set of production-environment values is also given for reference.

3.1 /etc/hadoop/conf/workers

Add the Hadoop worker nodes here, one hostname per line, as shown below.
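
A minimal example for the single node used throughout this article (node01); with more nodes, add one hostname per line:

cat > /etc/hadoop/conf/workers <<'EOF'
node01
EOF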

3.2 /etc/hadoop/conf/hadoop-env.sh

export JAVA_HOME=/usr/local/jdk8
export HADOOP_HOME_WARN_SUPPRESS=1
export HADOOP_HOME=/usr/hdp/current/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/conf
export JSVC_HOME=/usr/lib/bigtop-utils
export HADOOP_HEAPSIZE="1024"
export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms1024m"
export HADOOP_OPTS="-Djava.net.preferIPv4Stack=true ${HADOOP_OPTS}"
USER="$(whoami)"
HADOOP_JOBTRACKER_OPTS="-server -XX:ParallelGCThreads=1 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xmx1024m -Dhadoop.security.logger=INFO,DRFAS -Dmapred.audit.logger=INFO,MRAUDIT -Dhadoop.mapreduce.jobsummary.logger=INFO,JSA ${HADOOP_JOBTRACKER_OPTS}"
HADOOP_TASKTRACKER_OPTS="-server -Xmx1024m -Dhadoop.security.logger=ERROR,console -Dmapred.audit.logger=ERROR,console ${HADOOP_TASKTRACKER_OPTS}"
SHARED_HDFS_NAMENODE_OPTS="-server -XX:ParallelGCThreads=1 -XX:+UseConcMarkSweepGC -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=256m -XX:MaxNewSize=256m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -Xms1024m -Xmx1024m -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT"
export HDFS_NAMENODE_OPTS="${SHARED_HDFS_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop/bin/kill-name-node\" -Dorg.mortbay.jetty.Request.maxFormContentSize=-1 ${HDFS_NAMENODE_OPTS}"
export HDFS_DATANODE_OPTS="-server -XX:ParallelGCThreads=1 -XX:+UseConcMarkSweepGC -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop/bin/kill-data-node\" -XX:ErrorFile=/var/log/hadoop/$USER/hs_err_pid%p.log -XX:NewSize=200m -XX:MaxNewSize=200m -Xloggc:/var/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'` -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Xms1024m -Xmx1024m -Dhadoop.security.logger=INFO,DRFAS -Dhdfs.audit.logger=INFO,DRFAAUDIT ${HDFS_DATANODE_OPTS} -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly"
export HDFS_SECONDARYNAMENODE_OPTS="${SHARED_HDFS_NAMENODE_OPTS} -XX:OnOutOfMemoryError=\"/usr/hdp/current/hadoop/bin/kill-secondary-name-node\" ${HDFS_SECONDARYNAMENODE_OPTS}"
export HADOOP_CLIENT_OPTS="-Xmx${HADOOP_HEAPSIZE}m $HADOOP_CLIENT_OPTS"
HDFS_NFS3_OPTS="-Xmx1024m -Dhadoop.security.logger=ERROR,DRFAS ${HDFS_NFS3_OPTS}"
HADOOP_BALANCER_OPTS="-server -Xmx1024m ${HADOOP_BALANCER_OPTS}"
export HDFS_DATANODE_SECURE_USER=${HDFS_DATANODE_SECURE_USER:-""}
export HADOOP_SSH_OPTS="-o ConnectTimeout=5 -o SendEnv=HADOOP_CONF_DIR"
export HADOOP_LOG_DIR=/var/log/hadoop/$USER
export HADOOP_SECURE_LOG_DIR=${HADOOP_SECURE_LOG_DIR:-/var/log/hadoop/$HDFS_DATANODE_SECURE_USER}
export HADOOP_PID_DIR=/var/run/hadoop/$USER
export HADOOP_SECURE_PID_DIR=${HADOOP_SECURE_PID_DIR:-/var/run/hadoop/$HDFS_DATANODE_SECURE_USER}
YARN_RESOURCEMANAGER_OPTS="-Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY"
export HADOOP_IDENT_STRING=$USER
# Add database libraries
JAVA_JDBC_LIBS=""
if [ -d "/usr/share/java" ]; then
  for jarFile in `ls /usr/share/java | grep -E "(mysql|ojdbc|postgresql|sqljdbc)" 2>/dev/null`
  do
    JAVA_JDBC_LIBS=${JAVA_JDBC_LIBS}:$jarFile
  done
fi
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}${JAVA_JDBC_LIBS}
export HADOOP_LIBEXEC_DIR=/usr/hdp/current/hadoop/libexec
export JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:/usr/hdp/current/hadoop/lib/native/Linux-amd64-64
export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
if [ "$command" == "datanode" ] && [ "$EUID" -eq 0 ] && [ -n "$HDFS_DATANODE_SECURE_USER" ]; then
ulimit -n 128000
fi

  • HADOOP_NAMENODE_INIT_HEAPSIZE can be set to "-Xms20480m" (a sketch of these production overrides follows this list).
  • HADOOP_JOBTRACKER_OPTS: the number of parallel GC threads can be raised to 8 (-XX:ParallelGCThreads=8); -XX:ErrorFile and -Xloggc can point to a path on a data disk.
  • SHARED_HDFS_NAMENODE_OPTS: parallel GC threads can be raised to 8 (-XX:ParallelGCThreads=8); -XX:ErrorFile and -Xloggc can point to a data-disk path; the young generation can be made somewhat larger, e.g. -XX:NewSize=2560m -XX:MaxNewSize=2560m (it must stay below -Xms).
  • HDFS_DATANODE_OPTS: parallel GC threads can be set to 4 (-XX:ParallelGCThreads=4); -XX:ErrorFile and -Xloggc can point to a data-disk path; the initial and maximum heap can be raised to -Xms13568m -Xmx13568m.
  • HADOOP_LOG_DIR can be placed on a data disk.
  • HADOOP_SECURE_LOG_DIR can be placed on a data disk.
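
For reference, the production values from the notes above would look roughly like this in hadoop-env.sh; the /data paths are only placeholders for your own data disks:

export HADOOP_NAMENODE_INIT_HEAPSIZE="-Xms20480m"
# In SHARED_HDFS_NAMENODE_OPTS / HADOOP_JOBTRACKER_OPTS, for example:
#   -XX:ParallelGCThreads=8 -XX:NewSize=2560m -XX:MaxNewSize=2560m
#   -XX:ErrorFile=/data/log/hadoop/$USER/hs_err_pid%p.log
#   -Xloggc:/data/log/hadoop/$USER/gc.log-`date +'%Y%m%d%H%M'`
# In HDFS_DATANODE_OPTS, for example:
#   -XX:ParallelGCThreads=4 -Xms13568m -Xmx13568m
export HADOOP_LOG_DIR=/data/log/hadoop/$USER
export HADOOP_SECURE_LOG_DIR=/data/log/hadoop/$HDFS_DATANODE_SECURE_USER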

3.3 /etc/hadoop/conf/core-site.xml

  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node01:8020</value>
    <final>true</final>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>360</value>
  </property>

  <property>
    <name>hadoop.http.cross-origin.allowed-headers</name>
    <value>X-Requested-With,Content-Type,Accept,Origin,WWW-Authenticate,Accept-Encoding,Transfer-Encoding</value>
  </property>
  <property>
    <name>hadoop.http.cross-origin.allowed-methods</name>
    <value>GET,PUT,POST,OPTIONS,HEAD,DELETE</value>
  </property>
  <property>
    <name>hadoop.http.cross-origin.allowed-origins</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.http.cross-origin.max-age</name>
    <value>1800</value>
  </property>
  <property>
    <name>hadoop.http.filter.initializers</name>
    <value>org.apache.hadoop.security.AuthenticationFilterInitializer,org.apache.hadoop.security.HttpCrossOriginFilterInitializer</value>
  </property>
  <property>
    <name>hadoop.security.auth_to_local</name>
    <value>DEFAULT</value>
  </property>
  <property>
    <name>hadoop.security.authentication</name>
    <value>simple</value>
  </property>
  <property>
    <name>hadoop.security.authorization</name>
    <value>false</value>
  </property>
  <property>
    <name>hadoop.security.instrumentation.requires.admin</name>
    <value>false</value>
  </property>
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>4096</value>
  </property>
  <property>
    <name>io.serializations</name>
    <value>org.apache.hadoop.io.serializer.WritableSerialization</value>
  </property>
  <property>
    <name>ipc.client.connect.max.retries</name>
    <value>10</value>
  </property>
  <property>
    <name>ipc.client.connection.maxidletime</name>
    <value>10000</value>
  </property>
  <property>
    <name>ipc.client.idlethreshold</name>
    <value>4000</value>
  </property>
  <property>
    <name>ipc.server.tcpnodelay</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.jobtracker.webinterface.trusted</name>
    <value>false</value>
  </property>
  <!--<property>
    <name>net.topology.script.file.name</name>
    <value>/etc/hadoop/conf/topology_script.py</value>
  </property>-->

  <property>
    <name>hadoop.proxyuser.hdfs.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hdfs.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <!--<value>bdm0,bdm1</value>-->
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hue.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hue.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.impala.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.impala.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.livy.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.livy.hosts</name>
    <value>*</value>
  </property>
  <!--<property>
    <name>hadoop.proxyuser.oozie.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.oozie.hosts</name>
    <value>bdm0</value>
  </property>-->
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.yarn.hosts</name>
    <value>*</value>
  </property>

  <!--
  <property>
    <name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
    <value>120</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>es1:2181,es2:2181,bdm0:2181,bdm1:2181,etl1:2181</value>
  </property>
  -->

  • fs.defaultFS: if HA is configured, this can simply be written as hdfs://nameservice, where nameservice can be any other legal name as long as it is used consistently in the rest of the configuration (a quick verification sketch follows).
  • For the remaining settings, see the official core-default.xml documentation.
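
A quick check that clients pick up these settings, once HDFS has been started in the later steps (a sketch):

hdfs getconf -confKey fs.defaultFS   # expect hdfs://node01:8020
hadoop fs -ls /                      # a simple round trip through the NameNode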

3.4 /etc/hadoop/conf/hdfs-site.xml

  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hdfs</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.http-address</name>
    <value>node01:50070</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/hadoop/hdfs/namenode</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/hadoop/hdfs/sda,/hadoop/hdfs/sdb</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>/hadoop/hdfs/namesecondary</value>
  </property>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>node01:50090</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>

  <property>
    <name>dfs.block.access.token.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.blockreport.initialDelay</name>
    <value>120</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.nameservice</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.streams.cache.size</name>
    <value>256</value>
  </property>
  <property>
    <name>dfs.client.retry.policy.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.cluster.administrators</name>
    <value>hdfs</value>
  </property>
  <property>
    <name>dfs.content-summary.limit</name>
    <value>5000</value>
  </property>
  <property>
    <name>dfs.datanode.address</name>
    <value>0.0.0.0:50010</value>
  </property>
  <property>
    <name>dfs.datanode.balance.bandwidthPerSec</name>
    <value>6250000</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>750</value>
  </property>
  <property>
    <name>dfs.datanode.du.reserved</name>
    <value>1340866560</value>
  </property>
  <property>
    <name>dfs.datanode.failed.volumes.tolerated</name>
    <value>0</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
  </property>
  <property>
    <name>dfs.datanode.https.address</name>
    <value>0.0.0.0:50475</value>
  </property>
  <property>
    <name>dfs.datanode.ipc.address</name>
    <value>0.0.0.0:8010</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>4096</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/lib/hadoop-hdfs/dn_socket</value>
  </property>
  <property>
    <name>dfs.encrypt.data.transfer.cipher.suites</name>
    <value>AES/CTR/NoPadding</value>
  </property>
  <property>
    <name>dfs.heartbeat.interval</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/etc/hadoop/conf/dfs.exclude</value>
  </property>
  <property>
    <name>dfs.http.policy</name>
    <value>HTTP_ONLY</value>
  </property>
  <property>
    <name>dfs.https.port</name>
    <value>50470</value>
  </property>
  <property>
    <name>dfs.namenode.accesstime.precision</name>
    <value>0</value>
  </property>
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.audit.log.async</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.read.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.avoid.write.stale.datanode</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.edits.dir</name>
    <value>${dfs.namenode.checkpoint.dir}</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.period</name>
    <value>21600</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.txns</name>
    <value>1000000</value>
  </property>
  <property>
    <name>dfs.namenode.fslock.fair</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>800</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir.restore</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.safemode.threshold-pct</name>
    <value>0.99</value>
  </property>
  <property>
    <name>dfs.namenode.stale.datanode.interval</name>
    <value>30000</value>
  </property>
  <property>
    <name>dfs.namenode.startup.delay.block.deletion.sec</name>
    <value>3600</value>
  </property>
  <property>
    <name>dfs.namenode.write.stale.datanode.ratio</name>
    <value>1.0f</value>
  </property>
  <property>
    <name>dfs.permissions.ContentSummary.subAccess</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.replication.max</name>
    <value>50</value>
  </property>
  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
    <final>true</final>
  </property>
  <property>
    <name>fs.permissions.umask-mode</name>
    <value>022</value>
  </property>
  <property>
    <name>hadoop.caller.context.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hadoop.http.authentication.type</name>
    <value>simple</value>
  </property>
  <property>
    <name>manage.include.files</name>
    <value>false</value>
  </property>
  <property>
    <name>nfs.exports.allowed.hosts</name>
    <value>* rw</value>
  </property>
  <property>
    <name>nfs.file.dump.dir</name>
    <value>/tmp/.hdfs-nfs</value>
  </property>

  <!--
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>shell(/bin/true)</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.nameservice</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.internal.nameservices</name>
    <value>nameservice</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/hadoop/hdfs/journal</value>
  </property>
  <property>
    <name>dfs.journalnode.http-address</name>
    <value>0.0.0.0:8480</value>
  </property>
  <property>
    <name>dfs.journalnode.https-address</name>
    <value>0.0.0.0:8481</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.nameservice.nn1</name>
    <value>bdm0:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.nameservice.nn2</name>
    <value>bdm1:50070</value>
  </property>
  <property>
    <name>dfs.namenode.https-address.nameservice.nn1</name>
    <value>bdm0:50470</value>
  </property>
  <property>
    <name>dfs.namenode.https-address.nameservice.nn2</name>
    <value>bdm1:50470</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.nameservice.nn1</name>
    <value>bdm0:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.nameservice.nn2</name>
    <value>bdm1:8020</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://bdm0:8485;bdm1:8485;etl1:8485/nameservice</value>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>nameservice</value>
  </property>
  -->

  • dfs.replication: the block replication factor; for production a value of 3 or more is recommended.
  • Enabling HA requires ZooKeeper; the SecondaryNameNode process is then no longer needed and is replaced by an odd number of JournalNode processes.
  • dfs.namenode.http-address: with HA enabled this becomes dfs.namenode.http-address.nameservice.nn1=bdm0:50070 and dfs.namenode.http-address.nameservice.nn2=bdm1:50070, where nameservice is the name specified by dfs.nameservices.
  • dfs.namenode.https-address: with HA enabled this becomes dfs.namenode.https-address.nameservice.nn1=bdm0:50470 and dfs.namenode.https-address.nameservice.nn2=bdm1:50470.
  • dfs.domain.socket.path=/var/lib/hadoop-hdfs/dn_socket: the user that starts the DataNode must have permission through the group owning this path; ideally the path is owned by the start-up user and that user belongs to the group (see the directory-setup sketch after this list).
  • For the remaining settings, see the official hdfs-default.xml documentation.
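
Before HDFS is started for the first time, the local directories referenced above need to exist (with ownership matching the start-up user) and the NameNode must be formatted once. A minimal sketch, assuming a single node run as root:

# Local directories from dfs.namenode.name.dir / dfs.datanode.data.dir / dfs.namenode.checkpoint.dir
mkdir -p /hadoop/hdfs/namenode /hadoop/hdfs/sda /hadoop/hdfs/sdb /hadoop/hdfs/namesecondary
# Parent directory of dfs.domain.socket.path; ownership must satisfy the note above
mkdir -p /var/lib/hadoop-hdfs

# One-time format, then start the daemons and check the report
hdfs namenode -format
hdfs --daemon start namenode
hdfs --daemon start secondarynamenode
hdfs --daemon start datanode
hdfs dfsadmin -report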

3.5 /etc/hadoop/conf/mapred-env.sh

HDP_VERSION="3.1.5.0-152"
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=900
export HADOOP_LOGLEVEL=${HADOOP_LOGLEVEL:-INFO}
export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,console}
export HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_DAEMON_ROOT_LOGGER:-${HADOOP_LOGLEVEL},RFA}
export HADOOP_OPTS="-Dhdp.version=$HDP_VERSION $HADOOP_OPTS"
#export HADOOP_OPTS="-Djava.io.tmpdir=/var/lib/ambari-server/data/tmp/hadoop_java_io_tmpdir $HADOOP_OPTS"
export JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH}:/var/lib/ambari-server/data/tmp/hadoop_java_io_tmpdir"
export HADOOP_LOG_DIR=/var/log/hadoop-mapreduce/$USER
export HADOOP_PID_DIR=/var/run/hadoop-mapreduce/$USER

3.6 /etc/hadoop/conf/mapred-site.xml

  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapred.local.dir</name>
    <value>/hadoop/mapred</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx1024m</value>
  </property>
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx1024m</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.command-opts</name>
    <value>-Xmx512m -Dhdp.version=${hdp.version}</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node01:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node01:19888</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.https.address</name>
    <value>node01:19890</value>
  </property>

  <property>
    <name>hadoop.http.authentication.type</name>
    <value>simple</value>
  </property>
  <property>
    <name>mapred.map.tasks.speculative.execution</name>
    <value>true</value>
  </property>
  <property>
    <name>mapred.reduce.tasks.speculative.execution</name>
    <value>true</value>
  </property>

  <property>
    <name>mapreduce.admin.map.child.java.opts</name>
    <value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
  </property>
  <property>
    <name>mapreduce.admin.reduce.child.java.opts</name>
    <value>-server -XX:NewRatio=8 -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
  </property>
  <property>
    <name>mapreduce.admin.user.env</name>
    <value>LD_LIBRARY_PATH=/usr/hdp/current/hadoop/lib/native:/usr/hdp/current/hadoop/lib/native/Linux-amd64-64</value>
  </property>
  <property>
    <name>mapreduce.cluster.acls.enabled</name>
    <value>false</value>
  </property>

  <property>
    <name>mapreduce.am.max-attempts</name>
    <value>2</value>
  </property>
  <property>
    <name>mapreduce.application.classpath</name>
    <value>$PWD/mr-framework/hadoop/share/hadoop/mapreduce/*:$PWD/mr-framework/hadoop/share/hadoop/mapreduce/lib/*:$PWD/mr-framework/hadoop/share/hadoop/common/*:$PWD/mr-framework/hadoop/share/hadoop/common/lib/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/*:$PWD/mr-framework/hadoop/share/hadoop/yarn/lib/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/*:$PWD/mr-framework/hadoop/share/hadoop/hdfs/lib/*:$PWD/mr-framework/hadoop/share/hadoop/tools/lib/*:/usr/hdp/current/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure</value>
  </property>
  <property>
    <name>mapreduce.application.framework.path</name>
    <value>/hdp/apps/${hdp.version}/mapreduce/mapreduce.tar.gz#mr-framework</value>
  </property>

  <property>
    <name>mapreduce.cluster.administrators</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>mapreduce.job.acl-modify-job</name>
    <value> </value>
  </property>
  <property>
    <name>mapreduce.job.acl-view-job</name>
    <value> </value>
  </property>
  <property>
    <name>mapreduce.job.counters.max</name>
    <value>130</value>
  </property>
  <property>
    <name>mapreduce.job.emit-timeline-data</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.job.queuename</name>
    <value>default</value>
  </property>
  <property>
    <name>mapreduce.job.reduce.slowstart.completedmaps</name>
    <value>0.05</value>
  </property>

  <property>
    <name>mapreduce.jobhistory.admin.acl</name>
    <value>*</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.bind-host</name>
    <value>0.0.0.0</value>
  </property>
  <!--<property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/mr-history/done</value>
  </property>-->
  <property>
    <name>mapreduce.jobhistory.http.policy</name>
    <value>HTTP_ONLY</value>
  </property>
  <!--<property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/mr-history/tmp</value>
  </property>-->
  <property>
    <name>mapreduce.jobhistory.recovery.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.recovery.store.class</name>
    <value>org.apache.hadoop.mapreduce.v2.hs.HistoryServerLeveldbStateStoreService</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.recovery.store.leveldb.path</name>
    <value>/hadoop/mapreduce/jhs</value>
  </property>
  <property>
    <name>mapreduce.map.log.level</name>
    <value>INFO</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.map.sort.spill.percent</name>
    <value>0.7</value>
  </property>
  <property>
    <name>mapreduce.map.speculative</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.output.fileoutputformat.compress</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.output.fileoutputformat.compress.type</name>
    <value>BLOCK</value>
  </property>
  <property>
    <name>mapreduce.reduce.input.buffer.percent</name>
    <value>0.0</value>
  </property>
  <property>
    <name>mapreduce.reduce.log.level</name>
    <value>INFO</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.fetch.retry.enabled</name>
    <value>1</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.fetch.retry.interval-ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.fetch.retry.timeout-ms</name>
    <value>30000</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.input.buffer.percent</name>
    <value>0.7</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.merge.percent</name>
    <value>0.66</value>
  </property>
  <property>
    <name>mapreduce.reduce.shuffle.parallelcopies</name>
    <value>30</value>
  </property>
  <property>
    <name>mapreduce.reduce.speculative</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.shuffle.port</name>
    <value>13562</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.factor</name>
    <value>100</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>mapreduce.task.timeout</name>
    <value>300000</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.admin-command-opts</name>
    <value>-Dhdp.version=${hdp.version}</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.log.level</name>
    <value>INFO</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/user</value>
  </property>

  • mapreduce.map.java.opts=-Xmx12697m: the JVM heap size used when launching map tasks; if it is set too small, the JVM will throw an Out of Memory error when jobs are submitted.
  • mapreduce.map.memory.mb=12288: the memory limit for each map container; the default is -1, meaning no limit. If not specified, it is inferred from mapreduce.job.heap.memory-mb.ratio (default 0.8) and mapreduce.map.java.opts. When a container exceeds this value, the NodeManager kills the container.
  • mapreduce.reduce.java.opts=-Xmx16384m: same meaning as mapreduce.map.java.opts.
  • mapreduce.reduce.memory.mb=12288: same meaning as mapreduce.map.memory.mb.
  • yarn.app.mapreduce.am.command-opts=-Xmx8192m -Dhdp.version=${hdp.version}: the ApplicationMaster heap size (default -Xmx1024m). If many jobs are submitted while this value is set too high, the memory used by AMs may exceed the share allowed by yarn.scheduler.capacity.maximum-am-resource-percent, and both current and subsequent jobs will wait in the queue.
  • For the remaining settings, see the official mapred-default.xml documentation (a quick smoke-test sketch follows this list).
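
With YARN running (see the following sections), a quick smoke test of these memory settings is to run one of the bundled examples. The local paths below are assumptions about where the HDP hadoop RPMs place the MapReduce tarball and the examples jar; adjust them if your layout differs:

# The framework tarball referenced by mapreduce.application.framework.path must exist in HDFS
hdfs dfs -mkdir -p /hdp/apps/3.1.5.0-152/mapreduce
hdfs dfs -put /usr/hdp/3.1.5.0-152/hadoop/mapreduce.tar.gz /hdp/apps/3.1.5.0-152/mapreduce/

# A tiny job: it should request 256 MB map/reduce containers and a 512 MB ApplicationMaster
yarn jar /usr/hdp/3.1.5.0-152/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 2 10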

3.7 /etc/hadoop/conf/yarn-env.sh

export HADOOP_YARN_HOME=/usr/hdp/current/hadoop-yarn
export HADOOP_LOG_DIR=/var/log/hadoop-yarn/yarn
export HADOOP_SECURE_LOG_DIR=/var/log/hadoop-yarn/yarn
export HADOOP_PID_DIR=/var/run/hadoop-yarn/yarn
export HADOOP_SECURE_PID_DIR=/var/run/hadoop-yarn/yarn
export HADOOP_LIBEXEC_DIR=/usr/hdp/current/hadoop/libexec
export JAVA_HOME=/usr/local/jdk8
#export JAVA_LIBRARY_PATH="${JAVA_LIBRARY_PATH}:/var/lib/ambari-server/data/tmp/hadoop_java_io_tmpdir"
export HADOOP_LOGLEVEL=${HADOOP_LOGLEVEL:-INFO}
export HADOOP_ROOT_LOGGER=${HADOOP_ROOT_LOGGER:-INFO,console}
export HADOOP_DAEMON_ROOT_LOGGER=${HADOOP_DAEMON_ROOT_LOGGER:-${HADOOP_LOGLEVEL},EWMA,RFA}

# User for YARN daemons
export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn}

# some Java parameters
# export JAVA_HOME=/home/y/libexec/jdk1.6.0/
if [ "$JAVA_HOME" != "" ]; then
#echo "run java in $JAVA_HOME"
JAVA_HOME=$JAVA_HOME
fi
if [ "$JAVA_HOME" = "" ]; then
echo "Error: JAVA_HOME is not set."
exit 1
fi
JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m
YARN_HEAPSIZE=1024
# check envvars which might override default args
if [ "$YARN_HEAPSIZE" != "" ]; then
JAVA_HEAP_MAX="-Xmx""$YARN_HEAPSIZE""m"
fi

export YARN_RESOURCEMANAGER_HEAPSIZE=1024
export YARN_NODEMANAGER_HEAPSIZE=1024
export YARN_TIMELINESERVER_HEAPSIZE=1024
IFS=
# default log directory and file
if [ "$HADOOP_LOG_DIR" = "" ]; then
HADOOP_LOG_DIR="$HADOOP_YARN_HOME/logs"
fi
if [ "$HADOOP_LOGFILE" = "" ]; then
HADOOP_LOGFILE='yarn.log'
fi
# default policy file for service-level authorization
if [ "$YARN_POLICYFILE" = "" ]; then
YARN_POLICYFILE="hadoop-policy.xml"
fi
# restore ordinary behaviour
unset IFS
HADOOP_OPTS="$HADOOP_OPTS -Dyarn.id.str=$YARN_IDENT_STRING"
HADOOP_OPTS="$HADOOP_OPTS -Dyarn.policy.file=$YARN_POLICYFILE"
#HADOOP_OPTS="$HADOOP_OPTS -Djava.io.tmpdir=/var/lib/ambari-server/data/tmp/hadoop_java_io_tmpdir"
export YARN_NODEMANAGER_OPTS="$YARN_NODEMANAGER_OPTS -Dnm.audit.logger=INFO,NMAUDIT"
export YARN_RESOURCEMANAGER_OPTS="$YARN_RESOURCEMANAGER_OPTS -Dyarn.server.resourcemanager.appsummary.logger=INFO,RMSUMMARY -Drm.audit.logger=INFO,RMAUDIT"
export YARN_REGISTRYDNS_SECURE_USER=yarn
export YARN_REGISTRYDNS_SECURE_EXTRA_OPTS="-jvm server"

  • HADOOP_LOG_DIR and HADOOP_SECURE_LOG_DIR can be placed on a data disk.
  • YARN_RESOURCEMANAGER_HEAPSIZE can be increased as appropriate, e.g. to 3072.
  • YARN_NODEMANAGER_HEAPSIZE can be increased as appropriate, e.g. to 3072.
  • YARN_TIMELINESERVER_HEAPSIZE can be increased as appropriate, e.g. to 8072.

3.8 /etc/hadoop/conf/capacity-scheduler.xml

This is the Capacity Scheduler configuration file. The main setting to pay attention to here is the one below; the rest can be left at the defaults or adjusted to your situation. It sets the maximum fraction of total YARN memory that ApplicationMasters may use, 0.1 by default; if YARN runs streaming jobs or many jobs concurrently, raise it, for example to 0.4 or 0.5 (a worked example follows the snippet below).

  <property>
    <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
    <value>0.4</value>
  </property>
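
For example, with the single node's yarn.nodemanager.resource.memory-mb=5000 configured below in yarn-site.xml, a value of 0.4 lets ApplicationMasters occupy roughly 2000 MB of the cluster's YARN memory. Once the ResourceManager is running, the effective settings of a queue can be inspected with (a sketch):

yarn queue -status default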

3.9 /etc/hadoop/conf/yarn-site.xml

  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node01</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <!--<value>mapreduce_shuffle,spark2_shuffle,timeline_collector</value>-->
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/hadoop/yarn/local</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/hadoop/yarn/log</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://node01:19888/jobhistory/logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address</name>
    <value>node01:8090</value>
  </property>
  <property>
    <name>yarn.application.classpath</name>
    <value>
      $HADOOP_CONF_DIR,
      /usr/hdp/current/hadoop/*,
      /usr/hdp/current/hadoop/lib/*,
      /usr/hdp/current/hadoop-hdfs/*,
      /usr/hdp/current/hadoop-hdfs/lib/*,
      /usr/hdp/current/hadoop-yarn/*,
      /usr/hdp/current/hadoop-yarn/lib/*
    </value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-vcores</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>256</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>5000</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>

  <property>
    <name>hadoop.http.authentication.type</name>
    <value>simple</value>
  </property>
  <!--<property>
    <name>hadoop.http.cross-origin.allowed-origins</name>
    <value>regex:.*[.]bdm1[.]com(:\d*)?</value>
  </property>-->
  <property>
    <name>hadoop.registry.dns.bind-address</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>hadoop.registry.dns.bind-port</name>
    <value>53</value>
  </property>
  <property>
    <name>hadoop.registry.dns.domain-name</name>
    <value>EXAMPLE.COM</value>
  </property>
  <property>
    <name>hadoop.registry.dns.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hadoop.registry.dns.zone-mask</name>
    <value>255.255.255.0</value>
  </property>
  <property>
    <name>hadoop.registry.dns.zone-subnet</name>
    <value>172.17.0.0</value>
  </property>
  <property>
    <name>hadoop.registry.zk.quorum</name>
    <value>node01:2181</value>
  </property>
  <property>
    <name>manage.include.files</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.acl.enable</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.admin.acl</name>
    <value>activity_analyzer,yarn</value>
  </property>
  <property>
    <name>yarn.client.nodemanager-connect.max-wait-ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>yarn.client.nodemanager-connect.retry-interval-ms</name>
    <value>10000</value>
  </property>
  <property>
    <name>yarn.http.policy</name>
    <value>HTTP_ONLY</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>2592000</value>
  </property>
  <property>
    <name>yarn.log.server.web-service.url</name>
    <value>http://node01:8188/ws/v1/applicationhistory</value>
  </property>
  <property>
    <name>yarn.node-labels.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.node-labels.fs-store.retry-policy-spec</name>
    <value>2000, 500</value>
  </property>
  <property>
    <name>yarn.node-labels.fs-store.root-dir</name>
    <value>/system/yarn/node-labels</value>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>0.0.0.0:45454</value>
  </property>
  <property>
    <name>yarn.nodemanager.admin-env</name>
    <value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.spark2_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.spark2_shuffle.classpath</name>
    <value>/usr/hdp/current/spark2/aux/*</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
    <value>org.apache.spark.network.yarn.YarnShuffleService</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.spark_shuffle.classpath</name>
    <value>/usr/hdp/current/spark/aux/*</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.timeline_collector.class</name>
    <value>org.apache.hadoop.yarn.server.timelineservice.collector.PerNodeTimelineCollectorsAuxService</value>
  </property>
  <property>
    <name>yarn.nodemanager.bind-host</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>yarn.nodemanager.container-executor.class</name>
    <value>org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor</value>
  </property>
  <property>
    <name>yarn.nodemanager.container-metrics.unregister-delay-ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>yarn.nodemanager.container-monitor.interval-ms</name>
    <value>3000</value>
  </property>
  <property>
    <name>yarn.nodemanager.delete.debug-delay-sec</name>
    <value>0</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>90</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
    <value>0.25</value>
  </property>
  <property>
    <name>yarn.nodemanager.health-checker.interval-ms</name>
    <value>135000</value>
  </property>
  <property>
    <name>yarn.nodemanager.health-checker.script.timeout-ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.linux-container-executor.group</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-aggregation.compression-type</name>
    <value>gz</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-aggregation.debug-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-aggregation.num-log-files-per-app</name>
    <value>30</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds</name>
    <value>3600</value>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>1209600</value>
  </property>
  <property>
    <name>yarn.nodemanager.recovery.dir</name>
    <value>/var/log/hadoop-yarn/nodemanager/recovery-state</value>
  </property>
  <property>
    <name>yarn.nodemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.recovery.supervised</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/hadoop/app-logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
    <value>logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource-plugins</name>
    <value></value>
  </property>
  <property>
    <name>yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices</name>
    <value></value>
  </property>
  <property>
    <name>yarn.nodemanager.resource-plugins.gpu.docker-plugin</name>
    <value></value>
  </property>
  <property>
    <name>yarn.nodemanager.resource-plugins.gpu.docker-plugin.nvidiadocker-v1.endpoint</name>
    <value></value>
  </property>
  <property>
    <name>yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables</name>
    <value></value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.pcores-vcores-multiplier</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name>
    <value>80</value>
  </property>
  <property>
    <name>yarn.nodemanager.runtime.linux.allowed-runtimes</name>
    <value>default,docker</value>
  </property>
  <property>
    <name>yarn.nodemanager.runtime.linux.docker.allowed-container-networks</name>
    <value>host,none,bridge</value>
  </property>
  <property>
    <name>yarn.nodemanager.runtime.linux.docker.capabilities</name>
    <value>
    CHOWN,DAC_OVERRIDE,FSETID,FOWNER,MKNOD,NET_RAW,SETGID,SETUID,SETFCAP,
    SETPCAP,NET_BIND_SERVICE,SYS_CHROOT,KILL,AUDIT_WRITE</value>
  </property>
  <property>
    <name>yarn.nodemanager.runtime.linux.docker.default-container-network</name>
    <value>host</value>
  </property>
  <property>
    <name>yarn.nodemanager.runtime.linux.docker.privileged-containers.acl</name>
    <value></value>
  </property>
  <property>
    <name>yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
  </property>
  <property>
    <name>yarn.nodemanager.webapp.cross-origin.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>node01:8050</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>node01:8141</value>
  </property>
  <property>
    <name>yarn.resourcemanager.am.max-attempts</name>
    <value>2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.bind-host</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster</value>
  </property>
  <property>
    <name>yarn.resourcemanager.connect.max-wait.ms</name>
    <value>900000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.connect.retry-interval.ms</name>
    <value>30000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.display.per-user-apps</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.fs.state-store.retry-policy-spec</name>
    <value>2000, 500</value>
  </property>
  <property>
    <name>yarn.resourcemanager.fs.state-store.uri</name>
    <value> </value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
    <value>/yarn-leader-election</value>
  </property>

  <property>
    <name>yarn.resourcemanager.monitor.capacity.preemption.intra-queue-preemption.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.monitor.capacity.preemption.monitoring_interval</name>
    <value>15000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.monitor.capacity.preemption.natural_termination_factor</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.monitor.capacity.preemption.total_preemption_per_round</name>
    <value>0.1</value>
  </property>
  <!--<property>
    <name>yarn.resourcemanager.nodes.exclude-path</name>
    <value>/etc/hadoop/conf/yarn.exclude</value>
  </property>-->
  <property>
    <name>yarn.resourcemanager.placement-constraints.handler</name>
    <value>scheduler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>node01:8025</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>node01:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.monitor.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.state-store.max-completed-applications</name>
    <value>${yarn.resourcemanager.max-completed-applications}</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.system-metrics-publisher.dispatcher.pool-size</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>node01:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.cross-origin.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms</name>
    <value>10000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-acl</name>
    <value>world:anyone:rwcda</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>node01:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-num-retries</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-retry-interval-ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-state-store.parent-path</name>
    <value>/rmstore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-timeout-ms</name>
    <value>10000</value>
  </property>
  <property>
    <name>yarn.rm.system-metricspublisher.emit-container-events</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.scheduler.capacity.ordering-policy.priority-utilization.underutilized-preemption.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.service.framework.path</name>
    <value>/hdp/apps/${hdp.version}/hadoop-yarn/lib/service-dep.tar.gz</value>
  </property>
  <property>
    <name>yarn.service.system-service.dir</name>
    <value>/services</value>
  </property>
  <property>
    <name>yarn.system-metricspublisher.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.address</name>
    <value>node01:10200</value>
  </property>
  <property>
    <name>yarn.timeline-service.bind-host</name>
    <value>0.0.0.0</value>
  </property>
  <property>
    <name>yarn.timeline-service.client.max-retries</name>
    <value>30</value>
  </property>
  <property>
    <name>yarn.timeline-service.client.retry-interval-ms</name>
    <value>1000</value>
  </property>
  <property>
    <name>yarn.timeline-service.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.active-dir</name>
    <value>/ats/active/</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.app-cache-size</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.cleaner-interval-seconds</name>
    <value>3600</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.done-dir</name>
    <value>/ats/done/</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.group-id-plugin-classes</name>
    <value>org.apache.hadoop.yarn.applications.distributedshell.DistributedShellTimelinePlugin</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.group-id-plugin-classpath</name>
    <value></value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.retain-seconds</name>
    <value>604800</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.scan-interval-seconds</name>
    <value>60</value>
  </property>
  <property>
    <name>yarn.timeline-service.entity-group-fs-store.summary-store</name>
    <value>org.apache.hadoop.yarn.server.timeline.RollingLevelDBTimelineStore</value>
  </property>
  <property>
    <name>yarn.timeline-service.generic-application-history.save-non-am-container-meta-info</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.timeline-service.generic-application-history.store-class</name>
    <value>org.apache.hadoop.yarn.server.applicationhistoryservice.NullApplicationHistoryStore</value>
  </property>
  <property>
    <name>yarn.timeline-service.hbase-schema.prefix</name>
    <value>prod.</value>
  </property>
  <property>
    <name>yarn.timeline-service.hbase.configuration.file</name>
    <value>file:///usr/hdp/${hdp.version}/hadoop/conf/embedded-yarn-ats-hbase/hbase-site.xml</value>
  </property>
  <property>
    <name>yarn.timeline-service.hbase.coprocessor.jar.hdfs.location</name>
    <value>file:///usr/hdp/${hdp.version}/hadoop-yarn/timelineservice/hadoop-yarn-server-timelineservice-hbase-coprocessor.jar</value>
  </property>
  <property>
    <name>yarn.timeline-service.http-authentication.proxyuser.root.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>yarn.timeline-service.http-authentication.proxyuser.root.hosts</name>
    <value>node01</value>
  </property>
  <property>
    <name>yarn.timeline-service.http-authentication.simple.anonymous.allowed</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.http-authentication.type</name>
    <value>simple</value>
  </property>
  <property>
    <name>yarn.timeline-service.http-cross-origin.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.leveldb-state-store.path</name>
    <value>/hadoop/yarn/timeline</value>
  </property>
  <property>
    <name>yarn.timeline-service.leveldb-timeline-store.path</name>
    <value>/hadoop/yarn/timeline</value>
  </property>
  <property>
    <name>yarn.timeline-service.leveldb-timeline-store.read-cache-size</name>
    <value>104857600</value>
  </property>
  <property>
    <name>yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size</name>
    <value>10000</value>
  </property>
  <property>
    <name>yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size</name>
    <value>10000</value>
  </property>
  <property>
    <name>yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms</name>
    <value>300000</value>
  </property>
  <property>
    <name>yarn.timeline-service.reader.webapp.address</name>
    <value>node01:8198</value>
  </property>
  <property>
    <name>yarn.timeline-service.reader.webapp.https.address</name>
    <value>node01:8199</value>
  </property>
  <property>
    <name>yarn.timeline-service.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.state-store-class</name>
    <value>org.apache.hadoop.yarn.server.timeline.recovery.LeveldbTimelineStateStore</value>
  </property>
  <property>
    <name>yarn.timeline-service.store-class</name>
    <value>org.apache.hadoop.yarn.server.timeline.EntityGroupFSTimelineStore</value>
  </property>
  <property>
    <name>yarn.timeline-service.ttl-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.ttl-ms</name>
    <value>2678400000</value>
  </property>
  <property>
    <name>yarn.timeline-service.version</name>
    <value>2.0f</value>
  </property>
  <property>
    <name>yarn.timeline-service.versions</name>
    <value>1.5f,2.0f</value>
  </property>
  <property>
    <name>yarn.timeline-service.webapp.address</name>
    <value>node01:8188</value>
  </property>
  <property>
    <name>yarn.timeline-service.webapp.https.address</name>
    <value>node01:8190</value>
  </property>

  <property>
    <name>yarn.webapp.api-service.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.webapp.ui2.enable</name>
    <value>true</value>
  </property>

  <!--
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>bdm0</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>bdm1</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>bdm0:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>bdm1:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>bdm0:8090</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>bdm1:8090</value>
  </property>
  -->
  • yarn.nodemanager.resource.cpu-vcores=64 每个nodemanager可以使用的核数资源,默认为 -1,例如这里设置为物理核数的两倍。

  • yarn.nodemanager.resource.memory-mb=131072 每个nodemanager可以使用的内存资源,如果该值为 -1 时,并且配置项 yarn.nodemanager.resource.detect-hardware-capabilities=true,则会自动计算,其他情况下默认值为 8192MB。例如这里设置为 128GB。

  • yarn.scheduler.minimum-allocation-vcores=1 单个Container可申请的最小虚拟核数,例如这里指定为 1。

  • yarn.scheduler.minimum-allocation-mb=2048 单个Container可申请的最小内存资源,例如这里指定为 2048。

  • yarn.scheduler.maximum-allocation-vcores=8 单个Container可申请的最大虚拟核数,例如这里指定为 8。

  • yarn.scheduler.maximum-allocation-mb=30720 单个Container可申请的最大内存资源,例如这里指定为 30720(结合 NodeManager 资源与 Container 下限,可粗略估算单节点并发 Container 数量,见本节末尾的示例)。

  • yarn.timeline-service.enabled=false 是否启用 timeline 服务,这里设置为 false,不启用此服务。

  • yarn.nodemanager.log-aggregation.compression-type=gz 日志的压缩类型算法,默认为 none,这里设置为 gz。

  • 其它配置项详见官方文档 yarn-default.xml
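
按上面给出的生产参考值,可以粗略估算单个 NodeManager 能同时运行的最小规格 Container 数量(以下计算仅为示意):

# 内存维度:yarn.nodemanager.resource.memory-mb / yarn.scheduler.minimum-allocation-mb
echo $((131072 / 2048))   # 64
# vcore 维度:yarn.nodemanager.resource.cpu-vcores / yarn.scheduler.minimum-allocation-vcores
echo $((64 / 1))          # 64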

3.10 /etc/zookeeper/conf/zookeeper-env.sh

export JAVA_HOME=/usr/local/jdk8
export ZOOKEEPER_HOME=/usr/hdp/current/zookeeper
export ZOO_LOG_DIR=/var/log/zookeeper
export ZOOPIDFILE=/var/run/zookeeper/zookeeper_server.pid
export SERVER_JVMFLAGS=-Xmx256m
export JAVA=$JAVA_HOME/bin/java
export CLASSPATH=$CLASSPATH:/usr/share/zookeeper/*
  • ZOO_LOG_DIR 日志可以设置到某个数据盘。
  • SERVER_JVMFLAGS 最大堆内存可设置为 -Xmx1024m

3.11 /etc/zookeeper/conf/zoo.cfg

tickTime=2000
maxClientCnxns=50
initLimit=10
syncLimit=5
dataDir=/var/lib/zookeeper
clientPort=2181
autopurge.snapRetainCount=5
autopurge.purgeInterval=24
admin.enableServer=false
server.1=node01:2887:3887
#...(多节点时的 server 列表与 myid 对应关系见下方示例)
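
如果是多节点 ZooKeeper 集群,可按如下方式补全 server 列表,并保证各节点 myid 文件内容与 server.N 中的 N 对应(node02、node03 为假设的主机名,仅为示例):

cat >> /etc/zookeeper/conf/zoo.cfg <<'EOF'
server.2=node02:2887:3887
server.3=node03:2887:3887
EOF
# 在 node02 上执行:echo "2" > /var/lib/zookeeper/myid
# 在 node03 上执行:echo "3" > /var/lib/zookeeper/myid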

3.12 /etc/tez/conf/tez-env.sh

export TEZ_CONF_DIR=/etc/tez/conf/
export HADOOP_HOME=${HADOOP_HOME:-/usr}
export JAVA_HOME=/usr/local/jdk8

3.13 /etc/tez/conf/tez-site.xml

  <property>
    <name>tez.task.resource.memory.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>tez.am.resource.memory.mb</name>
    <value>512</value>
  </property>
  <property>
    <name>tez.counters.max</name>
    <value>10000</value>
  </property>
  <property>
    <name>tez.lib.uris</name>
    <value>/hdp/apps/3.1.5.0-152/tez/tez.tar.gz</value>
  </property>
  <property>
    <name>tez.runtime.io.sort.mb</name>
    <value>256</value>
  </property>
  <property>
    <name>tez.am.java.opts</name>
    <value>-server -Xmx512m -Djava.net.preferIPv4Stack=true</value>
  </property>
  <property>
    <name>tez.am.launch.env</name>
    <value>LD_LIBRARY_PATH=/usr/hdp/current/hadoop/lib/native:/usr/hdp/current/hadoop/lib/native/Linux-amd64-64</value>
  </property>
  <property>
    <name>tez.cluster.additional.classpath.prefix</name>
    <value>/usr/hdp/current/hadoop/lib/hadoop-lzo-0.6.0.${hdp.version}.jar:/etc/hadoop/conf/secure</value>
  </property>
  <property>
    <name>tez.task.launch.cmd-opts</name>
    <value>-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB</value>
  </property>
  <property>
    <name>tez.task.launch.env</name>
    <value>LD_LIBRARY_PATH=/usr/hdp/current/hadoop/lib/native:/usr/hdp/current/hadoop/lib/native/Linux-amd64-64</value>
  </property>

  <property>
    <name>tez.am.am-rm.heartbeat.interval-ms.max</name>
    <value>250</value>
  </property>
  <property>
    <name>tez.am.container.idle.release-timeout-max.millis</name>
    <value>20000</value>
  </property>
  <property>
    <name>tez.am.container.idle.release-timeout-min.millis</name>
    <value>10000</value>
  </property>
  <property>
    <name>tez.am.container.reuse.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.am.container.reuse.locality.delay-allocation-millis</name>
    <value>250</value>
  </property>
  <property>
    <name>tez.am.container.reuse.non-local-fallback.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>tez.am.container.reuse.rack-fallback.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.am.launch.cluster-default.cmd-opts</name>
    <value>-server -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
  </property>
  <property>
    <name>tez.am.launch.cmd-opts</name>
    <value>-XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB</value>
  </property>
  <property>
    <name>tez.am.log.level</name>
    <value>INFO</value>
  </property>
  <property>
    <name>tez.am.max.app.attempts</name>
    <value>2</value>
  </property>
  <property>
    <name>tez.am.maxtaskfailures.per.node</name>
    <value>10</value>
  </property>
  <property>
    <name>tez.am.tez-ui.history-url.template</name>
    <value>__HISTORY_URL_BASE__?viewPath=%2F%23%2Ftez-app%2F__APPLICATION_ID__</value>
  </property>
  <property>
    <name>tez.am.view-acls</name>
    <value>*</value>
  </property>
  <property>
    <name>tez.counters.max.groups</name>
    <value>3000</value>
  </property>
  <property>
    <name>tez.generate.debug.artifacts</name>
    <value>false</value>
  </property>
  <property>
    <name>tez.grouping.max-size</name>
    <value>1073741824</value>
  </property>
  <property>
    <name>tez.grouping.min-size</name>
    <value>16777216</value>
  </property>
  <property>
    <name>tez.grouping.split-waves</name>
    <value>1.7</value>
  </property>
  <property>
    <name>tez.history.logging.proto-base-dir</name>
    <value>/warehouse/tablespace/external/hive/sys.db</value>
  </property>
  <property>
    <name>tez.history.logging.service.class</name>
    <value>org.apache.tez.dag.history.logging.proto.ProtoHistoryLoggingService</value>
  </property>
  <property>
    <name>tez.history.logging.timeline-cache-plugin.old-num-dags-per-group</name>
    <value>5</value>
  </property>
  <property>
    <name>tez.queue.name</name>
    <value>default</value>
  </property>
  <property>
    <name>tez.runtime.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.runtime.compress.codec</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec</value>
  </property>
  <property>
    <name>tez.runtime.convert.user-payload.to.history-text</name>
    <value>false</value>
  </property>
  <property>
    <name>tez.runtime.optimize.local.fetch</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.runtime.pipelined.sorter.sort.threads</name>
    <value>2</value>
  </property>
  <property>
    <name>tez.runtime.shuffle.fetch.buffer.percent</name>
    <value>0.6</value>
  </property>
  <property>
    <name>tez.runtime.shuffle.memory.limit.percent</name>
    <value>0.25</value>
  </property>
  <property>
    <name>tez.runtime.sorter.class</name>
    <value>PIPELINED</value>
  </property>
  <property>
    <name>tez.runtime.unordered.output.buffer.size-mb</name>
    <value>768</value>
  </property>
  <property>
    <name>tez.session.am.dag.submit.timeout.secs</name>
    <value>600</value>
  </property>
  <property>
    <name>tez.session.client.timeout.secs</name>
    <value>-1</value>
  </property>
  <property>
    <name>tez.shuffle-vertex-manager.max-src-fraction</name>
    <value>0.4</value>
  </property>
  <property>
    <name>tez.shuffle-vertex-manager.min-src-fraction</name>
    <value>0.2</value>
  </property>
  <property>
    <name>tez.staging-dir</name>
    <value>/tmp/${user.name}/staging</value>
  </property>
  <property>
    <name>tez.task.am.heartbeat.counter.interval-ms.max</name>
    <value>4000</value>
  </property>
  <property>
    <name>tez.task.generate.counters.per.io</name>
    <value>true</value>
  </property>
  <property>
    <name>tez.task.get-task.sleep.interval-ms.max</name>
    <value>200</value>
  </property>
  <property>
    <name>tez.task.launch.cluster-default.cmd-opts</name>
    <value>-server -Djava.net.preferIPv4Stack=true -Dhdp.version=${hdp.version}</value>
  </property>
  <property>
    <name>tez.task.max-events-per-heartbeat</name>
    <value>500</value>
  </property>
  <property>
    <name>tez.use.cluster.hadoop-libs</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.timeline-service.enabled</name>
    <value>false</value>
  </property>
  • tez.task.resource.memory.mb=8192 默认为 1024,Tez Task 使用的内存大小,生产环境参考值为 8192,适当调大有利于性能提升。
  • tez.am.resource.memory.mb=5120 默认为 1024,Tez 任务的 AppMaster 使用的内存量,生产环境参考值为 5120。
  • tez.counters.max=10000 高级配置,默认值为 1200,限制每个 DAG(包括 AppMaster 和 Task)可使用的 counter 数量,例如这里设置为 10000。
  • tez.lib.uris=/hdp/apps/3.1.5.0-152/tez/tez.tar.gz 必填项,为 HDFS 上的路径,需要将 /usr/hdp/current/tez/lib/tez.tar.gz 上传到这里配置的 HDFS 路径上,上传命令见本节末尾的示例。
  • tez.runtime.io.sort.mb=2703 排序缓冲区大小,生产环境参考值为 2703。
  • tez.am.java.opts=-server -Xmx8192m -Djava.net.preferIPv4Stack=true,AppMaster 的 JVM 启动参数。
  • 其它配置项详见官方文档 TezConfiguration.html
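
tez.lib.uris 指向的 HDFS 路径需要提前准备好,即把本地的 tez.tar.gz 上传上去(与 4.3 节的启动步骤一致,路径以实际安装版本为准):

hadoop fs -mkdir -p /hdp/apps/3.1.5.0-152/tez
hadoop fs -put /usr/hdp/current/tez/lib/tez.tar.gz /hdp/apps/3.1.5.0-152/tez/
hadoop fs -ls /hdp/apps/3.1.5.0-152/tez/tez.tar.gz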

3.14 /etc/hive/conf/hive-env.sh


if [ "$SERVICE" = "metastore" ]; then
  export HADOOP_HEAPSIZE=12288 # Setting for HiveMetastore
  export HADOOP_OPTS="$HADOOP_OPTS -Xloggc:/var/log/hive/hivemetastore-gc-%t.log -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCCause -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/hive/hms_heapdump.hprof -Dhive.log.dir=/var/log/hive -Dhive.log.file=hivemetastore.log  -Duser.timezone=Asia/Shanghai"
fi

if [ "$SERVICE" = "hiveserver2" ]; then
  export HADOOP_HEAPSIZE=12288 # Setting for HiveServer2 and Client
  export HADOOP_OPTS="$HADOOP_OPTS -Xloggc:/var/log/hive/hiveserver2-gc-%t.log -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCCause -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/var/log/hive/hs2_heapdump.hprof -Dhive.log.dir=/var/log/hive -Dhive.log.file=hiveserver2.log  -Duser.timezone=Asia/Shanghai"
fi

export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS  -Xmx${HADOOP_HEAPSIZE}m"
export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS"
HADOOP_HOME=${HADOOP_HOME:-/usr/hdp/current/hadoop}
export HIVE_HOME=${HIVE_HOME:-/usr/hdp/current/hive}
export HIVE_CONF_DIR=${HIVE_CONF_DIR:-/usr/hdp/current/hive/conf}
if [ "${HIVE_AUX_JARS_PATH}" != "" ]; then
  if [ -f "${HIVE_AUX_JARS_PATH}" ]; then
    export HIVE_AUX_JARS_PATH=${HIVE_AUX_JARS_PATH}
  elif [ -d "/usr/hdp/current/hive-hcatalog/share/hcatalog" ]; then
    export HIVE_AUX_JARS_PATH=/usr/hdp/current/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar
  fi
elif [ -d "/usr/hdp/current/hive-hcatalog/share/hcatalog" ]; then
  export HIVE_AUX_JARS_PATH=/usr/hdp/current/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar
fi
export METASTORE_PORT=9083
  • HADOOP_HEAPSIZE metastore 和 hiveserver2 的堆内存可以适当调大,如果设置过小,并发请求较多时可能会卡死,例如这里设置为 12288。
  • HADOOP_OPTS 为了防止时区问题,在 metastore 和 hiveserver2 的启动参数中添加 -Duser.timezone=Asia/Shanghai,设置为中国东八区(启动后可用本节末尾的命令确认参数是否生效)。
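
metastore 与 hiveserver2 启动后,可通过进程参数粗略确认堆大小与时区参数是否生效(仅为排查示例):

ps -ef | grep -v grep | grep -i hiveserver2 | grep -o 'Xmx[0-9]*[mg]'
ps -ef | grep -v grep | grep -i hiveserver2 | grep -o 'user.timezone=[^ ]*'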

3.15 /etc/hive/conf/hive-exec-log4j2.properties

参考 /etc/hive/conf/hive-exec-log4j2.properties.template

3.16 /etc/hive/conf/hive-log4j2.properties

参考 /etc/hive/conf/hive-log4j2.properties.template
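
如果还没有对应的配置文件,可先基于模板复制一份,再按需调整日志级别与日志路径(示例命令):

cp /etc/hive/conf/hive-exec-log4j2.properties.template /etc/hive/conf/hive-exec-log4j2.properties
cp /etc/hive/conf/hive-log4j2.properties.template /etc/hive/conf/hive-log4j2.properties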

3.17 /etc/hive/conf/hive-site.xml

  <property>
    <name>hive.server2.thrift.bind.host</name>
    <value>node01</value>
  </property>
  <property>
    <name>hive.metastore.uris</name>
    <value>thrift://node01:9083</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/warehouse/tablespace/managed/hive</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.external.dir</name>
    <value>/warehouse/tablespace/external/hive</value>
  </property>
  <property>
    <name>hive.metastore.db.type</name>
    <value>mysql</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://node01:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>hive</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
  <property>
    <name>hive.server2.thrift.port</name>
    <value>10000</value>
  </property>
  <property>
    <name>hive.tez.container.size</name>
    <value>512</value>
  </property>
  <property>
    <name>hive.heapsize</name>
    <value>512</value>
  </property>
  <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/tmp/hive/operation_logs</value>
  </property>
  <property>
    <name>datanucleus.schema.autoCreateAll</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.schema.verification</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.exec.local.scratchdir</name>
    <value>/hadoop/hive/exec/${user.name}</value>
  </property>
  <property>
    <name>hive.downloaded.resources.dir</name>
    <value>/hadoop/hive/${hive.session.id}_resources</value>
  </property>
  <property>
    <name>hive.querylog.location</name>
    <value>/hadoop/hive/log</value>
  </property>
  <property>
    <name>hive.server2.logging.operation.log.location</name>
    <value>/hadoop/hive/server2/${user.name}/operation_logs</value>
  </property>
  <property>
    <name>hive.exec.dynamic.partition.mode</name>
    <value>nonstrict</value>
  </property>
  <property>
    <name>hive.server2.authentication</name>
    <value>NONE</value>
  </property>
  <property>
    <name>hive.server2.thrift.client.user</name>
    <value>hive</value>
  </property>
  <property>
    <name>hive.server2.thrift.client.password</name>
    <value>hive</value>
  </property>
  <property>
    <name>hive.server2.tez.initialize.default.sessions</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.event.listeners</name>
    <value></value>
  </property>
  
  <property>
    <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
    <value>node01:2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>node01</value>
  </property>
  <property>
    <name>hive.cluster.delegation.token.store.class</name>
    <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
  </property>
  <property>
    <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
    <value>/hive/cluster/delegation</value>
  </property>
  <property>
    <name>hive.server2.zookeeper.namespace</name>
    <value>hiveserver2</value>
  </property>
  <property>
    <name>hive.zookeeper.client.port</name>
    <value>2181</value>
  </property>
  <property>
    <name>hive.zookeeper.namespace</name>
    <value>hive_zookeeper_namespace</value>
  </property>
  <property>
    <name>hive.zookeeper.quorum</name>
    <value>node01:2181</value>
  </property>


  <property>
    <name>atlas.hook.hive.maxThreads</name>
    <value>1</value>
  </property>
  <property>
    <name>atlas.hook.hive.minThreads</name>
    <value>1</value>
  </property>
  <property>
    <name>datanucleus.autoCreateSchema</name>
    <value>false</value>
  </property>
  <property>
    <name>datanucleus.cache.level2.type</name>
    <value>none</value>
  </property>
  <property>
    <name>datanucleus.fixedDatastore</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.auto.convert.join</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.auto.convert.join.noconditionaltask</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.auto.convert.join.noconditionaltask.size</name>
    <value>10737418240</value>
  </property>
  <property>
    <name>hive.auto.convert.sortmerge.join</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.auto.convert.sortmerge.join.to.mapjoin</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.cbo.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.cli.print.header</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.compactor.abortedtxn.threshold</name>
    <value>1000</value>
  </property>
  <property>
    <name>hive.compactor.check.interval</name>
    <value>300</value>
  </property>
  <property>
    <name>hive.compactor.delta.num.threshold</name>
    <value>10</value>
  </property>
  <property>
    <name>hive.compactor.delta.pct.threshold</name>
    <value>0.1f</value>
  </property>
  <property>
    <name>hive.compactor.initiator.on</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.compactor.worker.threads</name>
    <value>4</value>
  </property>
  <property>
    <name>hive.compactor.worker.timeout</name>
    <value>86400</value>
  </property>
  <property>
    <name>hive.compute.query.using.stats</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.convert.join.bucket.mapjoin.tez</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.create.as.insert.only</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.default.fileformat</name>
    <value>TextFile</value>
  </property>
  <property>
    <name>hive.default.fileformat.managed</name>
    <value>ORC</value>
  </property>
  <property>
    <name>hive.driver.parallel.compilation</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.enforce.sortmergebucketmapjoin</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.exec.compress.intermediate</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.exec.compress.output</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.exec.dynamic.partition</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.exec.failure.hooks</name>
    <value>org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook</value>
  </property>
  <property>
    <name>hive.exec.max.created.files</name>
    <value>100000</value>
  </property>
  <property>
    <name>hive.exec.max.dynamic.partitions</name>
    <value>5000</value>
  </property>
  <property>
    <name>hive.exec.max.dynamic.partitions.pernode</name>
    <value>2000</value>
  </property>
  <property>
    <name>hive.exec.orc.split.strategy</name>
    <value>HYBRID</value>
  </property>
  <property>
    <name>hive.exec.parallel</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.exec.parallel.thread.number</name>
    <value>8</value>
  </property>
  <property>
    <name>hive.exec.post.hooks</name>
    <value>org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook</value>
  </property>
  <property>
    <name>hive.exec.pre.hooks</name>
    <value>org.apache.hadoop.hive.ql.hooks.HiveProtoLoggingHook</value>
  </property>
  <property>
    <name>hive.exec.reducers.bytes.per.reducer</name>
    <value>4294967296</value>
  </property>
  <property>
    <name>hive.exec.reducers.max</name>
    <value>1009</value>
  </property>
  <property>
    <name>hive.exec.scratchdir</name>
    <value>/tmp/hive</value>
  </property>
  <property>
    <name>hive.exec.submit.local.task.via.child</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.exec.submitviachild</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.execution.mode</name>
    <value>container</value>
  </property>
  <property>
    <name>hive.fetch.task.aggr</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.fetch.task.conversion</name>
    <value>none</value>
  </property>
  <property>
    <name>hive.fetch.task.conversion.threshold</name>
    <value>1073741824</value>
  </property>
  <property>
    <name>hive.hook.proto.base-directory</name>
    <value>/warehouse/tablespace/external/hive/sys.db/query_data/</value>
  </property>
  <property>
    <name>hive.limit.optimize.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.limit.pushdown.memory.usage</name>
    <value>0.04</value>
  </property>
  <property>
    <name>hive.load.data.owner</name>
    <value>hive</value>
  </property>
  <property>
    <name>hive.lock.manager</name>
    <value></value>
  </property>
  <property>
    <name>hive.log.explain.output</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.map.aggr</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.map.aggr.hash.force.flush.memory.threshold</name>
    <value>0.9</value>
  </property>
  <property>
    <name>hive.map.aggr.hash.min.reduction</name>
    <value>0.5</value>
  </property>
  <property>
    <name>hive.map.aggr.hash.percentmemory</name>
    <value>0.5</value>
  </property>
  <property>
    <name>hive.mapjoin.bucket.cache.size</name>
    <value>10000</value>
  </property>
  <property>
    <name>hive.mapjoin.hybridgrace.hashtable</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.mapjoin.optimized.hashtable</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.mapred.reduce.tasks.speculative.execution</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.materializedview.rewriting.incremental</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.merge.mapfiles</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.merge.mapredfiles</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.merge.orcfile.stripe.level</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.merge.rcfile.block.level</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.merge.size.per.task</name>
    <value>256000000</value>
  </property>
  <property>
    <name>hive.merge.smallfiles.avgsize</name>
    <value>100000000</value>
  </property>
  <property>
    <name>hive.merge.tezfiles</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.authorization.storage.checks</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.cache.pinobjtypes</name>
    <value>Table,Database,Type,FieldSchema,Order</value>
  </property>
  <property>
    <name>hive.metastore.client.connect.retry.delay</name>
    <value>5s</value>
  </property>
  <property>
    <name>hive.metastore.client.socket.timeout</name>
    <value>1800s</value>
  </property>
  <property>
    <name>hive.metastore.connect.retries</name>
    <value>24</value>
  </property>
  <property>
    <name>hive.metastore.dml.events</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.event.listeners</name>
    <value></value>
  </property>
  <property>
    <name>hive.metastore.execute.setugi</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.metastore.failure.retries</name>
    <value>24</value>
  </property>
  <property>
    <name>hive.metastore.pre.event.listeners</name>
    <value>org.apache.hadoop.hive.ql.security.authorization.AuthorizationPreEventListener</value>
  </property>
  <property>
    <name>hive.metastore.sasl.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.metastore.server.max.threads</name>
    <value>100000</value>
  </property>
  <property>
    <name>hive.metastore.transactional.event.listeners</name>
    <value>org.apache.hive.hcatalog.listener.DbNotificationListener</value>
  </property>
  <property>
    <name>hive.optimize.bucketmapjoin</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.bucketmapjoin.sortedmerge</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.optimize.constant.propagation</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.cp</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.dynamic.partition.hashjoin</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.index.filter</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.metadataonly</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.null.scan</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.reducededuplication</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.optimize.reducededuplication.min.reducer</name>
    <value>4</value>
  </property>
  <property>
    <name>hive.optimize.sort.dynamic.partition</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.orc.compute.splits.num.threads</name>
    <value>10</value>
  </property>
  <property>
    <name>hive.orc.splits.include.file.footer</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.prewarm.enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.prewarm.numcontainers</name>
    <value>3</value>
  </property>
  <property>
    <name>hive.repl.cm.enabled</name>
    <value></value>
  </property>
  <property>
    <name>hive.repl.cmrootdir</name>
    <value></value>
  </property>
  <property>
    <name>hive.repl.rootdir</name>
    <value></value>
  </property>
  <property>
    <name>hive.security.authorization.createtable.owner.grants</name>
    <value>ALL</value>
  </property>
  <property>
    <name>hive.security.metastore.authenticator.manager</name>
    <value>org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator</value>
  </property>
  <property>
    <name>hive.security.metastore.authorization.auth.reads</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.security.metastore.authorization.manager</name>
    <value>org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider</value>
  </property>
  <property>
    <name>hive.server2.allow.user.substitution</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.idle.operation.timeout</name>
    <value>6h</value>
  </property>
  <property>
    <name>hive.server2.idle.session.timeout</name>
    <value>1d</value>
  </property>
  <property>
    <name>hive.server2.logging.operation.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.max.start.attempts</name>
    <value>5</value>
  </property>
  <property>
    <name>hive.server2.support.dynamic.service.discovery</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.table.type.mapping</name>
    <value>CLASSIC</value>
  </property>
  <property>
    <name>hive.server2.tez.default.queues</name>
    <value>default</value>
  </property>
  <property>
    <name>hive.server2.tez.sessions.per.default.queue</name>
    <value>1</value>
  </property>
  <property>
    <name>hive.server2.thrift.http.path</name>
    <value>cliservice</value>
  </property>
  <property>
    <name>hive.server2.thrift.http.port</name>
    <value>10001</value>
  </property>
  <property>
    <name>hive.server2.thrift.max.worker.threads</name>
    <value>1200</value>
  </property>
  <property>
    <name>hive.server2.thrift.sasl.qop</name>
    <value>auth</value>
  </property>
  <property>
    <name>hive.server2.transport.mode</name>
    <value>binary</value>
  </property>
  <property>
    <name>hive.server2.use.SSL</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.server2.webui.cors.allowed.headers</name>
    <value>X-Requested-With,Content-Type,Accept,Origin,X-Requested-By,x-requested-by</value>
  </property>
  <property>
    <name>hive.server2.webui.enable.cors</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.server2.webui.port</name>
    <value>10002</value>
  </property>
  <property>
    <name>hive.server2.webui.use.ssl</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.service.metrics.codahale.reporter.classes</name>
    <value>org.apache.hadoop.hive.common.metrics.metrics2.JsonFileMetricsReporter,org.apache.hadoop.hive.common.metrics.metrics2.JmxMetricsReporter,org.apache.hadoop.hive.common.metrics.metrics2.Metrics2Reporter</value>
  </property>
  <property>
    <name>hive.smbjoin.cache.rows</name>
    <value>10000</value>
  </property>
  <property>
    <name>hive.stats.autogather</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.stats.dbclass</name>
    <value>fs</value>
  </property>
  <property>
    <name>hive.stats.fetch.column.stats</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.stats.fetch.partition.stats</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.strict.managed.tables</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.support.concurrency</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.auto.reducer.parallelism</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.bucket.pruning</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.cartesian-product.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.cpu.vcores</name>
    <value>-1</value>
  </property>
  <property>
    <name>hive.tez.dynamic.partition.pruning</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.dynamic.partition.pruning.max.data.size</name>
    <value>104857600</value>
  </property>
  <property>
    <name>hive.tez.dynamic.partition.pruning.max.event.size</name>
    <value>1048576</value>
  </property>
  <property>
    <name>hive.tez.exec.print.summary</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.input.format</name>
    <value>org.apache.hadoop.hive.ql.io.HiveInputFormat</value>
  </property>
  <property>
    <name>hive.tez.input.generate.consistent.splits</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.tez.java.opts</name>
    <value>-server -Djava.net.preferIPv4Stack=true -XX:NewRatio=8 -XX:+UseNUMA -XX:+UseG1GC -XX:+ResizeTLAB -XX:+PrintGCDetails -verbose:gc -XX:+PrintGCTimeStamps</value>
  </property>
  <property>
    <name>hive.tez.log.level</name>
    <value>INFO</value>
  </property>
  <property>
    <name>hive.tez.max.partition.factor</name>
    <value>2.0</value>
  </property>
  <property>
    <name>hive.tez.min.partition.factor</name>
    <value>0.25</value>
  </property>
  <property>
    <name>hive.tez.smb.number.waves</name>
    <value>0.5</value>
  </property>
  <property>
    <name>hive.txn.manager</name>
    <value>org.apache.hadoop.hive.ql.lockmgr.DbTxnManager</value>
  </property>
  <property>
    <name>hive.txn.max.open.batch</name>
    <value>1000</value>
  </property>
  <property>
    <name>hive.txn.strict.locking.mode</name>
    <value>false</value>
  </property>
  <property>
    <name>hive.txn.timeout</name>
    <value>300</value>
  </property>
  <property>
    <name>hive.user.install.directory</name>
    <value>/user/</value>
  </property>
  <property>
    <name>hive.vectorized.execution.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.vectorized.execution.mapjoin.minmax.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.vectorized.execution.mapjoin.native.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.vectorized.execution.reduce.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>hive.vectorized.groupby.checkinterval</name>
    <value>4096</value>
  </property>
  <property>
    <name>hive.vectorized.groupby.flush.percent</name>
    <value>0.1</value>
  </property>
  <property>
    <name>hive.vectorized.groupby.maxentries</name>
    <value>100000</value>
  </property>
  <property>
    <name>mapred.max.split.size</name>
    <value>256000000</value>
  </property>
  <property>
    <name>mapred.min.split.size.per.node</name>
    <value>128000000</value>
  </property>
  <property>
    <name>mapred.min.split.size.per.rack</name>
    <value>128000000</value>
  </property>
  <property>
    <name>metastore.create.as.acid</name>
    <value>true</value>
  </property>

  <property>
    <name>hive.metastore.kerberos.keytab.file</name>
    <value>/etc/security/keytabs/hive.service.keytab</value>
  </property>
  <property>
    <name>hive.metastore.kerberos.principal</name>
    <value>hive/_HOST@EXAMPLE.COM</value>
  </property>
  <property>
    <name>hive.server2.authentication.spnego.keytab</name>
    <value>/etc/security/keytabs/spnego.service.keytab</value>
  </property>
  <property>
    <name>hive.server2.authentication.spnego.principal</name>
    <value>HTTP/_HOST@EXAMPLE.COM</value>
  </property>


  <!--
  <property>
    <name>hive.kudu.master.addresses.default</name>
    <value>bdd11:7051,bdd12:7051,bdd13:7051,app1:7051,es2:7051</value>
  </property>
  <property>
    <name>hive.server2.authentication</name>
    <value>LDAP</value>
  </property>
  <property>
    <name>hive.server2.authentication.ldap.baseDN</name>
    <value>ou=bigdata,dc=gdh,dc=yore,dc=com</value>
  </property>
  <property>
    <name>hive.server2.authentication.ldap.url</name>
    <value>ldap://bdm0:389</value>
  </property>
  <property>
    <name>hive.cluster.delegation.token.store.zookeeper.connectString</name>
    <value>bdm0:2181,bdm1:2181,etl1:2181,es1:2181,es2:2181</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>bdm0,bdm1,etl1,es1,es2</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>hive.cluster.delegation.token.store.class</name>
    <value>org.apache.hadoop.hive.thrift.ZooKeeperTokenStore</value>
  </property>
  <property>
    <name>hive.cluster.delegation.token.store.zookeeper.znode</name>
    <value>/hive/cluster/delegation</value>
  </property>
  <property>
    <name>hive.server2.zookeeper.namespace</name>
    <value>hiveserver2</value>
  </property>
  <property>
    <name>hive.zookeeper.client.port</name>
    <value>2181</value>
  </property>
  <property>
    <name>hive.zookeeper.namespace</name>
    <value>hive_zookeeper_namespace</value>
  </property>
  <property>
    <name>hive.zookeeper.quorum</name>
    <value>node01:2181</value>
  </property>
  <property>
    <name>zookeeper.znode.parent</name>
    <value>/hbase-unsecure</value>
  </property>
  -->
  • hive.metastore.warehouse.dir=/warehouse/tablespace/managed/hive 和 hive.metastore.warehouse.external.dir=/warehouse/tablespace/external/hive 分别指定了内表和外表在 HDFS 上的存储路径。
  • 如果 Hive 元数据保存在 MySQL,需要修改 hive.metastore.db.type=mysql、javax.jdo.option.ConnectionDriverName=com.mysql.jdbc.Driver、javax.jdo.option.ConnectionURL=jdbc:mysql://node01:3306/hive?createDatabaseIfNotExist=true&amp;useUnicode=true&amp;characterEncoding=UTF-8&amp;useSSL=false、javax.jdo.option.ConnectionUserName=hive、javax.jdo.option.ConnectionPassword=****** 等配置,生产环境建议保存在集群之外单独的关系型数据库中(MySQL 库与账号的初始化可参考本节末尾的示例)。
  • hive.tez.container.size=8192 与 Tez 配置中的 tez.task.resource.memory.mb 保持一致。
  • hive.heapsize=2048 可以适当调大,例如这里设置为 2048。
  • hive.insert.into.multilevel.dirs=true 当为 true 时表示允许生成多级目录,否则父级目录必须存在。
  • hive.exec.stagingdir=/tmp/hive/.hive-staging 默认为 .hive-staging,即在表当前目录下生成 .hive-staging 暂存目录,建议指定到表目录之外的 HDFS 路径。因为生产环境中 Hive 数据可能会被一些同步工具抽取,这类工具(例如 DataX)为了加快读写速度会直接读取表对应 HDFS 路径上满足要求的文件,如果此时正在变更 Hive 数据,同步出来的数据就可能重复或者加倍。
  • hive.metastore.event.listeners 如果需要监听 hive metastore 可以配置此项,自定义时将继承 MetaStoreEventListener 的实现类打好 jar 包放到 $HIVE_HOME/lib 下并配置到此项即可。
  • 如果是 HA 或者开启 LDAP 用户认证,可以参考最后注释掉的配置项。
  • 其它配置项详见官方文档 AdminManual+Configuration
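
如果元数据库使用 MySQL,可以参考如下方式提前创建库和账号(以 MySQL 5.7 及以上为例,账号、密码与上文 hive-site.xml 中的示例一致,生产环境请替换为更安全的密码):

mysql -uroot -p -e "CREATE DATABASE IF NOT EXISTS hive DEFAULT CHARACTER SET utf8;
CREATE USER IF NOT EXISTS 'hive'@'%' IDENTIFIED BY '123456';
GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%';
FLUSH PRIVILEGES;"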

3.18 将对应配置和包发送到其它节点

将同样的配置和安装包发送到其它节点,并按各节点的角色进行适当修改,分发方式可参考下面的示例。
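
例如用 rsync 分发(node02 为假设的主机名,其余节点同理;HDP 为第 2 节解压出来的安装包目录,也可以直接在各节点重复第 2 节的 rpm 安装步骤):

for d in hadoop hive tez zookeeper; do
  rsync -av /etc/${d}/conf/ node02:/etc/${d}/conf/
done
rsync -av HDP/ node02:~/HDP/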

3.19 一些问题

如果使用普通用户启动,大多会遇到权限问题,可根据日志提示将对应目录的属主/属组调整为启动用户有权限访问的。测试环境可能会遇到资源不足的问题,根据实际情况调小部分配置即可。

如果 yarn.nodemanager.aux-services 添加了 spark2_shuffle,启动 YARN 可能会报如下错误 :

ERROR org.apache.hadoop.yarn.server.nodemanager.NodeManager: Error starting NodeManager
java.lang.UnsatisfiedLinkError: Could not load library. Reasons: [no leveldbjni64-1.8 in java.library.path, no leveldbjni-1.8 in java.library.path, no leveldbjni in java.library.path, No such file or directory]

hadoop 启动时会加载 /usr/hdp/3.1.5.0-152/hadoop-hdfs/lib/leveldbjni-all-1.8.jar,
需要将与 spark2_3_1_5_0_152-yarn-shuffle-2.3.2.3.1.5.0-152.noarch.rpm 中 spark-2.3.2.3.1.5.0-152-yarn-shuffle.jar 相匹配版本的 libleveldbjni.so 放置到 java.library.path 包含的目录中,系统当前的 java.library.path 值可通过如下命令查看:

java -XshowSettings:properties
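
一种处理办法是把 leveldbjni 的 native 库解压出来,放到 java.library.path 包含的目录(例如 /usr/lib64)中,jar 内的路径可能因版本而异,请以实际解压结果为准:

cd /tmp
unzip -o /usr/hdp/3.1.5.0-152/hadoop-hdfs/lib/leveldbjni-all-1.8.jar META-INF/native/linux64/libleveldbjni.so
cp META-INF/native/linux64/libleveldbjni.so /usr/lib64/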

4 启动

4.1 Hadoop

/usr/hdp/current/hadoop/bin/hdfs namenode -format
chown -R hdfs:hadoop /hadoop
mkdir /hadoop/{yarn,mapred,mapreduce}
hadoop fs -mkdir /{home,user,tmp}
hadoop fs -mkdir -p /hdp/apps/3.1.5.0-152/{mapreduce,tez}
hadoop fs -put /usr/hdp/current/hadoop/mapreduce.tar.gz /hdp/apps/3.1.5.0-152/mapreduce/
chmod 755 /usr/hdp/3.1.5.0-152/hadoop-yarn/bin/container-executor
usermod -G hadoop hdfs
usermod -G hadoop yarn
usermod -G hdfs yarn
chown root:hadoop /var/lib/{hadoop-hdfs,hadoop-mapreduce,hadoop-yarn}
hdfs dfsadmin -safemode get
hdfs dfsadmin -safemode leave


#su - hdfs
/usr/hdp/current/hadoop/bin/hdfs --config /etc/hadoop/conf --daemon start namenode
/usr/hdp/current/hadoop/bin/hdfs --config /etc/hadoop/conf --daemon start secondarynamenode
/usr/hdp/current/hadoop/bin/hdfs --config /etc/hadoop/conf --daemon start datanode

#su - yarn
/usr/hdp/current/hadoop/bin/yarn --config /etc/hadoop/conf --daemon start nodemanager
/usr/hdp/current/hadoop/bin/yarn --config /etc/hadoop/conf --daemon start resourcemanager
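
# 启动完成后可做一些基本检查,确认各进程和资源已正常(仅为常用检查命令)
jps
hdfs dfsadmin -report | head -n 20
yarn node -list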

4.2 ZooKeeper

mkdir -p /var/lib/zookeeper
echo "1" > /var/lib/zookeeper/myid

/usr/hdp/current/zookeeper/bin/zkServer.sh start /etc/zookeeper/conf/zoo.cfg

4.3 Hive

wget https://repo.huaweicloud.com/repository/maven/mysql/mysql-connector-java/5.1.47/mysql-connector-java-5.1.47.jar -P /usr/hdp/current/hive/lib/
/usr/hdp/current/hive/bin/schematool -dbType mysql -initSchema
hadoop fs -put /usr/hdp/current/tez/lib/tez.tar.gz /hdp/apps/3.1.5.0-152/tez/

/usr/hdp/current/hive/bin/hive --service metastore >/dev/null 2>&1 &
/usr/hdp/current/hive/bin/hive --service hiveserver2 >/dev/null 2>&1 &
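
# metastore 与 hiveserver2 启动需要一定时间,可通过端口和日志确认(9083、10000 为上文 hive-site.xml 中配置的端口)
ss -lntp | grep -E ':9083|:10000'
tail -n 50 /var/log/hive/hiveserver2.log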

5 测试

5.1 Hadoop

hadoop fs -mkdir -p /tmp/input
hadoop fs -put /usr/hdp/current/hadoop/src/dev-support/README.md /tmp/input
hadoop jar /usr/hdp/current/hadoop-mapreduce/hadoop-mapreduce-examples.jar wordcount /tmp/input /tmp/output

5.2 ZooKeeper

/usr/hdp/current/zookeeper/bin/zkServer.sh status /etc/zookeeper/conf/zoo.cfg

5.3 Hive

/usr/hdp/current/hive/bin/beeline --color=true -u jdbc:hive2://node01:10000/default -n hive
0: jdbc:hive2://node01:10000/default> set hive.execution.engine;
+----------------------------+
|            set             |
+----------------------------+
| hive.execution.engine=tez  |
+----------------------------+
1 row selected (0.403 seconds)

-- 建表
CREATE TABLE `visit_t01` (
uid string,
visit_date string,
visit_count bigint
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

-- 插入测试数据
INSERT INTO visit_t01 VALUES ('u01', '2019/11/21', 5),('u02', '2019/11/23', 6),
('u03', '2019/11/22', 8),('u04', '2019/11/20', 3),('u01', '2019/11/23', 6),
('u01', '2019/12/21', 8),('u02', '2019/11/23', 6),('u01', '2019/12/22', 4);

-- 查询插入的数据
0: jdbc:hive2://node01:10000/default> SELECT * FROM visit_t01 LIMIT 10;
+----------------+-----------------------+------------------------+
| visit_t01.uid  | visit_t01.visit_date  | visit_t01.visit_count  |
+----------------+-----------------------+------------------------+
| u01            | 2019/11/21            | 5                      |
| u02            | 2019/11/23            | 6                      |
| u03            | 2019/11/22            | 8                      |
| u04            | 2019/11/20            | 3                      |
| u01            | 2019/11/23            | 6                      |
| u01            | 2019/12/21            | 8                      |
| u02            | 2019/11/23            | 6                      |
| u01            | 2019/12/22            | 4                      |
+----------------+-----------------------+------------------------+
8 rows selected 


-- 统计每个用户的月计访问量和累计访问量
 SELECT B.uid,B.visit_date2,B.v_count `月计`,
 SUM(v_count) OVER(PARTITION BY uid ORDER BY visit_date2) `累计` FROM (
 SELECT uid,visit_date2,SUM(visit_count) AS v_count FROM (
 SELECT uid,date_format(regexp_replace(visit_date, '/','-'),'yyyy-MM') visit_date2,visit_count
 FROM visit_t01
 ) A GROUP BY uid,visit_date2
 ) B;
----------------------------------------------------------------------------------------------
        VERTICES      MODE        STATUS  TOTAL  COMPLETED  RUNNING  PENDING  FAILED  KILLED  
----------------------------------------------------------------------------------------------
Map 1 .......... container     SUCCEEDED      1          1        0        0       0       0  
Reducer 2 ...... container     SUCCEEDED      2          2        0        0       0       0  
----------------------------------------------------------------------------------------------
VERTICES: 02/02  [==========================>>] 100%  ELAPSED TIME: 20.42 s    
----------------------------------------------------------------------------------------------
+--------+----------------+-----+-----+
| b.uid  | b.visit_date2  | 月计  | 累计  |
+--------+----------------+-----+-----+
| u01    | 2019-11        | 11  | 11  |
| u01    | 2019-12        | 12  | 23  |
| u03    | 2019-11        | 8   | 8   |
| u02    | 2019-11        | 12  | 12  |
| u04    | 2019-11        | 3   | 3   |
+--------+----------------+-----+-----+
5 rows selected (25.988 seconds)
