Configure the environment

Add the Scala environment variable (for example in /etc/profile):

# Scala
export SCALA_HOME=/opt/bigdata/scala/default
Unpack the Spark tarball:

tar -zxvf spark-2.4.3-bin-hadoop2.7.tgz -C ./
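All of the paths below point at /opt/bigdata/spark/default, so a minimal sketch of a symlink to the versioned directory, assuming the tarball was extracted under /opt/bigdata/spark (as the shuffle-jar copy command later in this post suggests):

# assumption: the tarball was extracted to /opt/bigdata/spark
ln -s /opt/bigdata/spark/spark-2.4.3-bin-hadoop2.7 /opt/bigdata/spark/default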
cp spark-env.sh.template spark-env.sh
vim spark-env.sh

Add the following settings:

export JAVA_HOME=/usr/local/java_1.8.0_121
# Scala environment variable
export SCALA_HOME=/opt/bigdata/scala/default
# Hadoop path
export HADOOP_HOME=/opt/bigdata/hadoop/default
# Hadoop configuration directory
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_YARN_USER_ENV=${HADOOP_CONF_DIR}
export SPARK_HOME=/opt/bigdata/spark/default
export HIVE_HOME=/opt/bigdata/hive/default
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export PATH=${JAVA_HOME}/bin:${SCALA_HOME}/bin:${HADOOP_HOME}/bin:${SPARK_HOME}/bin:${HIVE_HOME}/bin:$PATH
cp spark-defaults.conf.template spark-defaults.conf
vim spark-defaults.conf

Add the following settings:

# Collect Spark job event logs to HDFS
spark.eventLog.enabled true
spark.eventLog.dir hdfs://ecs-6531-0002.novalocal:9000/tmp/spark/eventLogs
spark.eventLog.compress true
# Default serializer
spark.serializer org.apache.spark.serializer.KryoSerializer
# Deploy mode: yarn
spark.master yarn
# Default driver cores
spark.driver.cores 1
# Default driver memory
spark.driver.memory 800m
# Default executor cores
spark.executor.cores 1
# Default executor memory
spark.executor.memory 1000m
# Default number of executor instances
spark.executor.instances 1
# Hive warehouse location
spark.sql.warehouse.dir hdfs://ecs-6531-0002.novalocal:9000/user/root/warehouse
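With spark.eventLog.enabled set to true, the event-log directory must already exist on HDFS, or the SparkContext will fail on startup. A minimal sketch, assuming the HDFS client is on the PATH:

hdfs dfs -mkdir -p /tmp/spark/eventLogs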
Copy Hive's configuration into Spark's conf directory so Spark SQL can find the Hive metastore:

cp $HIVE_HOME/conf/hive-site.xml $SPARK_HOME/conf/hive-site.xml
Then add Spark itself to the shell environment (again in /etc/profile):

# Spark configuration
export SPARK_YARN_USER_ENV=${HADOOP_CONF_DIR}
export SPARK_HOME=/opt/bigdata/spark/default
export PATH=${SCALA_HOME}/bin:${SPARK_HOME}/bin:$PATH
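Reload the profile so the new variables take effect in the current shell (assuming the exports above went into /etc/profile):

source /etc/profile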
To run Spark's external shuffle service inside the YARN NodeManagers, copy the YARN shuffle jar into Hadoop's yarn share directory:

cp /opt/bigdata/spark/spark-2.4.3-bin-hadoop2.7/yarn/spark-2.4.3-yarn-shuffle.jar /opt/bigdata/hadoop/hadoop-3.2.0/share/hadoop/yarn/
Then add spark_shuffle to the NodeManager auxiliary services in yarn-site.xml:
<!-- Specify how reducers fetch data -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle,spark_shuffle</value>
</property>
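YARN also needs to know which class implements the spark_shuffle service; per the Spark on YARN documentation it is org.apache.spark.network.yarn.YarnShuffleService. A sketch of the extra property (restart the NodeManagers afterwards so both changes take effect):

<property>
  <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
  <value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>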
Start the SQL shell by simply typing spark-sql.
Test a query against Hive.
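For example, a quick smoke test from the spark-sql prompt (the table name below is a placeholder, not part of this setup):

spark-sql> show databases;
spark-sql> select count(*) from some_table;  -- some_table is hypothetical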
To submit a job with Spark, just spark-submit the jar.
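A minimal sketch using the SparkPi example that ships with the distribution (the examples jar name assumes the stock Scala 2.11 build of Spark 2.4.3):

spark-submit \
  --master yarn \
  --deploy-mode cluster \
  --class org.apache.spark.examples.SparkPi \
  $SPARK_HOME/examples/jars/spark-examples_2.11-2.4.3.jar 100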