赞
踩
- --master yarn-cluster \
- --jars mysql-connector-java-8.0.28.jar,sparklens-0.3.2-s_2.11.jar \
- --driver-class-path mysql-connector-java-8.0.28.jar \
- --conf spark.executor.extraClassPath=sparklens-0.3.2-s_2.11.jar \
- --conf spark.extraListeners=com.qubole.sparklens.QuboleJobListener \
- --conf spark.sparklens.reporting.disabled=true \
- --conf spark.sparklens.data.dir=/projects/sparklens \
- --conf spark.default.parallelism=1900 \
- --conf spark.sql.shuffle.partitions=1900 \
- --conf spark.dynamicAllocation.maxExecutors=250 \
- --conf spark.dynamicAllocation.executorIdleTimeout=120s \
- --conf spark.dynamicAllocation.initialExecutors=200 \
- --conf spark.dynamicAllocation.minExecutors=200 \
- --conf spark.executor.cores=3 \
- --conf spark.executor.memory=512m \
- --conf spark.executor.memoryOverhead=1g \
- --conf spark.blacklist.enabled=true \
- --conf spark.yarn.nodemanager.vmem-check-enabled=false \
- --conf spark.yarn.nodemanager.pmem-check-enabled=false \
- --conf spark.speculation.quantile=0.5 \
- --conf spark.speculation.multiplier=1.4 \
- --conf spark.yarn.dist.files=hdfs://projects/log4j2.properties#log4j2.properties \
- --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=file:log4j2.properties" \
- --conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=file:log4j2.properties" \
- # 给下面的 Spark 启动配置加上注释(注意:行尾注释仅用于说明;反斜杠 \ 之后再跟注释会破坏 shell 的续行,实际执行前需删除这些注释)
- --master yarn-cluster \ # 设置 Spark 的运行模式为 yarn-cluster,即在 YARN 集群中运行应用程序
- --jars mysql-connector-java-8.0.28.jar,sparklens-0.3.2-s_2.11.jar \ # 添加外部的 JAR 包 mysql-connector-java-8.0.28.jar 和 sparklens-0.3.2-s_2.11.jar
- --driver-class-path mysql-connector-java-8.0.28.jar \ # 设置 Driver 运行时的 classpath,将 mysql-connector-java-8.0.28.jar 添加到 classpath 中
- --conf spark.executor.extraClassPath=sparklens-0.3.2-s_2.11.jar \ # 设置 Executor 运行时的 classpath,将 sparklens-0.3.2-s_2.11.jar 添加到 classpath 中
- --conf spark.extraListeners=com.qubole.sparklens.QuboleJobListener \ # 启用 QuboleJobListener,用于监控 Spark 作业的性能指标
- --conf spark.sparklens.reporting.disabled=true \ # 禁用 SparkLens 的报告功能,避免浪费资源
- --conf spark.sparklens.data.dir=/projects/sparklens \ # 指定 SparkLens 存储数据的目录
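- --conf spark.default.parallelism=1900 \ # 设置 RDD 的默认并行度,即 join、reduceByKey、parallelize 等操作在未显式指定时使用的默认分区数;未设置时,shuffle 操作取父 RDD 中最大的分区数,parallelize 在 YARN 上取所有 Executor 的总核数与 2 中的较大值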
- --conf spark.sql.shuffle.partitions=1900 \ # 设置 Spark SQL 在 join、聚合等 Shuffle 操作中使用的分区数,默认值为 200
- --conf spark.dynamicAllocation.maxExecutors=250 \ # 设置动态分配的最大 Executor 数量
- --conf spark.dynamicAllocation.executorIdleTimeout=120s \ # 设置 Executor 空闲时间,超过该时间后将被回收
- --conf spark.dynamicAllocation.initialExecutors=200 \ # 设置动态分配的初始 Executor 数量
- --conf spark.dynamicAllocation.minExecutors=200 \ # 设置动态分配的最小 Executor 数量
- --conf spark.executor.cores=3 \ # 设置每个 Executor 使用的 CPU 核数
- --conf spark.executor.memory=512m \ # 设置每个 Executor 的内存大小
- --conf spark.executor.memoryOverhead=1g \ # 设置 Executor 的内存 overhead
- --conf spark.blacklist.enabled=true \ # 启用 Executor 黑名单功能,避免因 Executor 运行异常导致的作业失败
- --conf spark.yarn.nodemanager.vmem-check-enabled=false \ # 禁用虚拟内存检查,避免因虚拟内存检查导致的作业失败
- --conf spark.yarn.nodemanager.pmem-check-enabled=false \ # 禁用物理内存检查,避免因物理内存检查导致的作业失败
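- --conf spark.speculation.quantile=0.5 \ # 设置某个 Stage 中已完成任务的比例达到该值(此处为 50%)后才开始推测执行;仅在 spark.speculation=true 时生效
- --conf spark.speculation.multiplier=1.4 \ # 设置任务耗时超过已完成任务耗时中位数的多少倍(此处为 1.4 倍)时才被视为慢任务并触发推测执行;仅在 spark.speculation=true 时生效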
- --conf spark.yarn.dist.files=hdfs://projects/log4j2.properties#log4j2.properties \ # 将 HDFS 上的日志配置文件 log4j2.properties 分发到每个 Executor 的工作目录,“#”后面是该文件在工作目录中的别名
- --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=file:log4j2.properties" \ # 通过 JVM 参数指定 Driver 使用工作目录中的 log4j2.properties 作为日志配置文件
- --conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=file:log4j2.properties" \ # 通过 JVM 参数指定 Executor 使用工作目录中的 log4j2.properties 作为日志配置文件
-
- --master yarn-cluster \
- --jars mysql-connector-java-8.0.28.jar \
- --driver-class-path mysql-connector-java-8.0.28.jar \
- --conf spark.default.parallelism=5000 \
- --conf spark.sql.shuffle.partitions=5000 \
- --conf spark.dynamicAllocation.maxExecutors=1000 \
- --conf spark.dynamicAllocation.executorIdleTimeout=120s \
- --conf spark.dynamicAllocation.initialExecutors=100 \
- --conf spark.dynamicAllocation.minExecutors=100 \
- --conf spark.executor.cores=1 \
- --conf spark.executor.memory=10g \
- --conf spark.executor.memoryOverhead=1g \
- --conf spark.shuffle.memoryFraction=0.2 \
- --conf spark.excludeOnFailure.enabled=true \
- --conf spark.yarn.nodemanager.vmem-check-enabled=false \
- --conf spark.yarn.nodemanager.pmem-check-enabled=false \
- --conf spark.speculation.quantile=0.5 \
- --conf spark.speculation.multiplier=1.4 \
- --conf spark.rss.enabled=true \
- --conf spark.shuffle.io.clientThreads=16 \
- --conf spark.yarn.dist.files=hdfs://R2/a.conf#aa.conf,hdfs://R2/a/log4j.properties#log4j-first.properties \
- --conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=file:log4j-first.properties" \
- --conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=file:log4j-first.properties" \
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。