Configuring Jupyter on a Spark + Hadoop cluster for Spark's matrix-factorization recommendation algorithm (VM/VirtualBox image download included)
1. [Image download] Click here to download the image from Baidu Cloud (the Baidu Cloud client must be installed; the file is 6 GB).
This image spares you the trouble of setting up the cluster by hand.
2. Download Anaconda3-4.0.0-Linux-x86_64.sh and install it to /usr/local/Anaconda/.
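The Anaconda installer supports an unattended install; assuming the .sh file is in the current directory, a command along these lines puts it into the target prefix (-b accepts the license non-interactively, -p sets the install path):

bash Anaconda3-4.0.0-Linux-x86_64.sh -b -p /usr/local/Anaconda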
3. Edit /etc/profile as shown below. Apart from the IP address, the crucial detail is that /usr/local/Anaconda/bin: must appear in PATH before $SPARK_HOME/bin, because the two ship different Python versions: the image's bundled Python is 2.7, while Anaconda's is 3.5. To run Jupyter, everything must use 3.5; otherwise all sorts of problems appear.
#java
export JAVA_HOME=/usr/lib/jvm/default-java
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=.:$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
#Hadoop
export HADOOP_HOME=/usr/local/hadoop
export CLASSPATH=.:$HADOOP_HOME/lib:$CLASSPATH
export PATH=$PATH:$HADOOP_HOME/bin:/home/hadoop/sbt/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_ROOT_LOGGER=INFO,console
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
#scala
export SCALA_HOME=/usr/local/scala
export PATH=$SCALA_HOME/bin:$PATH
#spark
export SPARK_HOME=/usr/local/spark
export PATH=/usr/local/Anaconda/bin:$SPARK_HOME/bin:$PATH
#py-spark
export PYTHONPATH=$SPARK_HOME/python/:$PYTHONPATH
export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.10.4-src.zip:$PYTHONPATH
# executors use Anaconda's Python 3.5
export PYSPARK_PYTHON=python3.5
# make the pyspark driver launch Jupyter instead of the plain REPL
export PYSPARK_DRIVER_PYTHON=jupyter
#export PYSPARK_DRIVER_PYTHON_OPTS="notebook --ip=192.168.218.100 --no-browser --allow-root"
# bind the notebook server to the master node's IP; change this to match your own cluster
export PYSPARK_DRIVER_PYTHON_OPTS="notebook --ip=192.168.218.100 --no-browser"
#export PYSPARK_PYTHON=/usr/bin/python
#export PYSPARK_DRIVER_PYTHON=/usr/bin/python
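After saving, reload the configuration with "source /etc/profile" and check that "python --version" now reports Anaconda's 3.5. Then run "pyspark" on the master: with the variables above, it starts a Jupyter notebook server on 192.168.218.100 (default port 8888) instead of the plain shell, and the pyspark launcher makes the SparkContext available in notebook cells as sc.

From a notebook cell you can then run the matrix-factorization recommender the title refers to. Below is a minimal sketch using ALS from pyspark.mllib (Spark's built-in matrix-factorization implementation); the (user, product, rating) triples here are made-up toy data for illustration only:

from pyspark.mllib.recommendation import ALS, Rating

# hypothetical toy ratings: (user, product, rating) triples
ratings = sc.parallelize([
    Rating(0, 0, 4.0), Rating(0, 1, 2.0),
    Rating(1, 1, 3.0), Rating(1, 2, 4.0),
    Rating(2, 0, 5.0), Rating(2, 2, 1.0),
])

# factorize the rating matrix into rank-2 user and product factor matrices
model = ALS.train(ratings, rank=2, iterations=10, lambda_=0.01)

# predicted rating of product 2 by user 0, and user 0's top-2 recommendations
print(model.predict(0, 2))
print(model.recommendProducts(0, 2))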