# 赞
# 踩
# Point findspark at the HDP Spark 2 client install and the python3
# interpreter. This call is made before the pyspark import below.
import findspark

findspark.init(
    spark_home="/usr/hdp/current/spark2-client/",
    python_path="/usr/bin/python3",
)

from pyspark import SparkConf, SparkContext, SQLContext

# Spark configuration: YARN master, application name "http".
conf = SparkConf()
conf.setMaster("yarn")
conf.setAppName("http")

sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# Read a tab-delimited, header-less CSV with an explicit schema, using the
# reader's DROPMALFORMED parse mode.
# NOTE(review): `schema` is not defined anywhere in the visible script; it
# must be bound (presumably to a pyspark StructType) before this line runs,
# otherwise this raises NameError -- confirm against the full file.
df2 = (
    sqlContext.read.schema(schema)
    .option("header", "false")
    .option("delimiter", "\t")
    .option("mode", "DROPMALFORMED")
    .csv("/s_tmp/100k_rows.csv")
)
# Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。