赞
踩
本文记录了用pyspark构建一个简单的模型的过程。
from pyspark.sql import SparkSession
from pyspark.ml.feature import StringIndexer
from pyspark.ml.classification import LogisticRegression
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import BinaryClassificationEvaluator
from pyspark import SparkContext, SparkConf
conf = SparkConf().setAppName("Spark_mllearn_example").setMaster("local")
sc = SparkContext(conf=conf)
spark = SparkSession.builder.master("local").appName("Spark_mllearn_example").config("", "").getOrCreate()
dpath = '/Users/huoshirui/Desktop/Spark/'
df = spark.read.csv(dpath + 'spark_mllearn_test.csv', header=True)
数据集如下图:
df = df.withColumn('c2', df['c2'].cast('double'))\
.withColumn('c3', df['c3'
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。