当前位置:   article > 正文

spark mllib机器学习之二 DecisionTree

"decisiontreeclassificationmodel \"graphvi\" 可视化 treemodel.todebugstring"


数据格式(LibSVM 格式,每行为:标签 特征索引:特征值 ...):

1 1:2 2:3 3:4
2 1:1 2:2 3:3
1 1:1 2:3 3:3
1 1:3 2:1 3:3
1 1:4 2:6 3:7
2 1:1 2:5 3:5
1 1:3 2:3 3:3
1 1:3 2:2 3:3
1 1:4 2:3 3:4
2 1:2 2:6 3:6
1 1:1 2:7 3:3
1 1:4 2:1 3:2
1 1:3 2:3 3:7
2 1:5 2:5 3:5


package com.agm.clssify



import org.apache.spark.mllib.tree.DecisionTree
import org.apache.spark.mllib.tree.model.DecisionTreeModel
import org.apache.spark.mllib.util.MLUtils
import java.io.File
import java.io.PrintWriter
import java.io.File
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
/**
 * Small driver that trains an MLlib decision-tree classifier on a LibSVM file,
 * reports the error on a random hold-out split, prints the learned tree, and
 * saves the model to disk before loading it back.
 */
object c45 {

  /**
   * Runs the whole train / evaluate / persist pipeline on a local Spark master.
   *
   * The SparkContext is always stopped on exit, including when loading,
   * training or saving throws.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Silence Spark's own INFO/WARN logging so the program output stays readable.
    Logger.getLogger("org").setLevel(Level.ERROR)
    /*
    Disabled workaround, apparently for running on Windows without a Hadoop install:
    val path = new File(".").getCanonicalPath()
    System.getProperties().put("hadoop.home.dir", path);
    new File("./bin").mkdirs();
    new File("./bin/winutils.exe").createNewFile();
    */
    val conf = new SparkConf()
      .setAppName("Simple Application") // name the application
      .setMaster("local")
    val sc = new SparkContext(conf)
    try {
      println("be")
      val data = MLUtils.loadLibSVMFile(sc, "F:\\testData\\spark\\svm.txt")

      // Unseeded 80/20 split: the train/test partition differs from run to run.
      val Array(trainData, testData) = data.randomSplit(Array(0.8, 0.2))
      testData.foreach(println)
      println("sdaf")
      testData.foreach(point => println(point.features))

      // NOTE(review): MLlib expects labels in 0 until numClasses, so this must be
      // greater than the largest label present in the input file -- confirm against the data.
      val numClasses = 4
      // Empty map: every feature is treated as continuous.
      val categoricalFeaturesInfo = Map[Int, Int]()
      val impurity = "gini"
      val maxDepth = 10
      val maxBins = 32

      val model = DecisionTree.trainClassifier(
        trainData, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins)

      val labelAndPreds = testData.map { point =>
        (point.label, model.predict(point.features))
      }

      // Count once and guard against an empty hold-out split, which is likely with
      // very small inputs and would otherwise yield 0.0 / 0 = NaN.
      val testCount = testData.count()
      if (testCount == 0) {
        println("Test Error = N/A (test split is empty)")
      } else {
        val misclassified = labelAndPreds.filter { case (label, prediction) =>
          label != prediction
        }.count()
        val testErr = misclassified.toDouble / testCount
        println("Test Error =" + testErr)
      }
      println("Learned classification tree model:\n" + model.toDebugString)

      // NOTE(review): save presumably fails if the target directory already exists,
      // so a second run would need the directory removed first -- confirm.
      model.save(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
      // Loaded back only to show that the saved model is readable; not used further.
      val sameModel = DecisionTreeModel.load(sc, "F:\\testData\\spark\\myDecisionTreeClassificationModel")
    } finally {
      // Release the local Spark runtime even when a step above throws.
      sc.stop()
    }
  }
}
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/很楠不爱3/article/detail/547203
推荐阅读
相关标签
  

闽ICP备14008679号