当前位置:   article > 正文

Spark MLlib之朴素贝叶斯_贝叶斯调参spark实现

贝叶斯调参spark实现

由于没有找到合适的数据集,这里先写一段代码随机生成带标签的数据,并以 LibSVM 格式保存:

  1. package mllib
  2. import org.apache.spark.{SparkConf, SparkContext}
  3. import org.apache.spark.mllib.linalg.DenseVector
  4. import org.apache.spark.mllib.regression.LabeledPoint
  5. import org.apache.spark.mllib.util.MLUtils
  6. import org.apache.spark.rdd.RDD
  7. import scala.util.Random
  8. /**
  9.  * Generates a random labeled data set and saves it as a LibSVM file under "E://output".
  10.  *
  11.  * Every instance has `numsfeature` integer-valued features drawn from [0, 5) and a class
  12.  * label drawn from [0, numsclass).
  13.  *
  14.  * created by LMR on 2019/6/10
  15.  */
  16. object NaiveBayesDataProduce {
  17.   /**
  18.    * Builds the random data set in driver memory, then writes it out through a local SparkContext.
  19.    *
  20.    * @param args command-line arguments (not read)
  21.    */
  22.   def main(args: Array[String]): Unit = {
  23.     val random = new Random()
  24.     val numsInstances = 1000
  25.     val numsfeature = 3
  26.     val numsclass = 3
  27.     // For each instance the features are drawn first and the label afterwards.
  28.     val data: Array[LabeledPoint] = Array.fill(numsInstances) {
  29.       val features: Array[Double] = Array.fill(numsfeature)(random.nextInt(5).toDouble)
  30.       // The label range follows numsclass instead of repeating the literal 3.
  31.       val label: Int = random.nextInt(numsclass)
  32.       LabeledPoint(label, new DenseVector(features))
  33.     }
  34.     val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("naiveBayes")
  35.     val sc = new SparkContext(conf)
  36.     try {
  37.       val dataRDD: RDD[LabeledPoint] = sc.parallelize(data)
  38.       MLUtils.saveAsLibSVMFile(dataRDD, "E://output")
  39.     } finally {
  40.       // Stop the context even when saving fails so the local Spark resources are released.
  41.       sc.stop()
  42.     }
  43.   }
  44. }

朴素贝叶斯模型:

  1. package mllib
  2. import org.apache.spark.mllib.classification.{NaiveBayes, NaiveBayesModel}
  3. import org.apache.spark.mllib.regression.LabeledPoint
  4. import org.apache.spark.mllib.util.{MLUtils, SVMDataGenerator}
  5. import org.apache.spark.rdd.RDD
  6. import org.apache.spark.{SparkConf, SparkContext}
  7. import org.apache.log4j.{Level, Logger}
  8. /**
  9.  * Trains a multinomial naive Bayes model on the LibSVM data stored under "E://output"
  10.  * and prints sample predictions followed by the accuracy on a held-out split.
  11.  *
  12.  * created by LMR on 2019/6/10
  13.  */
  14. object Naive_Bayes { // feature values are required to be non-negative
  15.   Logger.getRootLogger.setLevel(Level.ERROR)
  16. 
  17.   /**
  18.    * Loads the data, splits it 60/40 into training and test sets, trains the model,
  19.    * prints up to 20 (prediction, label) pairs and finally the test accuracy.
  20.    *
  21.    * @param args command-line arguments (not read)
  22.    */
  23.   def main(args: Array[String]): Unit = {
  24.     val conf: SparkConf = new SparkConf().setMaster("local[*]").setAppName("naiveBayes")
  25.     val sc = new SparkContext(conf)
  26.     try {
  27.       val data: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "E://output")
  28.       // Split into training and test sets.
  29.       val splits: Array[RDD[LabeledPoint]] = data.randomSplit(Array(0.6, 0.4), seed = 11L)
  30.       val train: RDD[LabeledPoint] = splits(0)
  31.       val test: RDD[LabeledPoint] = splits(1)
  32.       // Build the naive Bayes model.
  33.       val model: NaiveBayesModel = NaiveBayes.train(train, lambda = 1.0, modelType = "multinomial")
  34.       // Predict on the test set; cached because the result feeds several actions below.
  35.       val predictionAndLabel: RDD[(Double, Double)] =
  36.         test.map(p => (model.predict(p.features), p.label)).cache()
  37.       predictionAndLabel.take(20).foreach { case (predicted, actual) =>
  38.         println(s"$predicted\t$actual")
  39.       }
  40.       // Accuracy: one pair exists per test point, so the pair count is the test-set size.
  41.       val total: Long = predictionAndLabel.count()
  42.       val correct: Long = predictionAndLabel.filter { case (predicted, actual) => predicted == actual }.count()
  43.       val accuracy: Double = correct.toDouble / total
  44.       println(accuracy)
  45.     } finally {
  46.       // Stop the context even when loading or training fails.
  47.       sc.stop()
  48.     }
  49.   }
  50. }

完整代码/数据地址:git地址

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/从前慢现在也慢/article/detail/564598
推荐阅读
相关标签
  

闽ICP备14008679号