
Implementing the SVM Algorithm in R: Classification and Regression

### 11.6 Class Prediction Based on Support Vector Machines ###
# Build a data subset
X <- iris[iris$Species != 'virginica', 2:3]           # predictors: Sepal.Width, Petal.Length
y <- iris[iris$Species != 'virginica', 'Species']     # response
plot(X, col = y, pch = as.numeric(y) + 15, cex = 1.5) # scatter plot

# Build a support vector classifier
library(e1071)
svm.model <- svm(x = X, y = y, kernel = 'linear', degree = 1, scale = FALSE)
summary(svm.model)
svm.model$index # indices of the support vectors
svm.model$nSV   # number of support vectors in each class
svm.model$SV    # predictor values of the support vectors
# Function to draw the SVM decision boundary (solid line), mark the support
# vectors and add the maximum-margin lines
plot_svc_decision_boundary <- function(svm.model, X) {
  w <- t(svm.model$coefs) %*% svm.model$SV # weight vector of the linear SVM
  b <- -svm.model$rho                      # intercept
  margin <- 1/w[2]                         # vertical offset of the margin lines
  abline(a = -b/w[1,2], b = -w[1,1]/w[1,2], col = "red", lwd = 2) # decision boundary
  points(X[svm.model$index,], col = "blue", cex = 2.5, lwd = 2)   # circle the support vectors
  abline(a = -b/w[1,2] + margin, b = -w[1,1]/w[1,2], col = "grey", lwd = 2, lty = 2) # upper margin
  abline(a = -b/w[1,2] - margin, b = -w[1,1]/w[1,2], col = "grey", lwd = 2, lty = 2) # lower margin
}
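The function relies on how e1071 stores a fitted linear SVM: coefs holds the label-signed dual coefficients and SV the support vectors, so the primal weight vector is w = t(coefs) %*% SV and the intercept is -rho; the margin lines sit where the decision value w·x + b equals ±1. As a minimal sanity check (a sketch, reusing svm.model and X from above), the manual decision values should match what predict() returns:

# Sketch: the manual weight vector should reproduce predict()'s decision values
w <- t(svm.model$coefs) %*% svm.model$SV
b <- -svm.model$rho
manual <- as.matrix(X) %*% t(w) + b
fitted <- attr(predict(svm.model, X, decision.values = TRUE), 'decision.values')
all.equal(as.numeric(manual), as.numeric(fitted)) # expected: TRUE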
# Scatter plot with the separating line added
plot(X, col = y, pch = as.numeric(y) + 15, cex = 1.5) # scatter plot
plot_svc_decision_boundary(svm.model, X) # add the decision boundary and mark the support vectors
# SVM is sensitive to feature scaling
Xs <- data.frame(x1 = c(1, 5, 3, 5),
                 x2 = c(50, 20, 80, 60))
ys <- factor(c(0, 0, 1, 1))
svm_clf <- svm(x = Xs, y = ys, cost = 100,
               kernel = "linear", scale = FALSE)
Xs_scale <- apply(Xs, 2, scale) # standardize each column
svm_clf1 <- svm(x = Xs_scale, y = ys, cost = 100,
                kernel = "linear", scale = FALSE)
par(mfrow = c(1, 2))
plot(Xs, col = ys, pch = as.numeric(ys) + 15, cex = 1.5, main = 'Unscaled')
plot_svc_decision_boundary(svm_clf, Xs)
plot(Xs_scale, col = ys, pch = as.numeric(ys) + 15, cex = 1.5, main = 'Scaled')
plot_svc_decision_boundary(svm_clf1, Xs_scale)
par(mfrow = c(1, 1))
# Alternatively, set the scale argument to TRUE
svm_clf2 <- svm(x = Xs, y = ys, cost = 100,
                kernel = "linear", scale = TRUE)
# Inspect the centers and standard deviations used for standardization
svm_clf2$x.scale
# Compare with manually computed means and standard deviations
apply(Xs, 2, function(x) {c('center' = mean(x, na.rm = TRUE), 'scale' = sd(x, na.rm = TRUE))})
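With scale = TRUE the stored centers and scales are applied automatically at prediction time, so new observations can be supplied on the original measurement scale. A small sketch (the new point below is hypothetical):

# Sketch: predict a hypothetical new, unscaled observation;
# svm_clf2 applies the stored centering and scaling internally
newpt <- data.frame(x1 = 2, x2 = 40)
predict(svm_clf2, newpt)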
# Soft margin classification
X <- iris[iris$Species != 'virginica', 1:2] # Sepal.Length, Sepal.Width
y <- iris[iris$Species != 'virginica', 'Species']
svm_smallC <- svm(x = X, y = y, cost = 1,
                  kernel = "linear", scale = FALSE)
svm_largeC <- svm(x = X, y = y, cost = 100,
                  kernel = "linear", scale = FALSE)
par(mfrow = c(1, 2))
plot(X, col = y, pch = as.numeric(y) + 15, main = 'small cost')
plot_svc_decision_boundary(svm_smallC, X)
plot(X, col = y, pch = as.numeric(y) + 15, main = 'large cost')
plot_svc_decision_boundary(svm_largeC, X)
par(mfrow = c(1, 1))
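A smaller cost tolerates more margin violations, so more observations fall inside the margin and become support vectors; a larger cost fits the training points more tightly. This can be checked directly on the two fitted models:

# Sketch: compare the per-class support vector counts of the two models
svm_smallC$nSV # cost = 1: expect more support vectors
svm_largeC$nSV # cost = 100: expect fewer support vectors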
# Nonlinear support vector machine classification
# Load the dataset
moons <- read.csv('moons.csv')
# Inspect the data structure
str(moons)
# Function to draw a classifier's decision regions over a grid
visualize_classifier <- function(model, X, y, xlim, ylim, title = NA) {
  x1s <- seq(xlim[1], xlim[2], length.out = 200)
  x2s <- seq(ylim[1], ylim[2], length.out = 200)
  Z <- expand.grid(x1s, x2s) # prediction grid
  colnames(Z) <- colnames(X)
  y_pred <- predict(model, Z)
  y_pred <- matrix(as.numeric(y_pred), length(x1s)) # factor to numeric for filled.contour
  filled.contour(x1s, x2s, y_pred,
                 nlevels = 2,
                 col = RColorBrewer::brewer.pal(length(unique(y)), 'Pastel1'),
                 key.axes = FALSE,
                 plot.axes = {axis(1); axis(2);
                   points(X[,1], X[,2], pch = as.numeric(y) + 16, col = as.numeric(y) + 2, cex = 1.5)
                 },
                 xlab = colnames(X)[1], ylab = colnames(X)[2]
  )
  title(main = title)
}
xlim <- c(-1.5, 2.5)
ylim <- c(-1, 1.5)
# Fit a linear support vector classifier
svm_linear <- svm(x = moons[,1:2], y = factor(moons[,3]),
                  kernel = 'linear', degree = 1, cost = 10)
# Draw the decision boundary
visualize_classifier(svm_linear, moons[,1:2], moons[,3],
                     xlim, ylim, title = 'Linear SVM classification')
# Fit a nonlinear support vector classifier
svm_poly <- svm(x = moons[,1:2], y = factor(moons[,3]),
                kernel = 'polynomial', degree = 3, cost = 5)
# Draw the decision boundary
visualize_classifier(svm_poly, moons[,1:2], moons[,3],
                     xlim, ylim, title = 'Nonlinear SVM classification')
# Polynomial kernel: e1071 uses (gamma * u'v + coef0)^degree, so coef0 = 1
# gives the inhomogeneous polynomial kernel
svm_poly1 <- svm(x = moons[,1:2], y = factor(moons[,3]),
                 kernel = 'polynomial', degree = 3, cost = 5, coef0 = 1)
visualize_classifier(svm_poly1, moons[,1:2], moons[,3],
                     xlim, ylim, 'Polynomial kernel')
# Adding similarity features: the Gaussian RBF kernel
# Larger gamma makes the kernel more local (a wigglier boundary);
# larger cost penalizes margin violations more heavily
svm_rbf <- svm(x = moons[,1:2], y = factor(moons[,3]),
               kernel = 'radial', gamma = 0.1, cost = 0.01)
svm_rbf1 <- svm(x = moons[,1:2], y = factor(moons[,3]),
                kernel = 'radial', gamma = 0.1, cost = 1000)
svm_rbf2 <- svm(x = moons[,1:2], y = factor(moons[,3]),
                kernel = 'radial', gamma = 5, cost = 1000)
visualize_classifier(svm_rbf, moons[,1:2], moons[,3],
                     xlim, ylim, 'gamma = 0.1, cost = 0.01')
visualize_classifier(svm_rbf1, moons[,1:2], moons[,3],
                     xlim, ylim, 'gamma = 0.1, cost = 1000')
visualize_classifier(svm_rbf2, moons[,1:2], moons[,3],
                     xlim, ylim, 'gamma = 5, cost = 1000')
# Tuning the support vector machine
# Grid search over gamma and cost with tune.svm
moons$y <- as.factor(moons$y)
tuned <- tune.svm(y ~ ., data = moons,
                  gamma = 10^(-5:-1), cost = 10^(1:3))
summary(tuned) # cross-validation results and best parameters
# Refit the SVM with the best parameters
model.tuned <- svm(y ~ ., data = moons,
                   gamma = tuned$best.parameters$gamma,
                   cost = tuned$best.parameters$cost)
# Predict the classes of the training set
pred <- predict(model.tuned, newdata = moons[,1:2])
# Confusion matrix to check prediction accuracy
table('actual' = moons$y,
      'prediction' = pred)
