### 11.6 Class Prediction with Support Vector Machines ###
```r
# Build a data subset
X <- iris[iris$Species != 'virginica', 2:3]         # predictors: Sepal.Width, Petal.Length
y <- iris[iris$Species != 'virginica', 'Species']   # response
plot(X, col = y, pch = as.numeric(y) + 15, cex = 1.5)   # scatter plot

# Fit a linear support vector classifier
library(e1071)
svm.model <- svm(x = X, y = y, kernel = 'linear', degree = 1, scale = FALSE)
summary(svm.model)
svm.model$index   # indices of the support vectors
svm.model$nSV     # number of support vectors per class
svm.model$SV      # predictor values of the support vectors
```
```r
# Draw the SVM decision boundary (solid line), the support vectors,
# and the maximum-margin lines (dashed)
plot_svc_decision_boundary <- function(svm.model, X) {
  w <- t(svm.model$coefs) %*% svm.model$SV   # weight vector of the linear decision function
  b <- -svm.model$rho                        # intercept
  margin <- 1 / w[2]                         # vertical offset of the margin lines
  abline(a = -b / w[1, 2], b = -w[1, 1] / w[1, 2], col = "red", lwd = 2)   # decision boundary
  points(X[svm.model$index, ], col = "blue", cex = 2.5, lwd = 2)           # highlight support vectors
  abline(a = -b / w[1, 2] + margin, b = -w[1, 1] / w[1, 2], col = "grey", lwd = 2, lty = 2)
  abline(a = -b / w[1, 2] - margin, b = -w[1, 1] / w[1, 2], col = "grey", lwd = 2, lty = 2)
}

# Scatter plot with the separating line added
plot(X, col = y, pch = as.numeric(y) + 15, cex = 1.5)   # scatter plot
plot_svc_decision_boundary(svm.model, X)                # add decision boundary, mark support vectors
```
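For a linear kernel the fitted decision function can be written explicitly as f(x) = w·x − rho with w = t(coefs) %*% SV, which is exactly what the plotting helper relies on. A minimal sketch, reusing `svm.model` and `X` from above, to confirm that this reconstruction agrees with `predict()`:

```r
# Rebuild the linear decision function and cross-tabulate its sign
# against the predicted class labels (each sign should map to one class)
w <- t(svm.model$coefs) %*% svm.model$SV     # 1 x 2 weight vector
f <- as.matrix(X) %*% t(w) - svm.model$rho   # decision values for all training points
table(sign(f), predict(svm.model, X))
```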
```r
# SVMs are sensitive to feature scaling
Xs <- data.frame(x1 = c(1, 5, 3, 5),
                 x2 = c(50, 20, 80, 60))
ys <- factor(c(0, 0, 1, 1))

svm_clf <- svm(x = Xs, y = ys, cost = 100,
               kernel = "linear", scale = FALSE)
Xs_scale <- apply(Xs, 2, scale)   # standardize each column
svm_clf1 <- svm(x = Xs_scale, y = ys, cost = 100,
                kernel = "linear", scale = FALSE)
par(mfrow = c(1, 2))
plot(Xs, col = ys, pch = as.numeric(ys) + 15, cex = 1.5, main = 'Unscaled')
plot_svc_decision_boundary(svm_clf, Xs)
plot(Xs_scale, col = ys, pch = as.numeric(ys) + 15, cex = 1.5, main = 'Scaled')
plot_svc_decision_boundary(svm_clf1, Xs_scale)
par(mfrow = c(1, 1))
```
```r
# Set the scale argument to TRUE instead
svm_clf2 <- svm(x = Xs, y = ys, cost = 100,
                kernel = "linear", scale = TRUE)
# Inspect the centers and standard deviations used for standardization
svm_clf2$x.scale
# Compare with manually computed means and standard deviations
apply(Xs, 2, function(x) c('center' = mean(x, na.rm = TRUE), 'scale' = sd(x, na.rm = TRUE)))
```
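A hedged sanity check, reusing `svm_clf1`, `svm_clf2`, `Xs`, and `Xs_scale` from above: because `predict()` re-applies the stored centering and scaling automatically, the model fitted with `scale = TRUE` on raw data should classify the training points the same way as the model fitted on the manually standardized data.

```r
# Both columns should contain identical class labels
data.frame(manual_scaling   = predict(svm_clf1, Xs_scale),
           internal_scaling = predict(svm_clf2, Xs))
```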
```r
# Soft margin classification
X <- iris[iris$Species != 'virginica', 1:2]   # Sepal.Length, Sepal.Width
y <- iris[iris$Species != 'virginica', 'Species']
svm_smallC <- svm(x = X, y = y, cost = 1,
                  kernel = "linear", scale = FALSE)
svm_largeC <- svm(x = X, y = y, cost = 100,
                  kernel = "linear", scale = FALSE)
par(mfrow = c(1, 2))
plot(X, col = y, pch = as.numeric(y) + 15, main = 'small cost')
plot_svc_decision_boundary(svm_smallC, X)
plot(X, col = y, pch = as.numeric(y) + 15, main = 'large cost')
plot_svc_decision_boundary(svm_largeC, X)
par(mfrow = c(1, 1))
```
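The cost parameter controls how soft the margin is: a small cost tolerates more margin violations and therefore usually keeps more support vectors, while a large cost narrows the margin. A quick way to see this on the two fits above (a small sketch, not part of the original code):

```r
# Number of support vectors retained by each model
c(small_cost = length(svm_smallC$index),
  large_cost = length(svm_largeC$index))
```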
```r
# Nonlinear support vector classification
# Load the dataset
moons <- read.csv('moons.csv')
# Inspect the data structure
str(moons)
```
```r
# Helper that plots a classifier's decision regions
visualize_classifier <- function(model, X, y, xlim, ylim, title = NA) {
  x1s <- seq(xlim[1], xlim[2], length.out = 200)
  x2s <- seq(ylim[1], ylim[2], length.out = 200)
  Z <- expand.grid(x1s, x2s)                # prediction grid
  colnames(Z) <- colnames(X)
  y_pred <- predict(model, Z)
  y_pred <- matrix(as.numeric(y_pred), nrow = length(x1s))   # coerce factor to numeric for filled.contour

  filled.contour(x1s, x2s, y_pred,
                 nlevels = 2,
                 col = RColorBrewer::brewer.pal(length(unique(y)), 'Pastel1'),
                 key.axes = FALSE,
                 plot.axes = {
                   axis(1); axis(2)
                   points(X[, 1], X[, 2], pch = as.numeric(y) + 16,
                          col = as.numeric(y) + 2, cex = 1.5)
                 },
                 xlab = colnames(X)[1], ylab = colnames(X)[2])
  title(main = title)
}

xlim <- c(-1.5, 2.5)
ylim <- c(-1, 1.5)
```
```r
# Fit a linear SVM classifier
svm_linear <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                  kernel = 'linear', degree = 1, cost = 10)
# Plot the decision boundary
visualize_classifier(svm_linear, moons[, 1:2], moons[, 3],
                     xlim, ylim, title = 'Linear SVM')
```
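The two-moons classes are interleaved half-circles, so a straight line cannot separate them and the linear model's training accuracy stays clearly below 1. A short check, assuming `moons.csv` has the usual two-moons layout with the label in column 3:

```r
# Share of training points the linear SVM classifies correctly
mean(as.character(predict(svm_linear, moons[, 1:2])) == as.character(moons[, 3]))
```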
```r
# Fit a nonlinear (polynomial-kernel) SVM classifier
svm_poly <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                kernel = 'polynomial', degree = 3, cost = 5)
# Plot the decision boundary
visualize_classifier(svm_poly, moons[, 1:2], moons[, 3],
                     xlim, ylim, title = 'Nonlinear (polynomial) SVM')
```
```r
# Polynomial kernel with coef0 = 1
svm_poly1 <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                 kernel = 'polynomial', degree = 3, cost = 5, coef0 = 1)
visualize_classifier(svm_poly1, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'Polynomial kernel (coef0 = 1)')
```
```r
# Add similarity features: Gaussian RBF kernel
svm_rbf <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
               kernel = 'radial', gamma = 0.1, cost = 0.01)
svm_rbf1 <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                kernel = 'radial', gamma = 0.1, cost = 1000)
svm_rbf2 <- svm(x = moons[, 1:2], y = factor(moons[, 3]),
                kernel = 'radial', gamma = 5, cost = 1000)
visualize_classifier(svm_rbf, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'gamma = 0.1, cost = 0.01')
visualize_classifier(svm_rbf1, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'gamma = 0.1, cost = 1000')
visualize_classifier(svm_rbf2, moons[, 1:2], moons[, 3],
                     xlim, ylim, 'gamma = 5, cost = 1000')
```
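gamma controls how far each training point's influence reaches (a large gamma gives narrow, wiggly decision regions), while cost again penalizes margin violations. A rough comparison of training accuracy for the three RBF fits, a sketch only, bearing in mind that the high-gamma model's near-perfect training fit may simply be overfitting:

```r
# Training accuracy of the three RBF models (training data only)
sapply(list('gamma=0.1, cost=0.01' = svm_rbf,
            'gamma=0.1, cost=1000' = svm_rbf1,
            'gamma=5,   cost=1000' = svm_rbf2),
       function(m) mean(as.character(predict(m, moons[, 1:2])) ==
                        as.character(moons[, 3])))
```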
```r
# Tune the support vector machine
# Use tune.svm to search over gamma and cost
moons$y <- as.factor(moons$y)
tuned <- tune.svm(y ~ ., data = moons,
                  gamma = 10^(-5:-1), cost = 10^(1:3))
summary(tuned)   # tuning results
```
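Besides the summary, the tune object exposes the winning parameter pair, its cross-validated error (10-fold by default), and a model already refit with those parameters, so the manual refit below is mainly for illustration:

```r
tuned$best.parameters    # data frame with the selected gamma and cost
tuned$best.performance   # cross-validated error of that combination
class(tuned$best.model)  # an svm object refit with the best parameters
```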
```r
# Refit the SVM with the best parameters
model.tuned <- svm(y ~ ., data = moons,
                   gamma = tuned$best.parameters$gamma,
                   cost = tuned$best.parameters$cost)
# Predict classes for the training set
pred <- predict(model.tuned, newdata = moons[, 1:2])
# Confusion matrix to inspect prediction accuracy
table('actual' = moons$y,
      'prediction' = pred)
```
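The confusion matrix above is computed on the training data, so it gives an optimistic picture; overall accuracy can be read off it directly (a small sketch reusing `pred` from above):

```r
# Overall training accuracy from the confusion matrix
conf <- table(actual = moons$y, prediction = pred)
sum(diag(conf)) / sum(conf)
```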