赞
踩
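逻辑回归是一种二分类算法。我们先来看下以下公式(Sigmoid 函数):
$$\sigma(z) = \frac{1}{1 + e^{-z}}$$
其中 $z$ 是 Sigmoid 函数的输入:
$$z = w_0 x_0 + w_1 x_1 + \cdots + w_n x_n = w^{T} x$$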
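从上式可以看出:当 $z = 0$ 时,$\sigma(z) = 0.5$;当 $z$ 不断减小时,$\sigma(z)$ 趋近于 $0$;当 $z$ 不断增大时,$\sigma(z)$ 趋近于 $1$。
所以我们就可以利用 Sigmoid 函数进行分类:当 $\sigma(z) \le 0.5$ 时,label 判断为 0;当 $\sigma(z) > 0.5$ 时,label 判断为 1。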
$x$ 是特征的值,而逻辑回归模型训练的就是最优的权值 $w$。
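我们可以使用偏导数(梯度)来确定权值优化最快的方向,例如二维情形:
$$\nabla f(x, y) = \left( \frac{\partial f(x, y)}{\partial x},\ \frac{\partial f(x, y)}{\partial y} \right)$$
沿 $x$ 的移动方向为 $\frac{\partial f(x, y)}{\partial x}$,
沿 $y$ 的移动方向为 $\frac{\partial f(x, y)}{\partial y}$。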
def loadDataSet():
    """Read 'testSet.txt' and return (dataMat, labelMat).

    Each non-blank line is split on whitespace. The first two fields become
    the feature values and the third becomes the integer class label. Every
    feature row is prefixed with the constant 1.0 so that the first weight
    acts as the bias term.

    Returns:
        dataMat: list of [1.0, x1, x2] rows (floats).
        labelMat: list of int labels, one per row of dataMat.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a line fails to parse.
    with open('testSet.txt') as fr:
        for line in fr:
            lineArr = line.strip().split()
            if not lineArr:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
def sigmoid(inX):
    """Return the logistic function 1 / (1 + e^(-inX)) of inX."""
    # `exp` is not defined in this snippet; presumably it comes from
    # `from numpy import *` elsewhere in the file -- confirm.
    denominator = 1 + exp(-inX)
    return 1.0 / denominator
这里我们给出的是优化后的代码:
# numIter: number of passes over the data set (default 150)
def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    """Fit logistic-regression weights by stochastic gradient ascent.

    On every pass each row is visited exactly once, in random order, and
    the weights are updated from that single row.

    Args:
        dataMatrix: 2-D collection of feature rows. Rows must support
            element-wise multiplication with the weight vector
            (presumably a numpy array -- confirm against the caller).
        classLabels: sequence of labels, one per row of dataMatrix.
        numIter: number of passes over the data set.

    Returns:
        The weight vector after numIter passes (starts as all ones).

    NOTE(review): `shape`, `ones`, `random` and `sum` are not defined in
    this snippet; presumably they come from `from numpy import *`.
    """
    m, n = shape(dataMatrix)
    weights = ones(n)
    for j in range(numIter):
        # Must be a real list: entries are deleted as rows are consumed,
        # and a Python 3 `range` object does not support deletion.
        dataIndex = list(range(m))
        for i in range(m):
            # The step size shrinks as j and i grow; the constant term
            # keeps it from ever reaching zero.
            alpha = 4/(1.0+j+i)+0.0001
            # Pick a random position among the rows not yet used this pass.
            randIndex = int(random.uniform(0, len(dataIndex)))
            # Translate that position into the actual row number; using
            # randIndex itself as the row number would break the
            # without-replacement sampling.
            rowIndex = dataIndex[randIndex]
            # Predicted probability for the chosen row.
            h = sigmoid(sum(dataMatrix[rowIndex]*weights))
            # Difference between the true label and the prediction.
            error = classLabels[rowIndex] - h
            # Move the weights along the gradient for this row.
            weights = weights + alpha * error * dataMatrix[rowIndex]
            # Mark the row as used for the rest of this pass.
            del dataIndex[randIndex]
    return weights
def classifyVector(inX, weights):
    """Classify one feature vector: 1.0 if sigmoid(inX . weights) > 0.5, else 0.0."""
    weighted = inX * weights
    prob = sigmoid(sum(weighted))
    return 1.0 if prob > 0.5 else 0.0
package Logistic
import scala.io.Source
import scala.collection.mutable.ArrayBuffer
import scala.util.Random
/**
 * Binary logistic regression trained with stochastic gradient ascent.
 *
 * Feature rows carry a leading constant 1.0 so that the first weight acts
 * as the bias term.
 */
object Logistic {

  /**
   * Reads "LogisticTestSet.txt" and parses it into feature rows and labels.
   *
   * Every non-blank line must contain three whitespace-separated numbers:
   * two feature values followed by the class label.
   *
   * @return a pair of (feature rows, each prefixed with 1.0; labels)
   */
  def loadDataSet(): (Array[Array[Double]], Array[Double]) = {
    val fr = Source.fromFile("LogisticTestSet.txt")
    // getLines() is lazy, so the rows are materialised before the source
    // is closed in the finally block.
    val parsed =
      try {
        fr.getLines()
          .map(_.trim)
          .filter(_.nonEmpty)
          .map(_.split("\\s+").map(_.toDouble))
          .toArray
      } finally {
        fr.close()
      }
    val dataMat = parsed.map(fields => Array(1.0, fields(0), fields(1)))
    val labelMat = parsed.map(fields => fields(2))
    (dataMat, labelMat)
  }

  /** Logistic function 1 / (1 + e^(-inX)). */
  def sigmoid(inX: Double): Double = {
    1.0 / (1 + math.exp(-inX))
  }

  /** Sum of the pairwise products of two vectors (zip truncates to the shorter one). */
  private def dot(xs: Array[Double], ys: Array[Double]): Double =
    xs.zip(ys).map { case (x, y) => x * y }.sum

  /**
   * Fits logistic-regression weights by stochastic gradient ascent.
   *
   * On every pass each row is visited exactly once, in random order, and
   * the weights are updated from that single row.
   *
   * @param dataMatrix  feature rows; must be non-empty
   * @param classLabels one label per row of `dataMatrix`
   * @param numIter     number of passes over the data set
   * @param rng         random source used to order the rows; pass a seeded
   *                    instance for reproducible results
   * @return the weight vector after `numIter` passes (starts as all ones)
   * @throws IllegalArgumentException if `dataMatrix` is empty or its length
   *                                  differs from that of `classLabels`
   */
  def stocGradAscent1(
      dataMatrix: Array[Array[Double]],
      classLabels: Array[Double],
      numIter: Int = 150,
      rng: Random = Random): Array[Double] = {
    require(dataMatrix.nonEmpty, "dataMatrix must contain at least one row")
    require(
      dataMatrix.length == classLabels.length,
      "dataMatrix and classLabels must have the same length")
    val m = dataMatrix.length
    val n = dataMatrix(0).length
    var weights = Array.fill(n)(1.0)
    for (j <- 1 to numIter) {
      // A fresh random permutation per pass gives sampling without
      // replacement and keeps each row paired with its own label.
      val visitOrder = rng.shuffle((0 until m).toVector)
      for ((rowIdx, i) <- visitOrder.zipWithIndex) {
        // The step size shrinks as j and i grow; the constant term keeps
        // it from ever reaching zero.
        val alpha = 4 / (1.0 + j + i) + 0.0001
        val row = dataMatrix(rowIdx)
        val h = sigmoid(dot(row, weights))
        val error = classLabels(rowIdx) - h
        // Gradient step: each weight moves by alpha * error * its feature.
        weights = weights.zip(row).map { case (w, x) => w + alpha * error * x }
      }
    }
    weights
  }

  /**
   * Classifies one feature vector.
   *
   * @param inX     feature vector
   * @param weights weight vector
   * @return 1.0 if sigmoid(inX . weights) is greater than 0.5, otherwise 0.0
   */
  def classifyVector(inX: Array[Double], weights: Array[Double]): Double = {
    val prob = sigmoid(dot(inX, weights))
    if (prob > 0.5) 1.0 else 0.0
  }

  /**
   * Trains on the data set from [[loadDataSet]], prints the predicted label
   * of every row, then prints the fraction of rows predicted correctly.
   */
  def main(args: Array[String]): Unit = {
    val (dataMatrix, classLabels) = loadDataSet()
    val weights = stocGradAscent1(dataMatrix, classLabels, 500)
    val result = dataMatrix.map(x => classifyVector(x, weights))
    println(result.mkString("\n"))
    val correct = result.zip(classLabels).count { case (predicted, actual) => predicted == actual }
    println(correct.toDouble / classLabels.length)
  }
}
/*
运行结果:
0.0
1.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
1.0
0.0
1.0
1.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
0.0
1.0
0.0
0.0
0.0
1.0
1.0
0.0
0.0
1.0
1.0
1.0
0.0
0.0
1.0
1.0
0.0
0.0
1.0
1.0
0.0
0.0
1.0
0.0
0.0
0.9973131930172848 -1.6933548385975405 -0.5646293142100435
0.68
*/
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。