
Logistic Regression: Explanation and Code Walkthrough

The logistic regression model is a classification model defined by the following conditional probability distribution.
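In the notation used in the rest of this post (parameter vector θ, with a constant feature x_0 = 1 absorbing the intercept), the standard form of that distribution is:

P(Y = 1 | x) = e^(θ^T x) / (1 + e^(θ^T x)) = 1 / (1 + e^(-θ^T x))
P(Y = 0 | x) = 1 / (1 + e^(θ^T x))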

The logistic regression model derives from the logistic distribution, whose cumulative distribution function is an S-shaped (sigmoid) curve.

Logistic regression is used for classification problems, where the predicted value is discrete; a property of the algorithm is that its output always lies between 0 and 1.

The model hypothesis of logistic regression:
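A common way to write the hypothesis, with g denoting the sigmoid function:

h_θ(x) = g(θ^T x) = 1 / (1 + e^(-θ^T x))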

The role of h(x): for a given input, it computes the probability that the output variable equals 1 under the chosen parameters, i.e. h_θ(x) = P(y = 1 | x; θ).

Cost function:
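For m training examples (x^(i), y^(i)), the unregularized cross-entropy cost is:

J(θ) = -(1/m) Σ_{i=1..m} [ y^(i) log(h_θ(x^(i))) + (1 - y^(i)) log(1 - h_θ(x^(i))) ]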

Gradient descent:
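Each iteration updates every parameter θ_j simultaneously, with learning rate α:

θ_j := θ_j - α (1/m) Σ_{i=1..m} (h_θ(x^(i)) - y^(i)) x_j^(i)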

Advanced optimization algorithms: the conjugate gradient method, BFGS (variable metric), L-BFGS (limited-memory variable metric), and fminunc (an unconstrained minimization routine).

Regularization: keep all the features, but reduce the magnitude of the parameters.
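Concretely, the regularized cost computed by cost_function_reg below adds a penalty on θ_1, ..., θ_n (θ_0 is not penalized):

J(θ) = -(1/m) Σ_{i=1..m} [ y^(i) log(h_θ(x^(i))) + (1 - y^(i)) log(1 - h_θ(x^(i))) ] + (λ/(2m)) Σ_{j=1..n} θ_j^2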

Here λ (lambda) is the regularization parameter: the larger λ is, the smaller the parameters become. We are minimizing the cost function, and the penalty term is added at the end; the larger that term grows, the larger the whole cost becomes, so the θ values must shrink for the cost to stay at its minimum.
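The corresponding partial derivatives, which cost_function_reg returns as the gradient, treat θ_0 separately:

∂J/∂θ_0 = (1/m) Σ_{i=1..m} (h_θ(x^(i)) - y^(i)) x_0^(i)
∂J/∂θ_j = (1/m) Σ_{i=1..m} (h_θ(x^(i)) - y^(i)) x_j^(i) + (λ/m) θ_j    (j ≥ 1)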


In the code, the most important pieces are the implementation of the cost function and of the sigmoid function. The main script is listed first, followed by the helper modules it imports.

# ===================== ex2_reg (main script) =====================
import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as opt

from plotData import *
import costFunctionReg as cfr
import plotDecisionBoundary as pdb
import predict as predict
import mapFeature as mf

plt.ion()

# Load data
# The first two columns contain the exam scores and the third column contains the label.
data = np.loadtxt('ex2data2.txt', delimiter=',')
X = data[:, 0:2]
y = data[:, 2]

plot_data(X, y)
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')
plt.legend(['y = 1', 'y = 0'])

input('Program paused. Press ENTER to continue')

# ===================== Part 1: Regularized Logistic Regression =====================
X = mf.map_feature(X[:, 0], X[:, 1])

# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1])

# Set regularization parameter lambda to 1
lmd = 1

# Compute and display initial cost and gradient for regularized logistic regression
cost, grad = cfr.cost_function_reg(initial_theta, X, y, lmd)

np.set_printoptions(formatter={'float': '{: 0.4f}\n'.format})
print('Cost at initial theta (zeros): {}'.format(cost))
print('Expected cost (approx): 0.693')
print('Gradient at initial theta (zeros) - first five values only: \n{}'.format(grad[0:5]))
print('Expected gradients (approx) - first five values only: \n 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115')

input('Program paused. Press ENTER to continue')

# Compute and display cost and gradient with non-zero theta
test_theta = np.ones(X.shape[1])
cost, grad = cfr.cost_function_reg(test_theta, X, y, lmd)

print('Cost at test theta: {}'.format(cost))
print('Expected cost (approx): 2.13')
print('Gradient at test theta - first five values only: \n{}'.format(grad[0:5]))
print('Expected gradients (approx) - first five values only: \n 0.3460\n 0.0851\n 0.1185\n 0.1506\n 0.0159')

input('Program paused. Press ENTER to continue')

# ===================== Part 2: Regularization and Accuracies =====================
# Optional Exercise:
# In this part, you will get to try different values of lambda and
# see how regularization affects the decision boundary.
#
# Try the following values of lambda (0, 1, 10, 100).
#
# How does the decision boundary change when you vary lambda? How does
# the training set accuracy vary?

# Initialize fitting parameters
initial_theta = np.zeros(X.shape[1])

# Set regularization parameter lambda to 1 (you should vary this)
lmd = 1

# Optimize with BFGS, a quasi-Newton method from scipy.optimize,
# instead of a hand-written gradient descent loop
def cost_func(t):
    return cfr.cost_function_reg(t, X, y, lmd)[0]

def grad_func(t):
    return cfr.cost_function_reg(t, X, y, lmd)[1]

theta, cost, *unused = opt.fmin_bfgs(f=cost_func, fprime=grad_func, x0=initial_theta,
                                     maxiter=400, full_output=True, disp=False)

# Plot boundary
print('Plotting decision boundary ...')
pdb.plot_decision_boundary(theta, X, y)
plt.title('lambda = {}'.format(lmd))
plt.xlabel('Microchip Test 1')
plt.ylabel('Microchip Test 2')

# Compute accuracy on our training set
p = predict.predict(theta, X)

print('Train Accuracy: {:0.4f}'.format(np.mean(y == p) * 100))
print('Expected accuracy (with lambda = 1): 83.1 (approx)')

input('ex2_reg Finished. Press ENTER to exit')
# ===================== costFunctionReg.py =====================
import numpy as np
from sigmoid import *

def cost_function_reg(theta, X, y, lmd):
    m = y.size

    hypothesis = sigmoid(np.dot(X, theta))
    reg_theta = theta[1:]  # theta_0 is excluded from the regularization term

    cost = np.sum(-y * np.log(hypothesis) - (1 - y) * np.log(1 - hypothesis)) / m \
           + (lmd / (2 * m)) * np.sum(reg_theta * reg_theta)

    grad = np.zeros(theta.size)
    normal_grad = (np.dot(X.T, hypothesis - y) / m).flatten()
    grad[0] = normal_grad[0]
    grad[1:] = normal_grad[1:] + reg_theta * (lmd / m)

    return cost, grad
# ===================== plotDecisionBoundary.py =====================
import matplotlib.pyplot as plt
import numpy as np
from plotData import *
from mapFeature import *

def plot_decision_boundary(theta, X, y):
    plot_data(X[:, 1:3], y)

    if X.shape[1] <= 3:
        # Only need two points to define a line, so choose two endpoints
        plot_x = np.array([np.min(X[:, 1]) - 2, np.max(X[:, 1]) + 2])

        # Calculate the decision boundary line
        plot_y = (-1 / theta[2]) * (theta[1] * plot_x + theta[0])

        plt.plot(plot_x, plot_y)
        plt.legend(['Decision Boundary', 'Admitted', 'Not admitted'], loc=1)
        plt.axis([30, 100, 30, 100])
    else:
        # Here is the grid range
        u = np.linspace(-1, 1.5, 50)
        v = np.linspace(-1, 1.5, 50)

        z = np.zeros((u.size, v.size))

        # Evaluate z = theta*x over the grid
        for i in range(0, u.size):
            for j in range(0, v.size):
                z[i, j] = np.dot(map_feature(u[i], v[j]), theta)

        z = z.T

        # Plot the z = 0 contour: you need to specify the level [0]
        cs = plt.contour(u, v, z, levels=[0], colors='r')
        plt.legend([cs.collections[0]], ['Decision Boundary'])
# ===================== mapFeature.py =====================
import numpy as np

def map_feature(x1, x2):
    # Map the two input features to polynomial terms up to the sixth degree:
    # 1, x1, x2, x1^2, x1*x2, x2^2, x1^3, ..., x1*x2^5, x2^6
    degree = 6

    x1 = x1.reshape((x1.size, 1))
    x2 = x2.reshape((x2.size, 1))
    result = np.ones(x1[:, 0].shape)

    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            result = np.c_[result, (x1 ** (i - j)) * (x2 ** j)]

    return result
# ===================== plotData.py =====================
import matplotlib.pyplot as plt
import numpy as np

def plot_data(X, y):
    plt.figure()

    pos = np.where(y == 1)[0]  # indices of the positive examples
    neg = np.where(y == 0)[0]  # indices of the negative examples

    plt.scatter(X[pos, 0], X[pos, 1], marker="+", c='b')
    plt.scatter(X[neg, 0], X[neg, 1], marker="o", c='y')
# ===================== predict.py =====================
import numpy as np
from sigmoid import *

def predict(theta, X):
    # Predict 1 when the estimated probability is at least 0.5, otherwise 0
    p = sigmoid(np.dot(X, theta))

    pos = np.where(p >= 0.5)
    neg = np.where(p < 0.5)
    p[pos] = 1
    p[neg] = 0

    return p
# ===================== sigmoid.py =====================
import numpy as np

def sigmoid(z):
    # Works element-wise on arrays as well as on scalars
    return 1 / (1 + np.exp(-z))
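As a small follow-up to the optional exercise in Part 2, the sketch below refits the model for several values of lambda and prints the training accuracy each time. It is only a sketch: it assumes the helper modules above are saved under the names used in the imports (costFunctionReg.py, mapFeature.py, predict.py, sigmoid.py) and that ex2data2.txt is in the working directory; with lambda = 0 the optimizer may emit numerical warnings because the unregularized fit pushes some predictions very close to 0 or 1.

import numpy as np
import scipy.optimize as opt

import costFunctionReg as cfr
import mapFeature as mf
import predict as predict

# Load the data and build the degree-6 polynomial feature matrix
data = np.loadtxt('ex2data2.txt', delimiter=',')
X = mf.map_feature(data[:, 0], data[:, 1])
y = data[:, 2]

# Refit with different regularization strengths and compare training accuracy
for lmd in [0, 1, 10, 100]:
    theta = opt.fmin_bfgs(f=lambda t: cfr.cost_function_reg(t, X, y, lmd)[0],
                          fprime=lambda t: cfr.cost_function_reg(t, X, y, lmd)[1],
                          x0=np.zeros(X.shape[1]), maxiter=400, disp=False)
    accuracy = np.mean(predict.predict(theta, X) == y) * 100
    print('lambda = {:5.1f}   train accuracy = {:.1f}%'.format(lmd, accuracy))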

 
