
Andrew Ng Machine Learning Homework ex2 (Python Implementation)

ex2 has two parts: the first is linear logistic regression, the second is non-linear (regularized) logistic regression.

Part 1

Dataset (ex2data1.txt; two features x1, x2 and a binary label y):

Plotting the data:

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt

    # Load the data
    df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
    df0 = df[df['y'] == 0]
    df1 = df[df['y'] == 1]

    # Scatter plot, one color per class
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

The sigmoid function is g(z) = 1 / (1 + e^(-z)).

The hypothesis h(x) is hθ(x) = g(θᵀx), i.e. the sigmoid applied to the linear combination of the features.

Code implementation:

    def g(z):
        return 1 / (1 + np.exp(-z))

    def h(theta, x):
        return g(np.dot(x, theta))
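A quick sanity check on these definitions (my addition): g(0) should be exactly 0.5, g saturates toward 0 and 1 for large |z|, and with a zero θ the hypothesis returns 0.5 for any input.

    # Sanity check: g(0) = 0.5, and h with zero theta is 0.5 everywhere
    print(g(0))                                   # 0.5
    print(g(10), g(-10))                          # ~1 and ~0 (saturation)
    print(h(np.zeros((3, 1)), np.matrix([[1.0, 45.0, 85.0]])))  # [[0.5]]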

Cost function: J(θ) = (1/m) Σ [−y·log(hθ(x)) − (1−y)·log(1−hθ(x))]:

    def cost(theta, x, y):
        theta = np.matrix(theta)
        left = np.multiply(-y, np.log(g(x * theta)))
        right = np.multiply((1 - y), np.log(1 - g(x * theta)))
        return np.sum(left - right) / len(x)
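As a further check (my addition, assuming x and y are built with a bias column as in the full source below): with θ = 0 every prediction is 0.5, so the initial cost should be −ln(0.5) ≈ 0.693.

    # Initial cost with zero parameters should be about 0.693
    theta0 = np.matrix(np.zeros((3, 1)))
    print(cost(theta0, x, y))  # expect ~0.6931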

Gradient: ∂J/∂θj = (1/m) Σ (hθ(x⁽ⁱ⁾) − y⁽ⁱ⁾) xj⁽ⁱ⁾.

Here I first implemented gradient descent by hand:

    # Batch gradient descent (the inner loop variable is renamed so it
    # does not shadow the outer iteration counter)
    a = 0.01
    for it in range(200000):
        error = g(x * theta) - y
        for j in range(3):
            term = np.multiply(error, x[:, j])
            theta[j, 0] -= a * np.sum(term) / len(x)
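The per-parameter inner loop can also be collapsed into a single matrix product; a vectorized sketch of the same update (my rewrite, not the original code):

    # Vectorized batch gradient descent: theta -= (a/m) * X^T (g(X*theta) - y)
    a = 0.01
    m = len(x)
    for _ in range(200000):
        error = g(x * theta) - y                 # (m, 1) residuals
        theta = theta - (a / m) * (x.T * error)  # simultaneous update of all parameters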

Only after 200,000 iterations does the result look reasonably good:

The theta and cost at this point are:

Full source for the first method:

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt

    # Load the data
    df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
    df0 = df[df['y'] == 0]
    df1 = df[df['y'] == 1]

    # Scatter plot of the raw data
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

    # Build theta, x (with a bias column of ones) and y
    theta = np.matrix(np.zeros((3, 1)))
    x = np.matrix(df.iloc[:, [0, 1]])
    y = np.matrix(df.iloc[:, 2]).T
    x = np.insert(x, 0, np.ones(len(x)), axis=1)

    def g(z):
        return 1 / (1 + np.exp(-z))

    def h(theta, x):
        return g(np.dot(x, theta))

    def cost(theta, x, y):
        theta = np.matrix(theta)
        left = np.multiply(-y, np.log(g(x * theta)))
        right = np.multiply((1 - y), np.log(1 - g(x * theta)))
        return np.sum(left - right) / len(x)

    # Batch gradient descent (inner loop renamed to avoid shadowing)
    a = 0.01
    for it in range(200000):
        error = g(x * theta) - y
        for j in range(3):
            term = np.multiply(error, x[:, j])
            theta[j, 0] -= a * np.sum(term) / len(x)

    print(theta)
    print(cost(theta, x, y))

    # Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0
    px = [i for i in range(20, 100)]
    py = [-(px[i] * theta[1, 0] + theta[0, 0]) / theta[2, 0] for i in range(len(px))]
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
    plt.plot(px, py)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()
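The plotting lines above come from the decision boundary itself: hθ(x) = 0.5 exactly when θ0 + θ1·x1 + θ2·x2 = 0, so solving for x2 gives x2 = −(θ0 + θ1·x1) / θ2, which is what the py list comprehension evaluates along the px range.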

There is another way to do the optimization: use a scipy.optimize solver to fit the optimal parameters. Everything else stays similar to the first method; just define one extra gradient function and replace the gradient-descent loop with:

    def gradient(theta, X, y):
        theta = np.matrix(theta).T  # flat array from the optimizer -> column
        X = np.matrix(X)
        y = np.matrix(y)
        grad = np.zeros((3, 1))
        error = g(X * theta) - y
        for i in range(3):
            term = np.multiply(error, X[:, i])
            grad[i, 0] = np.sum(term) / len(X)
        return grad.ravel()  # fmin_tnc expects a flat gradient array

    result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
    theta = result[0]
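
With the fitted θ it is also easy to report the training accuracy (my addition, a sketch assuming the variables above): predict class 1 whenever hθ(x) ≥ 0.5.

    # Classify with a 0.5 threshold and measure accuracy on the training set
    def predict(theta, x):
        theta = np.matrix(theta).T        # flat optimizer output -> (3, 1) column
        return (g(x * theta) >= 0.5).astype(int)

    pred = predict(result[0], x)
    print(np.mean(pred == y))             # fraction classified correctly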

The fit is quite good:

 

The theta and cost at this point are:

Far better than the first method.

Full source for the second method:

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.optimize as opt

    # Load the data
    df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
    df0 = df[df['y'] == 0]
    df1 = df[df['y'] == 1]

    # Initialize the parameters; x gets a bias column of ones
    theta = np.matrix(np.zeros((3, 1)))
    x = np.matrix(df.iloc[:, [0, 1]])
    y = np.matrix(df.iloc[:, 2]).T
    x = np.insert(x, 0, np.ones(len(x)), axis=1)

    def g(z):
        return 1 / (1 + np.exp(-z))

    def h(theta, x):
        return g(np.dot(x, theta))

    def cost(theta, x, y):
        theta = np.matrix(theta).T  # flat array from the optimizer -> column
        left = np.multiply(-y, np.log(g(x * theta)))
        right = np.multiply((1 - y), np.log(1 - g(x * theta)))
        return np.sum(left - right) / len(x)

    def gradient(theta, X, y):
        theta = np.matrix(theta).T
        X = np.matrix(X)
        y = np.matrix(y)
        grad = np.zeros((3, 1))
        error = g(X * theta) - y
        for i in range(3):
            term = np.multiply(error, X[:, i])
            grad[i, 0] = np.sum(term) / len(X)
        return grad.ravel()  # fmin_tnc expects a flat gradient array

    result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
    theta = result[0]
    print(theta)
    print(cost(theta, x, y))

    # Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0
    px = [i for i in range(20, 100)]
    py = [-(px[i] * theta[1] + theta[0]) / theta[2] for i in range(len(px))]
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
    plt.plot(px, py)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

Part 2

Dataset (ex2data2.txt; two features x1, x2 and a binary label y):

Plotting it as a scatter plot:

    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np

    # Load the data
    df = pd.read_csv('ex2data2.txt', names=['x1', 'x2', 'y'])
    df0 = df[df['y'] == 0]
    df1 = df[df['y'] == 1]

    # Scatter plot, one color per class
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

The relationship here is clearly non-linear. The assignment notes say to construct polynomial features: all terms x1^(i−j) · x2^j for 1 ≤ i ≤ 6 and 0 ≤ j ≤ i, i.e. mapFeature(x1, x2) = [1, x1, x2, x1², x1·x2, x2², …, x1·x2⁵, x2⁶], 27 polynomial terms plus a bias.

    # Build the feature matrix
    x = df.iloc[:, [0, 1]].copy()  # .copy() avoids pandas SettingWithCopyWarning
    x1 = df.iloc[:, 0]
    x2 = df.iloc[:, 1]
    degree = 6
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            x['F' + str(i - j) + str(j)] = np.power(x1, i - j) * np.power(x2, j)
    x.drop('x1', axis=1, inplace=True)
    x.drop('x2', axis=1, inplace=True)
    x = np.matrix(x)
    x = np.insert(x, 0, np.ones(len(x)), axis=1)  # bias column
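A quick shape check (assuming the construction above): degree 6 yields 2 + 3 + … + 7 = 27 polynomial terms, so with the bias column x.shape should be (118, 28). And since the full source below repeats this double loop to featurize the plotting grid, it could be factored into a helper (a sketch; the name map_feature is mine):

    def map_feature(x1, x2, degree=6):
        # All terms x1^(i-j) * x2^j for 1 <= i <= degree, 0 <= j <= i
        out = pd.DataFrame()
        for i in range(1, degree + 1):
            for j in range(0, i + 1):
                out['F' + str(i - j) + str(j)] = np.power(x1, i - j) * np.power(x2, j)
        return out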

Then, as in Part 1, implement the functions, but note that here the cost function needs a regularization term added:

    def cost(theta, x, y):
        theta = np.matrix(theta)
        first = -np.multiply(y, np.log(h(theta, x)))
        second = -np.multiply((1 - y), np.log(1 - h(theta, x)))
        # Penalty term; sita plays the role of lambda.
        # Note: len(theta) is 1 for a (1, n) matrix, so this is effectively
        # sita * sum(theta^2).
        third = sita * np.sum(np.power(theta, 2)) / len(theta)
        return np.sum(first + second) / len(x) + third
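The gradient needs a matching penalty: ∂J/∂θj = (1/m) Σ (hθ(x) − y) xj + (λ/m) θj for j ≥ 1, with θ0 left unpenalized (as the full source below does). For reference, the standard regularized cost from the course adds λ/(2m) · Σ_{j≥1} θj², whereas the cost above penalizes all components including θ0. A vectorized sketch of the regularized gradient (my rewrite, assuming g, x, y as above):

    def gradient_reg(theta, x, y, lam):
        # Regularized gradient; the bias term theta_0 is not penalized
        theta = np.matrix(theta).T                 # (n, 1) column
        m = len(x)
        grad = x.T * (g(x * theta) - y) / m        # unregularized part, (n, 1)
        reg = (lam / m) * theta
        reg[0, 0] = 0                              # skip theta_0
        return np.array(grad + reg).ravel()        # flat array for fmin_tnc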

Use the optimizer to find the optimum (λ = 0.001):

The cost at this point is 0.45801011485103144. Lowering further to λ = 0, the result becomes:

The cost is now 0.26097691012426083.

Full source:

    import pandas as pd
    import matplotlib.pyplot as plt
    import numpy as np
    import scipy.optimize as opt

    # Load the data
    df = pd.read_csv('ex2data2.txt', names=['x1', 'x2', 'y'])
    df0 = df[df['y'] == 0]
    df1 = df[df['y'] == 1]

    # Scatter plot of the raw data
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.legend()
    plt.show()

    # Build the polynomial feature matrix
    x = df.iloc[:, [0, 1]].copy()  # .copy() avoids SettingWithCopyWarning
    x1 = df.iloc[:, 0]
    x2 = df.iloc[:, 1]
    degree = 6
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            x['F' + str(i - j) + str(j)] = np.power(x1, i - j) * np.power(x2, j)
    x.drop('x1', axis=1, inplace=True)
    x.drop('x2', axis=1, inplace=True)
    x = np.matrix(x)
    x = np.insert(x, 0, np.ones(len(x)), axis=1)

    # Build y
    y = np.matrix(df.iloc[:, 2]).T

    # Build theta; sita is the regularization parameter (lambda)
    theta = np.zeros(x.shape[1])
    sita = 0

    def g(z):
        return 1 / (1 + np.exp(-z))

    def h(theta, x):
        return g(np.dot(x, theta.T))

    def cost(theta, x, y):
        theta = np.matrix(theta)
        first = -np.multiply(y, np.log(h(theta, x)))
        second = -np.multiply((1 - y), np.log(1 - h(theta, x)))
        third = sita * np.sum(np.power(theta, 2)) / len(theta)  # see note above
        return np.sum(first + second) / len(x) + third

    print(cost(theta, x, y))

    def gradient(theta, X, y):
        theta = np.matrix(theta).T
        X = np.matrix(X)
        y = np.matrix(y)
        grad = np.zeros((28, 1))
        error = g(X * theta) - y
        for i in range(28):
            term = np.multiply(error, X[:, i])
            if i == 0:
                grad[i, 0] = np.sum(term) / len(X)  # bias term is not regularized
            else:
                grad[i, 0] = np.sum(term) / len(X) + sita * theta[i, 0] / len(X)
        return grad.ravel()  # fmin_tnc expects a flat gradient array

    result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
    theta = result[0]
    print(theta)
    print(cost(theta, x, y))

    # Evaluate h on a grid over [-1, 1) x [-1, 1)
    x1 = np.arange(-1, 1, 0.01)
    x2 = np.arange(-1, 1, 0.01)
    temp = []
    for i in range(len(x1)):
        for j in range(len(x2)):
            temp.append([x1[i], x2[j]])
    temp = pd.DataFrame(temp)
    x1 = temp.iloc[:, 0]
    x2 = temp.iloc[:, 1]
    xx = pd.DataFrame()
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            xx['F' + str(i - j) + str(j)] = np.power(x1, i - j) * np.power(x2, j)
    xx = np.matrix(xx)
    xx = np.insert(xx, 0, np.ones(len(xx)), axis=1)
    theta = np.matrix(theta).T
    res = g(np.dot(xx, theta))

    # Keep grid points where h is close to 0.5: an approximate decision boundary
    px = []
    for i in range(len(res)):
        if abs(res[i, 0] - 0.5) < 0.04:
            px.append([xx[i, 1], xx[i, 2]])  # columns 1 and 2 are x1 and x2
    print(len(px))
    for i in range(len(px)):
        plt.scatter(px[i][0], px[i][1], c='g')
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r')
    plt.show()
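The green scatter only approximates the boundary to the |res − 0.5| < 0.04 tolerance. A common alternative (a sketch, not the original code; it reuses the hypothetical map_feature helper from above and assumes theta is the (28, 1) column matrix left by the script) is to let plt.contour trace the 0.5 level set exactly:

    # Draw the decision boundary as the 0.5 level set of h over a grid
    u = np.linspace(-1, 1.2, 200)
    v = np.linspace(-1, 1.2, 200)
    U, V = np.meshgrid(u, v)
    feats = map_feature(pd.Series(U.ravel()), pd.Series(V.ravel()))
    feats = np.insert(np.matrix(feats), 0, np.ones(feats.shape[0]), axis=1)
    Z = np.array(g(feats * theta)).reshape(U.shape)
    plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b')
    plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r')
    plt.contour(U, V, Z, levels=[0.5], colors='g')
    plt.show()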

 
