ex2 has two parts: the first is logistic regression with a linear decision boundary, the second is non-linear.
Dataset:
Plotting the data as a scatter plot:
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
-
-
- # Read the data
- df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
- df0 = df[df['y'] == 0]
- df1 = df[df['y'] == 1]
- # Scatter plot, one color per class
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
- plt.xlabel('x1')
- plt.ylabel('x2')
- plt.legend()
- plt.show()
The hypothesis h(x) is:
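From the implementation that follows, this is the standard logistic (sigmoid) hypothesis:

$$h_\theta(x) = g(\theta^T x), \qquad g(z) = \frac{1}{1+e^{-z}}$$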
Implementation:
- # Sigmoid function
- def g(z):
-     return 1 / (1 + np.exp(-z))
-
-
- # Hypothesis: h(theta, x) = g(x . theta)
- def h(theta, x):
-     return g(np.dot(x, theta))
Cost function:
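From the code below, this is the usual (unregularized) logistic regression cost averaged over the m training examples:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log h_\theta(x^{(i)}) - \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right]$$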
- # Cross-entropy cost, averaged over the training examples
- def cost(theta, x, y):
-     theta = np.matrix(theta)
-     left = np.multiply(-y, np.log(g(x*theta)))
-     right = np.multiply((1-y), np.log(1-g(x*theta)))
-     return np.sum(left-right)/(len(x))
Gradient descent:
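The update implemented below is standard batch gradient descent with learning rate α:

$$\theta_j := \theta_j - \frac{\alpha}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)}$$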
Here I first implemented gradient descent by hand:
- a = 0.01  # learning rate
- for it in range(200000):
-     error = g(x * theta) - y
-     # update every parameter with the batch gradient
-     for j in range(3):
-         term = np.multiply(error, x[:, j])
-         theta[j, 0] -= a * np.sum(term) / len(x)
Only after 200,000 iterations does the fit look reasonably good:
The resulting theta and cost:
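A likely reason so many iterations are needed is that the raw exam scores (roughly 20-100) are unscaled. A minimal sketch of mean normalization, which typically lets gradient descent converge in far fewer steps (this preprocessing step is not in the original):

- import pandas as pd
-
- df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
- # Mean normalization: zero mean, unit variance per feature column
- mu = df[['x1', 'x2']].mean()
- sigma = df[['x1', 'x2']].std()
- df[['x1', 'x2']] = (df[['x1', 'x2']] - mu) / sigma

After normalizing, the learned theta lives in the scaled space, so the decision boundary has to be mapped back through mu and sigma before plotting it against the raw scores.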
Full source for the first method:
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
-
-
- # Read the data
- df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
- df0 = df[df['y'] == 0]
- df1 = df[df['y'] == 1]
- # Scatter plot, one color per class
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
- plt.xlabel('x1')
- plt.ylabel('x2')
- plt.legend()
- plt.show()
-
-
- # Initialize theta to zeros and build the design matrix with a bias column
- theta = np.matrix(np.zeros((3, 1)))
- x = np.matrix(df.iloc[:, [0, 1]])
- y = np.matrix(df.iloc[:, 2]).T
- x = np.insert(x, 0, np.ones(len(x)), axis=1)
-
-
- # Sigmoid function
- def g(z):
-     return 1 / (1 + np.exp(-z))
-
-
- # Hypothesis: h(theta, x) = g(x . theta)
- def h(theta, x):
-     return g(np.dot(x, theta))
-
-
- # Cross-entropy cost, averaged over the training examples
- def cost(theta, x, y):
-     theta = np.matrix(theta)
-     left = np.multiply(-y, np.log(g(x*theta)))
-     right = np.multiply((1-y), np.log(1-g(x*theta)))
-     return np.sum(left-right)/(len(x))
-
-
- a = 0.01  # learning rate
- for it in range(200000):
-     error = g(x * theta) - y
-     # update every parameter with the batch gradient
-     for j in range(3):
-         term = np.multiply(error, x[:, j])
-         theta[j, 0] -= a * np.sum(term) / len(x)
-
- print(theta)
- print(cost(theta, x, y))
- # Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0, i.e. x2 = -(theta1*x1 + theta0)/theta2
- px = [i for i in range(20, 100)]
- py = [-(px[i] * theta[1, 0] + theta[0, 0]) / theta[2, 0] for i in range(len(px))]
-
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
- plt.plot(px, py)
- plt.xlabel('x1')
- plt.ylabel('x2')
- plt.legend()
- plt.show()
There is another way to run the optimization: let a scipy.optimize solver fit the parameters. The rest is similar to the first method, except that a gradient function is defined and the hand-written descent loop is replaced with the call below. (Note that the optimizer passes theta as a flat array, so the cost function must also transpose it, as in the full source further down.)
- def gradient(theta, X, y):
-     # the optimizer passes theta as a flat array; reshape it to a column vector
-     theta = np.matrix(theta).T
-     X = np.matrix(X)
-     y = np.matrix(y)
-
-     grad = np.zeros((3, 1))
-
-     error = g(X * theta) - y
-
-     for i in range(3):
-         term = np.multiply(error, X[:, i])
-         grad[i, 0] = np.sum(term) / len(X)
-
-     return grad
-
-
- result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
- theta = result[0]
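For reference, the same TNC solver is also reachable through SciPy's newer scipy.optimize.minimize interface. A minimal sketch, assuming the cost and gradient defined above (jac must return a 1-D array, so the gradient's return value is flattened here):

- res = opt.minimize(fun=cost, x0=np.zeros(3), args=(x, y),
-                    method='TNC', jac=lambda t, X, y: gradient(t, X, y).ravel())
- theta = res.x  # fitted parameters; res.fun is the final cost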
The fit is very good:
The resulting theta and cost:
Far better than the first method.
Full source for the second method:
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import scipy.optimize as opt
-
-
- # Read the data
- df = pd.read_csv('ex2data1.txt', names=['x1', 'x2', 'y'])
- df0 = df[df['y'] == 0]
- df1 = df[df['y'] == 1]
- # Initialize parameters and build the design matrix with a bias column
- theta = np.matrix(np.zeros((3, 1)))
- x = np.matrix(df.iloc[:, [0, 1]])
- y = np.matrix(df.iloc[:, 2]).T
- x = np.insert(x, 0, np.ones(len(x)), axis=1)
-
-
- # Sigmoid function
- def g(z):
-     return 1 / (1 + np.exp(-z))
-
-
- # Hypothesis: h(theta, x) = g(x . theta)
- def h(theta, x):
-     return g(np.dot(x, theta))
-
-
- def cost(theta, x, y):
-     # theta arrives as a flat array from the optimizer, hence the transpose
-     theta = np.matrix(theta).T
-     left = np.multiply(-y, np.log(g(x*theta)))
-     right = np.multiply((1-y), np.log(1-g(x*theta)))
-     return np.sum(left-right)/(len(x))
-
-
- def gradient(theta, X, y):
-     # the optimizer passes theta as a flat array; reshape it to a column vector
-     theta = np.matrix(theta).T
-     X = np.matrix(X)
-     y = np.matrix(y)
-
-     grad = np.zeros((3, 1))
-
-     error = g(X * theta) - y
-
-     for i in range(3):
-         term = np.multiply(error, X[:, i])
-         grad[i, 0] = np.sum(term) / len(X)
-
-     return grad
-
-
- result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
- theta = result[0]
- print(theta)
- print(cost(theta, x, y))
- # Decision boundary: theta0 + theta1*x1 + theta2*x2 = 0
- px = [i for i in range(20, 100)]
- py = [-(px[i] * theta[1] + theta[0]) / theta[2] for i in range(len(px))]
-
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
- plt.plot(px, py)
- plt.xlabel('x1')
- plt.ylabel('x2')
- plt.legend()
- plt.show()
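As a quick sanity check (this step is not in the original listing), the training accuracy can be computed from the fitted theta:

- # Classify as 1 where h(x) >= 0.5 and compare with the labels
- pred = (g(x * np.matrix(theta).T) >= 0.5).astype(int)
- print('train accuracy:', np.mean(pred == y))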
Dataset:
Plot it as a scatter plot:
- import pandas as pd
- import matplotlib.pyplot as plt
- import numpy as np
-
-
- # Read the data
- df = pd.read_csv('ex2data2.txt', names=['x1', 'x2', 'y'])
- df0 = df[df['y'] == 0]
- df1 = df[df['y'] == 1]
- # Scatter plot, one color per class
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
- plt.xlabel('x1')
- plt.ylabel('x2')
- plt.legend()
- plt.show()
The relationship here is clearly non-linear. The exercise handout says to construct polynomial features:
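From the construction loop below, this is the classic mapFeature transform: every monomial $x_1^{\,i-j} x_2^{\,j}$ up to total degree 6, which together with the bias term gives 28 features:

$$\mathrm{mapFeature}(x_1, x_2) = \left[\, 1,\; x_1,\; x_2,\; x_1^2,\; x_1 x_2,\; x_2^2,\; \ldots,\; x_1 x_2^5,\; x_2^6 \,\right]$$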
- # Build the polynomial feature matrix x
- x = df.iloc[:, [0, 1]].copy()  # .copy() avoids pandas' SettingWithCopyWarning
- x1 = df.iloc[:, 0]
- x2 = df.iloc[:, 1]
- degree = 6
- for i in range(1, degree+1):
-     for j in range(0, i+1):
-         x['F' + str(i-j) + str(j)] = np.power(x1, i-j) * np.power(x2, j)
-
- x.drop('x1', axis=1, inplace=True)
- x.drop('x2', axis=1, inplace=True)
- x = np.matrix(x)
- x = np.insert(x, 0, np.ones(len(x)), axis=1)
Then implement the functions as in part 1, but note that the cost function here needs a regularization term:
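The regularized cost from the handout is

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log h_\theta(x^{(i)}) - \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2$$

Note that the code below deviates slightly from this: sita plays the role of λ, the penalty is divided by the number of parameters instead of 2m, and θ0 is included in the sum.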
- def cost(theta, x, y):
-     theta = np.matrix(theta)
-     first = -np.multiply(y, np.log(h(theta, x)))
-     second = -np.multiply((1-y), np.log(1 - h(theta, x)))
-     # sita is the regularization strength (lambda)
-     third = sita * np.sum(np.power(theta, 2)) / len(theta)
-     return np.sum(first + second) / len(x) + third
Fit the optimal parameters with the optimizer:
The cost at this point is 0.45801011485103144. Lowering the regularization strength sita further, the result becomes:
a cost of 0.26097691012426083.
Source:
- import pandas as pd
- import matplotlib.pyplot as plt
- import numpy as np
- import scipy.optimize as opt
-
-
- # Read the data
- df = pd.read_csv('ex2data2.txt', names=['x1', 'x2', 'y'])
- df0 = df[df['y'] == 0]
- df1 = df[df['y'] == 1]
- # Scatter plot, one color per class
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b', label='0')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r', label='1')
- plt.xlabel('x1')
- plt.ylabel('x2')
- plt.legend()
- plt.show()
-
- # Build the polynomial feature matrix x
- x = df.iloc[:, [0, 1]].copy()  # .copy() avoids pandas' SettingWithCopyWarning
- x1 = df.iloc[:, 0]
- x2 = df.iloc[:, 1]
- degree = 6
- for i in range(1, degree+1):
-     for j in range(0, i+1):
-         x['F' + str(i-j) + str(j)] = np.power(x1, i-j) * np.power(x2, j)
-
- x.drop('x1', axis=1, inplace=True)
- x.drop('x2', axis=1, inplace=True)
- x = np.matrix(x)
- x = np.insert(x, 0, np.ones(len(x)), axis=1)
-
- # Build y
- y = np.matrix(df.iloc[:, 2]).T
-
- # Initialize theta; sita is the regularization strength (lambda)
- theta = np.zeros(x.shape[1])
- sita = 0
-
-
- # Sigmoid function
- def g(z):
-     return 1 / (1 + np.exp(-z))
-
-
- # Hypothesis; theta is a row vector here, hence the transpose
- def h(theta, x):
-     return g(np.dot(x, theta.T))
-
-
- def cost(theta, x, y):
-     theta = np.matrix(theta)
-     first = -np.multiply(y, np.log(h(theta, x)))
-     second = -np.multiply((1-y), np.log(1 - h(theta, x)))
-     # sita is the regularization strength (lambda)
-     third = sita * np.sum(np.power(theta, 2)) / len(theta)
-     return np.sum(first + second) / len(x) + third
-
-
- print(cost(theta, x, y))
-
-
- def gradient(theta, X, y):
-     # the optimizer passes theta as a flat array; reshape it to a column vector
-     theta = np.matrix(theta).T
-     X = np.matrix(X)
-     y = np.matrix(y)
-     grad = np.zeros((28, 1))
-
-     error = g(X * theta) - y
-
-     for i in range(28):
-         term = np.multiply(error, X[:, i])
-         if i == 0:
-             # the bias term is not regularized
-             grad[i, 0] = np.sum(term) / len(X)
-         else:
-             grad[i, 0] = np.sum(term) / len(X) + sita * theta[i, 0] / len(X)
-
-     return grad
-
-
- result = opt.fmin_tnc(func=cost, x0=theta, fprime=gradient, args=(x, y))
- theta = result[0]
- print(theta)
- print(cost(theta, x, y))
-
- # Build a grid over the feature range and map it through the same features
- x1 = np.arange(-1, 1, 0.01)
- x2 = np.arange(-1, 1, 0.01)
- temp = []
- for i in range(len(x1)):
-     for j in range(len(x2)):
-         temp.append([x1[i], x2[j]])
- temp = pd.DataFrame(temp)
- x1 = temp.iloc[:, 0]
- x2 = temp.iloc[:, 1]
- xx = pd.DataFrame()
- for i in range(1, degree+1):
-     for j in range(0, i+1):
-         xx['F' + str(i-j) + str(j)] = np.power(x1, i-j) * np.power(x2, j)
-
- xx = np.matrix(xx)
- xx = np.insert(xx, 0, np.ones(len(xx)), axis=1)
- theta = np.matrix(theta).T
- res = g(np.dot(xx, theta))
-
- # Keep grid points whose prediction is close to 0.5: the decision boundary
- px = []
- for i in range(len(res)):
-     if abs(res[i, 0] - 0.5) < 0.04:
-         px.append([xx[i, 1], xx[i, 2]])
-
- print(len(px))
- for i in range(len(px)):
-     plt.scatter(px[i][0], px[i][1], c='g')
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r')
- plt.show()
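The threshold trick above depends on the 0.04 tolerance and leaves visible gaps. A sketch of an alternative (not in the original) that runs right after the listing above and draws the boundary with matplotlib's contour at the h = 0.5 level:

- # Evaluate theta' * mapFeature(u, v) on a grid; its zero contour is h = 0.5
- u = np.linspace(-1, 1.2, 200)
- v = np.linspace(-1, 1.2, 200)
- z = np.zeros((len(u), len(v)))
- t = np.asarray(theta).ravel()
- for i in range(len(u)):
-     for j in range(len(v)):
-         feats = [1.0]
-         for p in range(1, degree + 1):
-             for q in range(0, p + 1):
-                 feats.append(u[i] ** (p - q) * v[j] ** q)
-         z[i, j] = np.dot(np.array(feats), t)
- plt.contour(u, v, z.T, levels=[0], colors='g')
- plt.scatter(df0.iloc[:, 0], df0.iloc[:, 1], c='b')
- plt.scatter(df1.iloc[:, 0], df1.iloc[:, 1], c='r')
- plt.show()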