
Deep learning primer, stage-one demo exercise: use the linear regression algorithm to run a prediction task on the Boston housing dataset, without calling any ready-made linear-regression library — implement the algorithm with numpy and update its parameters iteratively by gradient descent

A deep-learning primer roadmap based on personal experience (simple and fast):
https://blog.csdn.net/weixin_44414948/article/details/109704871

Stage-one demo exercises for the deep-learning primer:
https://blog.csdn.net/weixin_44414948/article/details/109864551

Demo task:

Use the linear regression algorithm to carry out a prediction task on the Boston housing dataset. Requirements: do not call any ready-made linear-regression library function; implement linear regression with numpy; update the regression parameters iteratively by gradient descent (rather than solving the normal equations directly); and reach an accuracy above 85%.
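
For reference, the model, cost, and gradient-descent updates that the code below implements are:

$$\hat{y} = Xw + b, \qquad J(w, b) = \frac{1}{2m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)^2$$

$$w \leftarrow w - \alpha \cdot \frac{1}{m} X^{\top}(\hat{y} - y), \qquad b \leftarrow b - \alpha \cdot \frac{1}{m}\sum_{i=1}^{m}\left(\hat{y}^{(i)} - y^{(i)}\right)$$

where $m$ is the number of training examples and $\alpha$ is the learning rate.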

A sample of the Boston housing dataset is shown in the figure below:
[Figure: sample rows of the Boston housing dataset]

Sample code (no ready-made linear-regression library functions are called):

import numpy as np

def load_boston_dataset(path=r'./housing.data'):
    '''
    :param path: path to the whitespace-separated housing data file
    :return: training_data, test_data
    '''

    data = np.fromfile(path,sep=' ',count=-1)
    # print(data.shape)

    feature_name = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                      'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
    data = data.reshape(-1, len(feature_name))  # one row per sample: 13 features + the MEDV target
    # print(data.shape)

    # Training set---> 80% and test set--->20%
    ratio = 0.8
    training_data = data[:int(ratio*data.shape[0])]
    test_data = data[int(ratio*data.shape[0]):]
    # print(training_data)

    # Min-max normalization: the statistics are computed on the training
    # split only, then applied to every column of the whole dataset
    # (including the MEDV target), so the test split never leaks into them.
    maximums = training_data.max(axis=0)
    minimums = training_data.min(axis=0)
    # print(maximums, '\n', minimums)

    for i in range(len(feature_name)):
        data[:,i] = (data[:,i] - minimums[i])/(maximums[i] - minimums[i])

    training_data = data[:int(ratio*data.shape[0])]
    test_data = data[int(ratio*data.shape[0]):]

    return training_data,test_data


def initialize_parameters(X, Y):
    '''
    :param X: training features, training_set[:, :13], shape (num_examples, num_features)
    :param Y: training targets, training_set[:, 13:], shape (num_examples, 1)
    :return: parameters dict holding the weight W and the bias b
    '''
    n_x = X.shape[1]
    # print(X.shape, '\n', Y.shape)

    W = np.random.randn(n_x, 1)
    b = 0

    # print(W.shape, '\n', b.shape)
    assert W.shape == (n_x, 1)          # W has shape (n_x, 1)
    assert isinstance(b, (float, int))  # b is a float or an int

    parameters ={'W':W,
                 'b':b}

    return parameters


def forward_propagation(X, parameters):
    '''
    :param X: features
    :param parameters: current parameters
    :return: z, the predicted target values
    '''
    W = parameters['W']
    b = parameters['b']

    z = np.dot(X,W) + b

    return z


def compute_cost(z, Y):
    '''
    :param z: predictions
    :param Y: ground-truth values
    :return: cost
    '''

    m = Y.shape[0]
    # The 1/(2m) factor makes the gradients in backward_propagation
    # (dW = (1/m) * X^T (z - Y), db = (1/m) * sum(z - Y)) its exact derivatives
    cost = (1 / (2 * m)) * np.sum(np.power(z - Y, 2))

    return cost


def backward_propagation(parameters,X,Y,z):
    '''
    :param parameters, X, Y, z: quantities needed for the backward pass
    :return: grads
    '''

    W = parameters['W']
    b = parameters['b']

    # print(Y.shape,z.shape,X.shape)
    m = Y.shape[0]
    dz = z - Y
    dW = (1/m)*np.dot(X.T,dz)
    db = (1/m)*np.sum(dz)
    # print(db.shape)

    assert dz.shape == z.shape
    assert dW.shape == W.shape
    assert isinstance(db, float)  # np.sum returns a scalar; np.float64 subclasses float

    grads = {'dz':dz,
             'dW':dW,
             'db':db}

    return grads


def update_parameters(parameters,grads,learning_rate):
    '''
    :param parameters: the current W and b
    :param grads: gradients
    :param learning_rate: learning rate
    :return: parameters, the updated parameters
    '''

    W = parameters['W']
    b = parameters['b']

    dW = grads['dW']
    db = grads['db']

    W = W - dW * learning_rate
    b = b - db * learning_rate

    parameters ={'W':W,
                 'b':b}

    return parameters


def predict(parameters,X_test):
    '''
    :param parameters: trained parameters
    :param X_test: test-set features
    :return: Y_prediction, the predicted values
    '''

    W = parameters['W']
    b = parameters['b']

    Y_prediction = np.dot(X_test,W) + b

    # print(Y_prediction.shape,X_test[:,13:].shape)

    return Y_prediction


def model(training_data,test_data,num_iterations=2000,learning_rate=0.05,print_cost=False):

    X_train,Y_train = training_data[:,:13],training_data[:,13:]
    X_test,Y_test = test_data[:,:13],test_data[:,13:]
    parameters = initialize_parameters(X_train,Y_train)

    for i in range(num_iterations):
        Z = forward_propagation(X_train, parameters)
        train_cost = compute_cost(Z,Y_train)
        grads = backward_propagation(parameters,X_train,Y_train,Z)
        parameters = update_parameters(parameters,grads,learning_rate)

        if print_cost and i % 1000 == 0:
            print("cost after iteration " + str(i) + ": " + str(train_cost))

    Y_prediction_train = predict(parameters, X_train)
    Y_prediction_test = predict(parameters,X_test)

    # test_cost = compute_cost(Y_prediction_test,Y_test)
    # print(Y_prediction_train.shape,'\n',Y_train.shape)

    # Ad-hoc "accuracy": 100 minus 100x the mean squared error on the
    # normalized targets (which lie in [0, 1] after min-max normalization)
    print("train accuracy: {} %".format(100 - np.mean(np.power(Y_prediction_train - Y_train, 2)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.power(Y_prediction_test - Y_test, 2)) * 100))


    return parameters


training_data,test_data = load_boston_dataset()
parameters = model(training_data,test_data,num_iterations=5000,learning_rate=0.05,print_cost=True)

# print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
# print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))
# print(parameters)
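
As a quick sanity check on the implementation, the snippet below runs a minimal numerical gradient check: it compares the analytic gradient from backward_propagation against a centered finite difference of compute_cost. This is a sketch, not part of the original demo; it assumes the functions above are already defined in the same script.

# Minimal numerical gradient check (sketch): perturb one weight in both
# directions and compare the finite-difference slope of compute_cost with
# the analytic gradient returned by backward_propagation.
def gradient_check(X, Y, epsilon=1e-6):
    params = initialize_parameters(X, Y)
    z = forward_propagation(X, params)
    grads = backward_propagation(params, X, Y, z)

    params_plus = {'W': params['W'].copy(), 'b': params['b']}
    params_minus = {'W': params['W'].copy(), 'b': params['b']}
    params_plus['W'][0, 0] += epsilon
    params_minus['W'][0, 0] -= epsilon

    cost_plus = compute_cost(forward_propagation(X, params_plus), Y)
    cost_minus = compute_cost(forward_propagation(X, params_minus), Y)
    numeric_dW0 = (cost_plus - cost_minus) / (2 * epsilon)

    print('analytic dW[0,0]:', grads['dW'][0, 0])
    print('numeric  dW[0,0]:', numeric_dW0)

gradient_check(training_data[:, :13], training_data[:, 13:])

The two values should agree to several decimal places when the forward and backward passes are consistent.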