# theta carries an extra intercept term (theta0) that X does not yet have a
# column for; this applies the trained model to produce predictions.
def predict(X, theta):
    """Return the linear-model predictions for the samples in *X*.

    X is converted with ``np.array`` and must be two-dimensional
    (one row per sample, one column per feature), without an intercept
    column.  A leading column of ones is added here so that the first row
    of *theta* acts as the intercept.

    Returns ``np.dot([1 | X], theta)``.

    Raises:
        ValueError: if *X* is not two-dimensional.
    """
    X = np.array(X)
    if X.ndim != 2:
        # The original tuple-unpacking of the shape failed with an opaque
        # "values to unpack" ValueError; keep the type, clarify the message.
        raise ValueError(
            "X must be 2-D (samples x features), got %d-D input" % X.ndim
        )
    # Prepend a column of ones to X for the intercept term.
    intercept = np.ones((X.shape[0], 1))
    return np.dot(np.hstack([intercept, X]), theta)
- #coding=utf-8
- import numpy as np
- import grandDescent
- import costFunction
- import loadData
- import predict
- import write2File
- import rmse
- import os
- import random
- import matplotlib.pyplot as plt
- from pylab import plot,show
- from scipy import stats
def handle_data_self():
    """Build the small hand-written single-feature data set used by the demo.

    Returns the tuple ``(X_train, y_train, X_cross, y_cross, X_test)``.
    Every array is a column vector; the cross-validation arrays are
    independent copies of the training arrays.  As a side effect the
    training points are drawn as a red scatter plot on the current
    matplotlib figure (the figure is not shown here).
    """
    train_inputs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    train_targets = [0, 2, 3, 4, 5, 6, 7, 4, 9, 10]
    # The test inputs repeat the training inputs and add four extra points.
    test_inputs = train_inputs + [10, 11, 23, 1]

    X_train = np.array(train_inputs).reshape(10, 1)
    y_train = np.array(train_targets).reshape(10, 1)
    X_test = np.array(test_inputs).reshape(14, 1)

    X_cross, y_cross = X_train.copy(), y_train.copy()

    plt.scatter(X_train, y_train, color="Red")
    return X_train, y_train, X_cross, y_cross, X_test
- import numpy as np #系统库
- import grandDescent
- import costFunction
- import loadData
- import predict
- import write2File
- import rmse
- import os #库
- import random #库
- import matplotlib.pyplot as plt #库
def _write_training_record(path, append, record):
    """Write *record* to *path*, appending if *append* is true, else overwriting."""
    # Mode 'a' creates a missing file and mode 'w' truncates an existing one,
    # so no explicit existence check or removal is required.
    with open(path, 'a' if append else 'w') as file_object:
        file_object.write(record)


def regression_simple(X, y, X1, y1, option):
    """Train a linear model and evaluate it on a second data set.

    Parameters:
        X, y: training features and targets, passed to the trainer.
        X1, y1: evaluation features and targets; a column of ones is
            prepended to X1 before the cost is computed.
        option: mapping with the keys
            "alpha", "lamb"   - converted with float()
            "maxCycle"        - converted with int()
            "saveRecord"      - truthy to write a record to "thetaWritePath"
            "add"             - truthy to append to that file, else overwrite
            "optGoal"         - "J_train", "J_cross" or "rmse"
            "method"          - "stocGradDescent" or "grandDescent"
            "thetaWritePath"  - optional path of the record file

    Returns:
        ``(J, theta)`` where J is the quantity selected by "optGoal".  For an
        unrecognised "optGoal" a message is printed and J is None.

    Raises:
        ValueError: if "method" is not one of the two supported trainers
            (previously theta stayed None and the cost computation failed
            with an unrelated error).
    """
    alpha = float(option["alpha"])
    maxCycles = int(option["maxCycle"])
    lamb = float(option["lamb"])
    save = bool(option["saveRecord"])
    add = bool(option["add"])
    optGoal = option["optGoal"]
    methods = option["method"]
    # dict.get replaces the Python-2-only has_key lookup.
    thetaPath = option.get("thetaWritePath")

    if methods not in ("stocGradDescent", "grandDescent"):
        raise ValueError("unknown method: %r" % (methods,))

    # Train on a copy so the trainer cannot modify the caller's X.
    if methods == "stocGradDescent":
        J_train, theta = grandDescent.stocGradDescent(np.copy(X), y, maxCycles, alpha, lamb)
    else:
        J_train, theta = grandDescent.grandDescent(np.copy(X), y, maxCycles, alpha, lamb)

    X1_with_bias = np.hstack([np.ones((X1.shape[0], 1)), X1])
    J_cross = costFunction.countCostFunc(X1_with_bias, y1, theta, lamb)
    rmseResult = rmse.countrmse(np.copy(X1), y1, theta)

    # Everything below only records and selects results.
    if save and thetaPath:
        record = ("J_traincost=>" + str(J_train) + ",J_crosscost=>" + str(J_cross[0])
                  + ",alpha=>" + str(alpha) + ",lamb=>" + str(lamb)
                  + ", Cycles=>" + str(maxCycles) + ", rmse=>" + str(rmseResult) + "\n"
                  + ",theata==>" + str(theta.transpose()) + "\n")
        _write_training_record(thetaPath, add, record)

    J = None
    if optGoal == "J_train":
        J = J_train
    elif optGoal == "J_cross":
        J = J_cross
    elif optGoal == "rmse":
        J = rmseResult
    else:
        # Kept as a best-effort warning: the caller still receives theta.
        print("optGoal fault!!!")
    return J, theta
# Demo entry point: load the toy data, train with batch gradient descent,
# plot the fitted line and write the predictions for the test inputs.
# NOTE(review): indentation was lost in the original listing; the nesting
# below follows the data flow (theta only exists after training).
if __name__ == "__main__":
    # Single-argument print(...) calls are valid on both Python 2 and 3.
    print("load data...")
    X_train, y_train, X_cross, y_cross, X_test = handle_data_self()
    print("load data finished")
    print("进入traing...")
    action = "regression_grandDescent"
    J = None
    theta = None
    if action == "regression_grandDescent":
        option = {"maxCycle": 400, "alpha": 0.05, "lamb": 0.001, "saveRecord": 1,
                  "thetaWritePath": "./thetaSave.txt", "add": 1, "optGoal": "J_train",
                  "method": "grandDescent"}
        # Copies are passed so training cannot modify the arrays plotted below.
        J, theta = regression_simple(X_train.copy(), y_train.copy(),
                                     X_cross.copy(), y_cross.copy(), option)
        plt.plot(X_train, predict.predict(X_train, theta), color="Green")
        plt.show()
    print("完成traing")
    if action in ("regression_grandDescent",
                  "regression_grandDescentWithBestAlphaAndLamb",
                  "regression_stocGradDescent"):
        y_pre = predict.predict(X_test, theta)
        write2File.savePrediction(y_pre, path="sample_submission.csv")
        # Same output as the original two-item print statement, which joined
        # its items with a single space.
        print("预测完毕 保存结果至sample_submission.csv")
这里的返回值分别是：原始数据集 X、原始数据集 y、交叉验证集 X、交叉验证集 y、检测集 X：
X_train, y_train, X_cross, y_cross, X_test = handle_data_self()
rmse.py 文件
- #coding=utf-8
- import numpy as np
- import operator
- import copy
- import math
- import costFunction
def countrmse(X, y, theta):
    """Return the root-mean-square error of the linear model *theta* on (X, y).

    X must be two-dimensional (one row per sample) without an intercept
    column; a leading column of ones is added here.  The result is
    ``sqrt(sum((h - y) ** 2) / m)`` as a plain Python float, where
    ``h = [1 | X] . theta`` and ``m`` is the number of rows of X.
    """
    m, _ = np.shape(X)
    # Extend X with a column of ones for the intercept.
    X = np.hstack([np.ones((m, 1)), X])
    h = np.dot(X, theta)
    # Flatten both sides so an (m, 1) prediction and an (m,) target subtract
    # element-wise instead of broadcasting into an (m, m) matrix.
    diff = np.ravel(h) - np.ravel(y)
    # The dot product of two 1-D arrays is a true scalar, so math.sqrt is not
    # asked to convert a 1x1 array (deprecated in recent NumPy releases).
    return math.sqrt(np.dot(diff, diff) / float(m))


- #coding=utf-8
- import numpy as np
- import costFunction
# theta carries an extra intercept term (theta0) that X does not yet have a
# column for.
def predict(X, theta):
    """Return the linear-model predictions for the samples in *X*.

    X is converted with ``np.array`` and must be two-dimensional
    (one row per sample, one column per feature), without an intercept
    column.  A leading column of ones is added here so that the first row
    of *theta* acts as the intercept.

    Returns ``np.dot([1 | X], theta)``.

    Raises:
        ValueError: if *X* is not two-dimensional.
    """
    X = np.array(X)
    if X.ndim != 2:
        # The original tuple-unpacking of the shape failed with an opaque
        # "values to unpack" ValueError; keep the type, clarify the message.
        raise ValueError(
            "X must be 2-D (samples x features), got %d-D input" % X.ndim
        )
    intercept = np.ones((X.shape[0], 1))
    return np.dot(np.hstack([intercept, X]), theta)
def savePrediction(y_pre, path):
    """Write predictions to *path* as a two-column CSV file.

    The file starts with the header ``Id,reference``; each following line
    holds the zero-based row index and the first element of that row of
    *y_pre*.  Any existing file at *path* is overwritten.
    """
    # The context manager closes (and therefore flushes) the file even when a
    # write fails; the original never closed the handle.
    with open(path, 'w') as file_object:
        file_object.write("Id,reference\n")
        for index, row in enumerate(y_pre):
            file_object.write(str(index) + "," + str(row[0]) + "\n")
# Candidate hyper-parameter values (the original listing fused these three
# assignments onto one line, which is not valid Python).
# NOTE(review): presumably the search grid for option["alpha"],
# option["maxCycle"] and option["lamb"] - confirm against the caller.
alphas = [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]
maxCycles = 200
lambs = [100, 10, 1, 0.1, 0.01, 0.001]
