赞
踩
题目:https://www.hackerrank.com/challenges/predicting-office-space-price
分析:还是上次的房价预测题目,指明要用多项式回归拟合。在多元多项式拟合时候,目标函数表示如下
对其目标函数求偏导得到
很容易写出代码。
代码:
- #coding:utf-8
-
- import math
-
- class Data:
- def __init__(self):
- self.x = []
- self.y = 0.0
-
- def makeMatrix(row, col, fill = 0.0):
- mat = []
- for i in range(row):
- mat.append([fill] * col)
- return mat
-
- def WX(d, w, b):
- res = 0.0
- for k in range(len(d.x)):
- for j in range(b + 1):
- res += w[k][j] * math.pow(d.x[k], j)
- return res
-
- def Gradient(d, w, f, b, alpha):
- for k in range(f):
- for j in range(b + 1):
- t1, t2 = 0.0, 0.0
- for i in range(len(d)):
- t1 += (WX(d[i], w, b) - d[i].y) * math.pow(d[i].x[k], j)
- w[k][j] -= alpha * t1
-
- def getValues(d, w, b):
- res = 0.0
- for i in range(len(d)):
- tmp = WX(d[i], w, b)
- res += 0.5 * (d[i].y - tmp) * (d[i].y - tmp)
- return res
-
- def Iterator(d, w, f, b):
- alpha = 0.003
- delta = 0.5
- oldVal = getValues(d, w, b)
- Gradient(d, w, f, b, alpha)
- newVal = getValues(d, w, b)
- while abs(oldVal - newVal) > delta:
- oldVal = newVal
- Gradient(d, w, f, b, alpha)
- newVal = getValues(d, w, b)
-
- def main():
- while True:
- try:
- F, N = map(int, raw_input().split())
- d = []
- b = 5
- w = makeMatrix(F, b + 1)
- for i in range(0, N):
- t = Data()
- t.x = map(float, raw_input().split())
- t.y = t.x.pop()
- d.append(t)
- Iterator(d, w, F, b)
- N = int(raw_input())
- for i in range(0, N):
- t = Data()
- t.x = map(float, raw_input().split())
- print '%.2f'% WX(t, w, b)
- except EOFError:
- break
-
- if __name__ == '__main__':
- main()
不过,上述代码得到的结果偏差比较大,需要重新考虑。除了上述方式外,还有一种特征组合方法效果不错。
代码:
- #include <iostream>
- #include <string.h>
- #include <fstream>
- #include <stdio.h>
- #include <math.h>
- #include <vector>
-
- #define Vector vector
- using namespace std;
-
- struct Data
- {
- Vector<double> x;
- double y;
- };
-
- double WX(const Data& d, const Vector<double>& w)
- {
- double ans = 0;
- for(int i = 0; i < w.size(); i++)
- ans += w[i] * d.x[i];
- return ans;
- }
-
- void Gradient(const Vector<Data>& d, Vector<double> &w, double alpha)
- {
- for(int i = 0; i < w.size(); i++)
- {
- double tmp = 0;
- for(int j = 0; j < d.size(); j++)
- tmp += alpha * d[j].x[i] * (WX(d[j], w) - d[j].y);
- w[i] -= tmp;
- }
- }
-
- double getValues(const Vector<Data>& d, Vector<double> w)
- {
- double res = 0;
- for(int i = 0; i < d.size(); i++)
- {
- double tmp = WX(d[i], w);
- res += fabs(d[i].y - tmp);
- }
- return res;
- }
-
- void Iterator(const Vector<Data>& d, Vector<double> &w)
- {
- double alpha = 0.3 / d.size();
- double delta = 0.5;
- double oldVal = getValues(d, w);
- Gradient(d, w, alpha);
- double newVal = getValues(d, w);
- while(fabs(oldVal - newVal) > delta)
- {
- oldVal = newVal;
- Gradient(d, w, alpha);
- newVal = getValues(d, w);
- }
- }
-
- Vector<double> getFeatures(Vector<double> x)
- {
- Vector<double> res;
- int n = x.size();
- for(int i = 0; i < n; i++)
- for(int j = i; j < n; j++)
- for(int k = j; k < n; k++)
- res.push_back(x[i] * x[j] * x[k]);
- return res;
- }
-
- int main()
- {
- int F, N;
- Vector<double> w;
- Vector<Data> d;
- while(scanf("%d %d", &F, &N) != EOF)
- {
- d.clear();
- w.clear();
- int features = 0;
- for(int i = 0; i < N; i++)
- {
- Data t;
- double _x, _y;
- t.x.push_back(1);
- for(int j = 1; j <= F; j++)
- {
- scanf("%lf", &_x);
- t.x.push_back(_x);
- }
- t.x = getFeatures(t.x);
- features = t.x.size();
- scanf("%lf", &_y);
- t.y = _y;
- d.push_back(t);
- }
- for(int i = 0; i < features; i++)
- w.push_back(0);
- Iterator(d, w);
- d.clear();
- scanf("%d", &N);
- for(int i = 0; i < N; i++)
- {
- Data t;
- double _x;
- t.x.push_back(1);
- for(int j = 1; j <= F; j++)
- {
- scanf("%lf", &_x);
- t.x.push_back(_x);
- }
- t.x = getFeatures(t.x);
- printf("%.2lf\n", WX(t, w));
- }
- }
- return 0;
- }
另外利用Python的机器学习开源库sklearn很方便处理。具体可以参考如下链接。
题解:http://blog.guozengxin.cn/2015/01/08/hackerrank-predicting-office-space-price/
sklearn官网:http://scikit-learn.org/stable/
sklearn源代码:https://github.com/scikit-learn/scikit-learn/
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。