1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- #!/usr/bin/python
- # -*- coding: UTF-8 -*-
'''
Minimal MSE (mean squared error) example.
'''
- import sys
- reload(sys)
- sys.setdefaultencoding('utf-8')
- import random
- import numpy as np
- from sklearn.linear_model import LinearRegression
- from sklearn import metrics
- from draw import draw_util
def read_data(path):
    """Load samples from a text file holding one Python expression per line.

    Args:
        path: Path of the file to read.

    Returns:
        A list with one evaluated object per non-blank line, in file order.
        The other functions in this file index each sample as
        ``item[0][0]`` (input) and ``item[1][0]`` (target).
    """
    samples = []
    with open(path) as f:
        # Iterate the file directly instead of materialising readlines().
        for line in f:
            text = line.strip()
            # A blank line (e.g. an extra newline at the end of the file)
            # used to reach eval('') and abort the load with a SyntaxError.
            if not text:
                continue
            # NOTE(security): eval() executes arbitrary expressions, so this
            # must only be pointed at trusted files. If the data is always
            # plain literals, ast.literal_eval would be a safer replacement.
            samples.append(eval(text))
    return samples
def cal_step_pow(data, w, b=3):
    """Return the mean gradient of the squared error with respect to ``w``.

    The model is ``w * x + b`` with ``x = item[0][0]`` and target
    ``item[1][0]``; each sample contributes ``2 * (w*x + b - y) * x``.

    Args:
        data: Sequence of samples indexable as ``item[0][0]`` / ``item[1][0]``.
        w: Current slope.
        b: Intercept held fixed while differentiating (defaults to 3).

    Returns:
        The per-sample gradient averaged over ``data``, as a float.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    total = 0
    for item in data:
        x = item[0][0]
        residual = w * x + b - item[1][0]
        total += residual * x * 2
    # float() forces true division: under Python 2 an all-integer input
    # would otherwise be floor-divided and the gradient silently truncated.
    return total / float(len(data))
def cal_step_pow_b(data, w, b=1):
    """Return the mean gradient of the squared error with respect to ``b``.

    The model is ``w * x + b`` with ``x = item[0][0]`` and target
    ``item[1][0]``; each sample contributes ``2 * (w*x + b - y)``.

    Args:
        data: Sequence of samples indexable as ``item[0][0]`` / ``item[1][0]``.
        w: Slope held fixed while differentiating.
        b: Current intercept (defaults to 1).

    Returns:
        The per-sample gradient averaged over ``data``, as a float.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    total = sum((w * item[0][0] + b - item[1][0]) * 2 for item in data)
    # float() forces true division: under Python 2 an all-integer input
    # would otherwise be floor-divided and the gradient silently truncated.
    return total / float(len(data))
def cal_mse(data, w, b=1):
    """Return the mean squared error of the line ``w * x + b`` over ``data``.

    Args:
        data: Sequence of samples indexable as ``item[0][0]`` (input) and
            ``item[1][0]`` (target).
        w: Slope of the line.
        b: Intercept of the line (defaults to 1).

    Returns:
        The squared residual averaged over ``data``, as a float.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    total = 0
    for item in data:
        # Compute the residual once instead of evaluating it twice.
        residual = w * item[0][0] + b - item[1][0]
        total += residual * residual
    # float() forces true division: under Python 2 an all-integer input
    # would otherwise be floor-divided and the error silently truncated.
    return total / float(len(data))
def train():
    """Fit the slope ``w`` by gradient descent with the intercept held fixed.

    Reads the samples from the file ``train_data``, starts from a random
    slope drawn from [-50, 50] and applies 50 updates of the form
    ``w -= 0.01 * gradient``. Before each update it prints the current
    ``w``, the step about to be taken and the current MSE.

    Returns:
        The slope after the final update.
    """
    train_data = read_data('train_data')
    # Intercept assumed while fitting w. It equals cal_step_pow's default, so
    # the trajectory of w is unchanged. It is now also passed to cal_mse,
    # which previously fell back to its own default (b=1) and therefore
    # logged the error of a different line than the one being optimised.
    bias = 3
    learning_rate = 0.01
    w = random.uniform(-50, 50)
    for _ in range(50):
        step = cal_step_pow(train_data, w, bias) * learning_rate
        mse = cal_mse(train_data, w, bias)
        # One pre-formatted argument prints the same text whether print is
        # the Python 2 statement or the Python 3 function.
        print("%s %s %s" % (w, step, mse))
        w = w - step
    return w
def train_b(w):
    """Fit the intercept by gradient descent for an already chosen slope.

    Reads the samples from the file ``train_data``, starts from a random
    intercept drawn from [-50, 50] and performs 1000 updates of the form
    ``b -= 0.01 * gradient``. Before each update it prints the current
    intercept, the step about to be taken and the current MSE.

    Args:
        w: Slope kept constant during the whole run.

    Returns:
        The intercept after the final update.
    """
    samples = read_data('train_data')
    intercept = random.uniform(-50, 50)
    remaining = 1000
    while remaining > 0:
        delta = cal_step_pow_b(samples, w, intercept) * 0.01
        error = cal_mse(samples, w, intercept)
        # One pre-formatted argument prints the same text whether print is
        # the Python 2 statement or the Python 3 function.
        print("%s %s %s" % (intercept, delta, error))
        intercept -= delta
        remaining -= 1
    return intercept
if __name__ == '__main__':
    # Hand-rolled gradient descent: fit the slope first, then the intercept.
    w = train()
    print("__________")
    b = train_b(w)
    print("__________")
    print("%s %s" % (w, b))

    # Fit scikit-learn's LinearRegression on the same samples and print its
    # coefficients next to the ones obtained above.
    train_data = read_data('train_data')
    X, y = (np.array(column) for column in zip(*train_data))
    model = LinearRegression()
    # Calling fit() finds the w and b that make the error as small as possible.
    model.fit(X, y)
    print("%s %s" % (model.coef_, model.intercept_))
|