#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Minimal mean-squared-error (MSE) linear-regression demo.

Fits ``y = w * x + b`` to the samples stored in the file ``train_data`` with
hand-written gradient descent (first ``w``, then ``b``), and prints the
coefficients found by scikit-learn's ``LinearRegression`` for comparison.
"""
import sys

# Python 2 only: make UTF-8 the implicit str/unicode conversion codec.
# ``reload`` is not a builtin on Python 3 (where UTF-8 is already the
# default), so the hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 - Python 2 builtin
    sys.setdefaultencoding('utf-8')

import random

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import metrics

from draw import draw_util


def read_data(path):
    """Load the samples stored in *path*, one Python expression per line.

    Every non-blank line is stripped and evaluated; blank lines are skipped.
    The rest of this module indexes each sample as ``item[0][0]`` (feature)
    and ``item[1][0]`` (target).

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one evaluated object per non-blank line.
    """
    samples = []
    with open(path) as f:
        for line in f:
            text = line.strip()
            if not text:
                # A blank (e.g. trailing) line would make eval() raise
                # SyntaxError; it carries no sample, so skip it.
                continue
            # SECURITY: eval() executes arbitrary code found in the file.
            # Only use this on trusted data files; if the lines are plain
            # literals, ast.literal_eval() would be a safer replacement.
            samples.append(eval(text))
    return samples


def _residual(item, w, b):
    """Return the prediction error ``w * x + b - y`` for one sample."""
    return w * item[0][0] + b - item[1][0]


def cal_step_pow(data, w, b=3):
    """Return the mean gradient of the squared error with respect to ``w``.

    Computes ``mean(2 * (w * x + b - y) * x)`` over *data*.

    NOTE(review): the default bias here is 3 while ``cal_step_pow_b`` and
    ``cal_mse`` default to 1. The default is kept for backward
    compatibility; confirm which value is intended.

    Args:
        data: Sequence of samples indexed as ``item[0][0]`` / ``item[1][0]``.
        w: Current slope.
        b: Bias used in the prediction.

    Raises:
        ZeroDivisionError: If *data* is empty.
    """
    total = sum([_residual(item, w, b) * item[0][0] * 2 for item in data])
    # float() avoids Python 2 floor division when every input is an int.
    return total / float(len(data))


def cal_step_pow_b(data, w, b=1):
    """Return the mean gradient of the squared error with respect to ``b``.

    Computes ``mean(2 * (w * x + b - y))`` over *data*.

    Args:
        data: Sequence of samples indexed as ``item[0][0]`` / ``item[1][0]``.
        w: Slope used in the prediction.
        b: Current bias.

    Raises:
        ZeroDivisionError: If *data* is empty.
    """
    total = sum([_residual(item, w, b) * 2 for item in data])
    # float() avoids Python 2 floor division when every input is an int.
    return total / float(len(data))


def cal_mse(data, w, b=1):
    """Return the mean squared error of ``w * x + b`` over *data*.

    Args:
        data: Sequence of samples indexed as ``item[0][0]`` / ``item[1][0]``.
        w: Slope used in the prediction.
        b: Bias used in the prediction.

    Raises:
        ZeroDivisionError: If *data* is empty.
    """
    total = sum([_residual(item, w, b) * _residual(item, w, b)
                 for item in data])
    # float() avoids Python 2 floor division when every input is an int.
    return total / float(len(data))


def train(path='train_data', learning_rate=0.01, iterations=50):
    """Fit the slope ``w`` by gradient descent and return it.

    ``w`` starts at a random value in [-50, 50]. Each iteration prints
    ``w``, the step about to be applied and the current MSE.

    NOTE(review): the gradient uses ``cal_step_pow``'s default bias (3)
    whereas the printed MSE uses ``cal_mse``'s default bias (1). This
    mirrors the original behaviour; confirm whether it is intended.

    Args:
        path: Training file passed to ``read_data``.
        learning_rate: Factor applied to the gradient to get the step.
        iterations: Number of descent steps.

    Returns:
        The slope after the last step.
    """
    train_data = read_data(path)
    w = random.uniform(-50, 50)
    for _ in range(iterations):
        step = cal_step_pow(train_data, w) * learning_rate
        mse = cal_mse(train_data, w)
        # Single formatted argument prints identically on Python 2 and 3.
        print("%s %s %s" % (w, step, mse))
        w = w - step
    return w


def train_b(w, path='train_data', learning_rate=0.01, iterations=1000):
    """Fit the bias ``b`` by gradient descent with the slope *w* held fixed.

    ``b`` starts at a random value in [-50, 50]. Each iteration prints
    ``b``, the step about to be applied and the current MSE.

    Args:
        w: Fixed slope.
        path: Training file passed to ``read_data``.
        learning_rate: Factor applied to the gradient to get the step.
        iterations: Number of descent steps.

    Returns:
        The bias after the last step.
    """
    train_data = read_data(path)
    b = random.uniform(-50, 50)
    for _ in range(iterations):
        step = cal_step_pow_b(train_data, w, b) * learning_rate
        mse = cal_mse(train_data, w, b)
        print("%s %s %s" % (b, step, mse))
        b = b - step
    return b


def main():
    """Run both descents, then print scikit-learn's fit for comparison."""
    w = train()
    print("__________")
    b = train_b(w)
    print("__________")
    print("%s %s" % (w, b))

    train_data = read_data('train_data')
    # Each sample is a (features, targets) pair; split them into two columns.
    X, y = zip(*train_data)
    X = np.array(X)
    y = np.array(y)
    model = LinearRegression()
    # fit() finds the w and b that minimise the error on the training data.
    model.fit(X, y)
    print("%s %s" % (model.coef_, model.intercept_))


if __name__ == '__main__':
    main()