123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384 |
- # -*- encoding:utf-8 -*-
- from sklearn import datasets
- from sklearn.model_selection import train_test_split
- from sklearn.linear_model import LogisticRegression
- from sklearn.model_selection import cross_val_predict
- from numpy import shape
- from sklearn import metrics
- from sklearn.metrics import log_loss
- import numpy as np
- import matplotlib.pyplot as plt
def read_data(path):
    """Load a labelled data set from a text file.

    Each non-blank line of the file must be a Python expression that
    evaluates to a ``(features, label)`` pair, e.g. ``[1.0, 2.0], 0``.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(X, y)`` of numpy arrays: ``X`` stacks the feature entries
        in file order and ``y`` holds the matching labels.

    Raises:
        ValueError: If the file contains no samples, or a line does not
            evaluate to exactly two items.
    """
    samples = []
    with open(path) as f:
        # Stream the file instead of materialising it with readlines().
        for raw_line in f:
            text = raw_line.strip()
            if not text:
                # Blank (e.g. trailing) lines carry no sample; eval("")
                # would otherwise abort the whole load with a SyntaxError.
                continue
            # NOTE(security): eval() executes arbitrary code found in the
            # file. Only use this on trusted data; if the lines are plain
            # literals, ast.literal_eval is the safe replacement.
            samples.append(eval(text))
    if not samples:
        # zip(*[]) yields nothing, so unpacking below would fail with an
        # opaque message; report the actual problem instead.
        raise ValueError("no samples found in %r" % (path,))
    X, y = zip(*samples)
    return np.array(X), np.array(y)
def curve(x_train, w, w0, num_points=100, points_per_unit=10):
    """Return the training points plus sampled points of a separating line.

    The line is ``w[0] * x1 + w[1] * x2 + w0 = 0``. It is sampled at
    ``x1 = i / points_per_unit`` for ``i`` in ``range(num_points)``; with the
    defaults that is x1 = 0.0, 0.1, ..., 9.9.

    Args:
        x_train: Array-like exposing ``tolist()`` whose rows are
            ``[x1, x2]`` pairs (any other row length fails on unpacking).
        w: Sequence of the two line coefficients.
        w0: Intercept term of the line.
        num_points: Number of points sampled along the line.
        points_per_unit: Sampling density along the x1 axis.

    Returns:
        A list of ``"x1,x2"`` strings: first the training rows in their
        original order, then the sampled line points.

    Raises:
        ZeroDivisionError: If ``w[1]`` is a plain Python zero (a vertical
            line cannot be written as x2 = f(x1)).
    """
    # tolist() builds a fresh list, so the caller's array is not modified.
    points = x_train.tolist()
    for i in range(num_points):
        # Keep the exact arithmetic form (1.0 * i / n) so default output
        # stays bit-for-bit identical to earlier versions.
        x1 = 1.0 * i / points_per_unit
        x2 = -1 * (w[0] * x1 + w0) / w[1]
        points.append([x1, x2])
    return ["{},{}".format(x1, x2) for x1, x2 in points]
def drawScatterAndLine(p, q):
    """Show a scatter plot of 2-D points, split into two series by label.

    Args:
        p: Indexable collection of points; ``p[k][0]`` and ``p[k][1]`` are
            used as the horizontal and vertical coordinates.
        q: Iterable of labels aligned with ``p``. Points whose label equals
            0 form the first series, all other points the second.
    """
    zero_class = []
    other_class = []
    for k, label in enumerate(q):
        bucket = zero_class if label == 0 else other_class
        bucket.append((p[k][0], p[k][1]))

    # One scatter call per class so each gets its own default colour.
    for series in (zero_class, other_class):
        horizontal = [pt[0] for pt in series]
        vertical = [pt[1] for pt in series]
        plt.scatter(horizontal, vertical)

    plt.xlabel('p')
    plt.ylabel('q')
    plt.title('line regesion')
    plt.show()
def main():
    """Fit a logistic-regression model and print its test-set predictions.

    Reads the training set from ``train_data`` and the test set from
    ``test_data`` (paths relative to the working directory), shows a scatter
    plot of the training points, fits ``LogisticRegression`` on the training
    set and prints the learned coefficients, the intercept and the predicted
    labels for the test set.
    """
    X_train, y_train = read_data("train_data")
    drawScatterAndLine(X_train, y_train)

    # y_test is only needed by the optional diagnostics sketched below.
    X_test, y_test = read_data("test_data")

    model = LogisticRegression()
    model.fit(X_train, y_train)

    # Single-argument print(...) calls emit the same text under Python 2
    # and Python 3; the former print statements were Python 2 only.
    print("w %s" % (model.coef_,))
    print("w0 %s" % (model.intercept_,))

    y_pred = model.predict(X_test)
    print(y_pred)

    # Optional diagnostics (disabled): log-loss of predicted probabilities.
    # proba = model.predict_proba(X_test)
    # print("KL_loss: %s" % (log_loss(y_test, proba),))

    # Optional export (disabled): write the training points followed by
    # sampled separating-line points to "train_with_splitline".
    # curve_results = curve(X_train, model.coef_.tolist()[0],
    #                       model.intercept_.tolist()[0])
    # with open("train_with_splitline", "w") as f:
    #     f.writelines("\n".join(curve_results))
# Run the demo only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
|