cancer_get_data.py 1007 B

1234567891011121314151617181920212223242526272829303132333435
  1. # -*- encoding:utf-8 -*-
  2. from sklearn import datasets
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.linear_model import LinearRegression
  5. from sklearn.model_selection import cross_val_predict
  6. from numpy import shape
  7. from sklearn import metrics
  8. def array_to_list(data):
  9. data = data.tolist()
  10. return data
  11. loaded_data = datasets.load_breast_cancer()
  12. # loaded_data = datasets.load_boston()
  13. data_X = loaded_data.data
  14. data_y = loaded_data.target
  15. X_train, X_test, y_train, y_test = train_test_split(data_X, data_y, test_size=0.2)
  16. X_train = array_to_list(X_train)
  17. X_test = array_to_list(X_test)
  18. y_train = array_to_list(y_train)
  19. y_test = array_to_list(y_test)
  20. train_data = zip(X_train, y_train)
  21. train_data = [str(list(s)) for s in train_data]
  22. test_data = zip(X_test, y_test)
  23. test_data = [str(list(s)) for s in test_data]
  24. with open("cancer_train_data", "w") as f:
  25. f.writelines("\n".join(train_data))
  26. with open("cancer_test_data", "w") as f:
  27. f.writelines("\n".join(test_data))