In [1]:
from sklearn.datasets import load_breast_cancer

In [2]:
# Load the breast-cancer dataset bundled with scikit-learn and print the
# description text that ships with it.
# NOTE(review): `bresat_cancer` looks like a misspelling of `breast_cancer`;
# the name is kept unchanged because later cells reference it.
bresat_cancer = load_breast_cancer()
print(bresat_cancer.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [5]:
# Quick look at the dataset: each array followed by its shape (features first,
# then labels), and finally the class names and the feature names.
for array in (bresat_cancer.data, bresat_cancer.target):
    print(array)
    print(array.shape)
for names in (bresat_cancer.target_names, bresat_cancer.feature_names):
    print(names)

[[1.799e+01 1.038e+01 1.228e+02 ... 2.654e-01 4.601e-01 1.189e-01]
 [2.057e+01 1.777e+01 1.329e+02 ... 1.860e-01 2.750e-01 8.902e-02]
 [1.969e+01 2.125e+01 1.300e+02 ... 2.430e-01 3.613e-01 8.758e-02]
 ...
 [1.660e+01 2.808e+01 1.083e+02 ... 1.418e-01 2.218e-01 7.820e-02]
 [2.060e+01 2.933e+01 1.401e+02 ... 2.650e-01 4.087e-01 1.240e-01]
 [7.760e+00 2.454e+01 4.792e+01 ... 0.000e+00 2.871e-01 7.039e-02]]
(569, 30)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 0 0 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0
 1 0 1 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 1 1 0 0 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1
 1 1 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0 0 1 0 0 1 1 0 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 1 0 0 1 1 1 1 0 1 1 0 0 0 1 0
 1 0 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0 0 1 1 0 0 1 1
 1 0 1 1 1 1 1 0 0 1 1 0 1 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 0 1 1 0 1 0 0 1 1 1 

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Split into 60% training data and the remaining ~40% for evaluation.
# random_state is pinned so the split is the same on every run.
# NOTE(review): no `stratify=` is passed, so class proportions may differ
# slightly between the two splits -- confirm this is intended.
features, labels = bresat_cancer.data, bresat_cancer.target
xTrain, xTest, yTrain, yTest = train_test_split(
    features, labels, train_size=0.6, random_state=0
)

In [8]:
# Show the training feature matrix (a bare last expression is rendered as the
# cell's output).
xTrain

array([[1.120e+01, 2.937e+01, 7.067e+01, ..., 0.000e+00, 1.566e-01,
        5.905e-02],
       [1.981e+01, 2.215e+01, 1.300e+02, ..., 2.388e-01, 2.768e-01,
        7.615e-02],
       [1.630e+01, 1.570e+01, 1.047e+02, ..., 1.357e-01, 2.300e-01,
        7.230e-02],
       ...,
       [9.436e+00, 1.832e+01, 5.982e+01, ..., 5.052e-02, 2.454e-01,
        8.136e-02],
       [9.720e+00, 1.822e+01, 6.073e+01, ..., 0.000e+00, 1.909e-01,
        6.559e-02],
       [1.151e+01, 2.393e+01, 7.452e+01, ..., 9.653e-02, 2.112e-01,
        8.732e-02]])

In [9]:
from sklearn.neighbors import KNeighborsClassifier

In [17]:
# Baseline model: k-nearest neighbours with k=5 and otherwise default settings.
# NOTE(review): the features are fed in unscaled although their magnitudes
# differ by orders of magnitude; KNN is distance-based, so consider
# standardising the inputs first.
clf = KNeighborsClassifier(n_neighbors=5)
clf.fit(xTrain, yTrain)
# `fit()` returns the estimator itself, so `fit` is simply a second name for
# the fitted `clf`; it is kept because later cells call `fit.score(...)`.
fit = clf
yPred = clf.predict(xTest)

In [18]:
from sklearn.metrics import confusion_matrix

In [19]:
# Confusion matrix for the held-out split: rows correspond to the true class,
# columns to the predicted class.
cm = confusion_matrix(yTest, yPred)
print(cm)

[[ 78   5]
 [  6 139]]


In [20]:
from sklearn.metrics import classification_report

In [22]:
# Per-class precision / recall / F1 on the held-out split, labelled with the
# dataset's class names.
report = classification_report(
    yTest, yPred, target_names=bresat_cancer.target_names
)
print(report)

              precision    recall  f1-score   support

   malignant       0.93      0.94      0.93        83
      benign       0.97      0.96      0.96       145

    accuracy                           0.95       228
   macro avg       0.95      0.95      0.95       228
weighted avg       0.95      0.95      0.95       228



In [26]:
# Mean accuracy of the fitted classifier on the held-out split, shown to two
# decimal places.
accuracy = fit.score(xTest, yTest)
print(f'ACC: {accuracy:.2f}')

ACC: 0.95


In [34]:
# Variant: k=5 with neighbours weighted by inverse distance instead of
# uniformly.
# NOTE(review): this variant is evaluated on the same test split as the other
# configurations, so choosing the best one from these numbers gives an
# optimistic estimate -- consider cross-validation on the training split.
clf = KNeighborsClassifier(n_neighbors=5, weights='distance')
clf.fit(xTrain, yTrain)
fit = clf  # `fit()` returns the estimator itself; alias kept for callers of `fit`
yPred = clf.predict(xTest)

cm = confusion_matrix(yTest, yPred)
report = classification_report(
    yTest, yPred, target_names=bresat_cancer.target_names
)
print(cm)
print(report)

[[ 78   5]
 [  5 140]]
              precision    recall  f1-score   support

   malignant       0.94      0.94      0.94        83
      benign       0.97      0.97      0.97       145

    accuracy                           0.96       228
   macro avg       0.95      0.95      0.95       228
weighted avg       0.96      0.96      0.96       228



In [35]:
# Variant: k=5, inverse-distance weighting, and p=1 (Manhattan distance under
# the default Minkowski metric).
# NOTE(review): this variant is evaluated on the same test split as the other
# configurations, so choosing the best one from these numbers gives an
# optimistic estimate -- consider cross-validation on the training split.
clf = KNeighborsClassifier(n_neighbors=5, weights='distance', p=1)
clf.fit(xTrain, yTrain)
fit = clf  # `fit()` returns the estimator itself; alias kept for callers of `fit`
yPred = clf.predict(xTest)

cm = confusion_matrix(yTest, yPred)
report = classification_report(
    yTest, yPred, target_names=bresat_cancer.target_names
)
print(cm)
print(report)

[[ 78   5]
 [  2 143]]
              precision    recall  f1-score   support

   malignant       0.97      0.94      0.96        83
      benign       0.97      0.99      0.98       145

    accuracy                           0.97       228
   macro avg       0.97      0.96      0.97       228
weighted avg       0.97      0.97      0.97       228



In [38]:
# Variant: k=10, inverse-distance weighting, and p=1 (Manhattan distance under
# the default Minkowski metric).
# NOTE(review): this variant is evaluated on the same test split as the other
# configurations, so choosing the best one from these numbers gives an
# optimistic estimate -- consider cross-validation on the training split.
clf = KNeighborsClassifier(n_neighbors=10, weights='distance', p=1)
clf.fit(xTrain, yTrain)
fit = clf  # `fit()` returns the estimator itself; alias kept for callers of `fit`
yPred = clf.predict(xTest)

cm = confusion_matrix(yTest, yPred)
report = classification_report(
    yTest, yPred, target_names=bresat_cancer.target_names
)
print(cm)
print(report)

[[ 78   5]
 [  1 144]]
              precision    recall  f1-score   support

   malignant       0.99      0.94      0.96        83
      benign       0.97      0.99      0.98       145

    accuracy                           0.97       228
   macro avg       0.98      0.97      0.97       228
weighted avg       0.97      0.97      0.97       228

