In [1]:
from sklearn.datasets import load_breast_cancer
df = load_breast_cancer()
print(df.DESCR)

.. _breast_cancer_dataset:

Breast cancer wisconsin (diagnostic) dataset
--------------------------------------------

**Data Set Characteristics:**

    :Number of Instances: 569

    :Number of Attributes: 30 numeric, predictive attributes and the class

    :Attribute Information:
        - radius (mean of distances from center to points on the perimeter)
        - texture (standard deviation of gray-scale values)
        - perimeter
        - area
        - smoothness (local variation in radius lengths)
        - compactness (perimeter^2 / area - 1.0)
        - concavity (severity of concave portions of the contour)
        - concave points (number of concave portions of the contour)
        - symmetry
        - fractal dimension ("coastline approximation" - 1)

        The mean, standard error, and "worst" or largest (mean of the three
        worst/largest values) of these features were computed for each image,
        resulting in 30 features.  For instance, field 0 is Mean Radi

In [4]:
# generating Train/Test Split
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df.data, df.target,
                                                    test_size=0.4,
                                                    random_state=67)

In [9]:
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()
train = clf.fit(X_train, y_train)

In [10]:
# predict

pred = train.predict(X_test)

In [12]:
# evaluate

from sklearn.metrics import confusion_matrix, classification_report

cm = confusion_matrix(pred, y_test)
cm

array([[ 78,  10],
       [  8, 132]])

In [14]:
print(classification_report(pred, y_test))

              precision    recall  f1-score   support

           0       0.91      0.89      0.90        88
           1       0.93      0.94      0.94       140

    accuracy                           0.92       228
   macro avg       0.92      0.91      0.92       228
weighted avg       0.92      0.92      0.92       228



In [15]:
from sklearn import tree
with open('breast_cancer.dot','w') as f:
    f = tree.export_graphviz(clf, out_file=f, feature_names=df.feature_names,
                             class_names=df.target_names, filled=True, rounded=True,
                             special_characters=True)

## KMeans clustering