import pandas as pd

# Load the iris dataset; read_csv already uses "," as the default separator.
iris = pd.read_csv("iris_data.csv")
# Preview the first five rows.
iris.head()
sepal_length | sepal_width | petal_length | petal_width | species | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# Dimensions of the DataFrame as (rows, columns).
iris.shape
(150, 5)
Le DataFrame iris compte 5 variables et 150 individus.
La variable cible est : "species".
Les variables explicatives sont : "sepal_length", "sepal_width", "petal_length" et "petal_width".
# Explanatory variables: the four numeric measurements.
X = iris.loc[:, "sepal_length":"petal_width"]
# Target variable: the species label.
y = iris["species"]

from sklearn.model_selection import train_test_split

# Hold out 35% of the rows for testing. random_state makes the split
# reproducible across runs; stratify keeps the class proportions of y
# identical in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.35, random_state=42, stratify=y
)

from sklearn.neighbors import KNeighborsClassifier

# Instantiate the model: k-nearest neighbours with k = 3.
modele1 = KNeighborsClassifier(n_neighbors=3)
# Fit on the training split.
modele1.fit(X_train, y_train)
KNeighborsClassifier(n_neighbors=3)
# Predict the species for each held-out test sample.
y_pred = modele1.predict(X_test)
# Display the predicted labels.
y_pred
array(['versicolor', 'virginica', 'versicolor', 'versicolor', 'setosa', 'versicolor', 'setosa', 'versicolor', 'setosa', 'virginica', 'versicolor', 'setosa', 'setosa', 'virginica', 'virginica', 'setosa', 'virginica', 'versicolor', 'versicolor', 'versicolor', 'setosa', 'virginica', 'versicolor', 'setosa', 'setosa', 'versicolor', 'versicolor', 'versicolor', 'versicolor', 'setosa', 'versicolor', 'virginica', 'virginica', 'virginica', 'virginica', 'virginica', 'virginica', 'setosa', 'setosa', 'virginica', 'setosa', 'virginica', 'setosa', 'setosa', 'setosa', 'setosa', 'setosa', 'virginica', 'setosa', 'versicolor', 'versicolor', 'setosa', 'setosa'], dtype=object)
from sklearn.metrics import confusion_matrix
# Confusion matrix: rows are true classes, columns are predicted classes.
confusion_matrix(y_test,y_pred)
array([[21, 0, 0], [ 0, 16, 1], [ 0, 1, 14]], dtype=int64)
from sklearn.metrics import accuracy_score
# Fraction of test samples classified correctly.
accuracy_score(y_test,y_pred)
0.9622641509433962
# Manual check: accuracy = correct predictions (matrix diagonal) / all predictions.
(21+16+14)/(21+16+14+1+1)
0.9622641509433962
# Load the breast-cancer dataset (comma-separated by default).
cancer = pd.read_csv("data_breast_cancer.csv")
# Preview the first five rows.
cancer.head()
id | diagnosis | radius_mean | texture_mean | perimeter_mean | area_mean | smoothness_mean | compactness_mean | concavity_mean | concave points_mean | ... | texture_worst | perimeter_worst | area_worst | smoothness_worst | compactness_worst | concavity_worst | concave points_worst | symmetry_worst | fractal_dimension_worst | Unnamed: 32 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 842302 | M | 17.99 | 10.38 | 122.80 | 1001.0 | 0.11840 | 0.27760 | 0.3001 | 0.14710 | ... | 17.33 | 184.60 | 2019.0 | 0.1622 | 0.6656 | 0.7119 | 0.2654 | 0.4601 | 0.11890 | NaN |
1 | 842517 | M | 20.57 | 17.77 | 132.90 | 1326.0 | 0.08474 | 0.07864 | 0.0869 | 0.07017 | ... | 23.41 | 158.80 | 1956.0 | 0.1238 | 0.1866 | 0.2416 | 0.1860 | 0.2750 | 0.08902 | NaN |
2 | 84300903 | M | 19.69 | 21.25 | 130.00 | 1203.0 | 0.10960 | 0.15990 | 0.1974 | 0.12790 | ... | 25.53 | 152.50 | 1709.0 | 0.1444 | 0.4245 | 0.4504 | 0.2430 | 0.3613 | 0.08758 | NaN |
3 | 84348301 | M | 11.42 | 20.38 | 77.58 | 386.1 | 0.14250 | 0.28390 | 0.2414 | 0.10520 | ... | 26.50 | 98.87 | 567.7 | 0.2098 | 0.8663 | 0.6869 | 0.2575 | 0.6638 | 0.17300 | NaN |
4 | 84358402 | M | 20.29 | 14.34 | 135.10 | 1297.0 | 0.10030 | 0.13280 | 0.1980 | 0.10430 | ... | 16.67 | 152.20 | 1575.0 | 0.1374 | 0.2050 | 0.4000 | 0.1625 | 0.2364 | 0.07678 | NaN |
5 rows × 33 columns
# Explanatory variables: the 30 numeric features, excluding the id and
# diagnosis columns and the empty trailing "Unnamed: 32" column.
X = cancer.loc[:, 'radius_mean':'fractal_dimension_worst']
# Target: the diagnosis label ('M' = malignant, 'B' = benign).
y = cancer['diagnosis']

# Reproducible, stratified 75/25 split (train_test_split imported above).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42, stratify=y
)

from sklearn.neighbors import KNeighborsClassifier
# k-nearest neighbours with k = 3.
modele_knn = KNeighborsClassifier(n_neighbors=3)

from sklearn.linear_model import LogisticRegression
# max_iter raised above the default so the solver converges on these
# unscaled features.
modele_reg_log = LogisticRegression(max_iter=5000)

# Fit both models on the same training split.
modele_knn.fit(X_train, y_train)
modele_reg_log.fit(X_train, y_train)
LogisticRegression(max_iter=5000)
# Predict test-set labels with each fitted model.
pred_knn = modele_knn.predict(X_test)
pred_reg_log = modele_reg_log.predict(X_test)
# Display the k-NN predictions.
pred_knn
array(['M', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'M', 'M', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'M', 'M'], dtype=object)
# Display the logistic-regression predictions.
pred_reg_log
array(['M', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'M', 'B', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'M', 'M', 'B', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'M', 'B', 'M', 'M', 'B', 'B', 'B', 'M', 'B', 'M', 'M', 'M', 'M', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'M', 'M'], dtype=object)
# Confusion matrix for k-NN (rows: true labels, columns: predictions).
confusion_matrix(y_test, pred_knn)
array([[82, 1], [ 8, 52]], dtype=int64)
# Confusion matrix for logistic regression.
confusion_matrix(y_test, pred_reg_log)
array([[82, 1], [ 6, 54]], dtype=int64)
# Test-set accuracy of the k-NN model.
accuracy_score(y_test, pred_knn)
0.9370629370629371
# Test-set accuracy of the logistic-regression model.
accuracy_score(y_test, pred_reg_log)
0.951048951048951
# Per-class probability estimates for each test sample; columns follow
# modele_knn.classes_ — presumably ['B', 'M'] here (TODO confirm).
pred_proba_knn = modele_knn.predict_proba(X_test)
# Display the probability matrix.
pred_proba_knn
array([[0. , 1. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0.66666667, 0.33333333], [1. , 0. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0.33333333, 0.66666667], [1. , 0. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [0.66666667, 0.33333333], [1. , 0. ], [0. , 1. ], [0.66666667, 0.33333333], [0.33333333, 0.66666667], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0.33333333, 0.66666667], [1. , 0. ], [0.33333333, 0.66666667], [0. , 1. ], [0. , 1. ], [1. , 0. ], [0.33333333, 0.66666667], [0. , 1. ], [1. , 0. ], [0. , 1. ], [0.66666667, 0.33333333], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [0.33333333, 0.66666667], [0. , 1. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [0.66666667, 0.33333333], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [0. , 1. ], [0. , 1. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [0.33333333, 0.66666667], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0.66666667, 0.33333333], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0.33333333, 0.66666667], [1. , 0. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0.66666667, 0.33333333], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0.66666667, 0.33333333], [1. , 0. ], [0.66666667, 0.33333333], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [0. , 1. ], [1. , 0. ], [1. , 0. ], [0. , 1. ], [1. , 0. ], [0. , 1. ], [0. , 1. ]])
# Per-class probability estimates from the logistic regression; columns
# follow modele_reg_log.classes_ — presumably ['B', 'M'] (TODO confirm).
pred_proba_reg_log = modele_reg_log.predict_proba(X_test)
# Display the probability matrix.
pred_proba_reg_log
array([[1.58570398e-06, 9.99998414e-01], [9.97729191e-01, 2.27080940e-03], [9.99353267e-01, 6.46732792e-04], [3.50792062e-10, 1.00000000e+00], [5.13338197e-01, 4.86661803e-01], [9.99995551e-01, 4.44947030e-06], [1.24334106e-03, 9.98756659e-01], [3.09770654e-09, 9.99999997e-01], [9.99630564e-01, 3.69436078e-04], [7.36306901e-03, 9.92636931e-01], [9.99979306e-01, 2.06937603e-05], [9.99049022e-01, 9.50977587e-04], [9.08804143e-11, 1.00000000e+00], [1.29509037e-02, 9.87049096e-01], [9.35444560e-01, 6.45554404e-02], [4.08117984e-13, 1.00000000e+00], [1.43085504e-03, 9.98569145e-01], [9.93007993e-01, 6.99200694e-03], [9.99558359e-01, 4.41641325e-04], [9.99579889e-01, 4.20111213e-04], [9.97983556e-01, 2.01644428e-03], [1.11464910e-03, 9.98885351e-01], [9.99999282e-01, 7.17849294e-07], [9.99511009e-01, 4.88990569e-04], [9.88617371e-01, 1.13826293e-02], [7.47205755e-01, 2.52794245e-01], [1.58210612e-06, 9.99998418e-01], [7.22159358e-04, 9.99277841e-01], [9.95976818e-01, 4.02318210e-03], [9.99490400e-01, 5.09599965e-04], [1.21661914e-06, 9.99998783e-01], [9.96881683e-01, 3.11831660e-03], [7.06964516e-03, 9.92930355e-01], [9.93429161e-01, 6.57083879e-03], [3.64909006e-01, 6.35090994e-01], [9.99939950e-01, 6.00503896e-05], [6.70412248e-03, 9.93295878e-01], [1.95344046e-01, 8.04655954e-01], [9.27697000e-01, 7.23029996e-02], [9.89157785e-01, 1.08422146e-02], [9.99570437e-01, 4.29563258e-04], [1.18580853e-02, 9.88141915e-01], [9.98235624e-01, 1.76437639e-03], [4.83236274e-01, 5.16763726e-01], [9.98847334e-01, 1.15266553e-03], [9.98211977e-01, 1.78802267e-03], [9.59709791e-01, 4.02902094e-02], [1.25040044e-02, 9.87495996e-01], [5.22130239e-01, 4.77869761e-01], [9.97510241e-01, 2.48975873e-03], [7.40221868e-01, 2.59778132e-01], [3.11532846e-02, 9.68846715e-01], [3.35890846e-08, 9.99999966e-01], [9.94061745e-01, 5.93825500e-03], [5.56928355e-03, 9.94430716e-01], [2.46274036e-06, 9.99997537e-01], [9.90885375e-01, 9.11462464e-03], [4.87069974e-02, 9.51293003e-01], [7.26195193e-01, 
2.73804807e-01], [9.50089969e-01, 4.99100315e-02], [9.94412968e-01, 5.58703245e-03], [9.98425732e-01, 1.57426843e-03], [7.76038648e-03, 9.92239614e-01], [7.57397125e-07, 9.99999243e-01], [5.22451369e-06, 9.99994775e-01], [9.89003499e-01, 1.09965011e-02], [4.80143543e-02, 9.51985646e-01], [4.71385394e-02, 9.52861461e-01], [9.99774714e-01, 2.25285540e-04], [9.99260983e-01, 7.39016969e-04], [9.99469501e-01, 5.30498785e-04], [9.97025216e-01, 2.97478381e-03], [9.93825212e-01, 6.17478761e-03], [9.85669937e-01, 1.43300635e-02], [2.99837734e-06, 9.99997002e-01], [9.99802277e-01, 1.97723394e-04], [2.62257489e-08, 9.99999974e-01], [9.99057213e-01, 9.42786838e-04], [9.63352603e-01, 3.66473966e-02], [2.48757078e-04, 9.99751243e-01], [9.97513777e-01, 2.48622322e-03], [4.43024068e-04, 9.99556976e-01], [1.18026326e-01, 8.81973674e-01], [9.99766346e-01, 2.33654185e-04], [9.61558338e-01, 3.84416623e-02], [9.83615094e-01, 1.63849059e-02], [5.10859791e-06, 9.99994891e-01], [9.58016902e-01, 4.19830978e-02], [4.79157752e-02, 9.52084225e-01], [3.43725048e-13, 1.00000000e+00], [1.94809110e-04, 9.99805191e-01], [1.13770183e-08, 9.99999989e-01], [4.04487590e-03, 9.95955124e-01], [9.99596411e-01, 4.03588743e-04], [1.76655584e-02, 9.82334442e-01], [9.99006188e-01, 9.93811623e-04], [7.89226049e-01, 2.10773951e-01], [9.10505256e-01, 8.94947438e-02], [9.96749470e-01, 3.25052998e-03], [9.99982793e-01, 1.72074796e-05], [1.77635684e-15, 1.00000000e+00], [4.58170705e-02, 9.54182930e-01], [9.68718952e-01, 3.12810484e-02], [9.84469687e-01, 1.55303134e-02], [9.96506048e-01, 3.49395157e-03], [9.97012723e-01, 2.98727685e-03], [1.35246856e-02, 9.86475314e-01], [8.86478631e-01, 1.13521369e-01], [9.99988451e-01, 1.15486279e-05], [8.67322439e-01, 1.32677561e-01], [9.90403927e-01, 9.59607292e-03], [1.23647947e-03, 9.98763521e-01], [6.10622664e-14, 1.00000000e+00], [9.96959631e-01, 3.04036869e-03], [9.99545441e-01, 4.54559056e-04], [9.99995450e-01, 4.54957037e-06], [9.23948232e-01, 7.60517683e-02], 
[9.99709917e-01, 2.90082676e-04], [9.99990823e-01, 9.17704762e-06], [9.99915937e-01, 8.40631544e-05], [9.99986734e-01, 1.32662289e-05], [9.98336038e-01, 1.66396172e-03], [9.99961765e-01, 3.82347230e-05], [9.13803387e-01, 8.61966127e-02], [5.71515191e-09, 9.99999994e-01], [9.99692738e-01, 3.07261980e-04], [9.99032059e-01, 9.67941259e-04], [2.46956525e-01, 7.53043475e-01], [9.97768102e-01, 2.23189754e-03], [9.82422696e-01, 1.75773039e-02], [8.41075141e-01, 1.58924859e-01], [9.82059450e-01, 1.79405499e-02], [1.53836229e-07, 9.99999846e-01], [9.97198586e-01, 2.80141386e-03], [9.99905151e-01, 9.48489308e-05], [7.78652349e-07, 9.99999221e-01], [1.06505702e-06, 9.99998935e-01], [9.98816528e-01, 1.18347197e-03], [9.86061473e-01, 1.39385269e-02], [1.63169142e-03, 9.98368309e-01], [9.87330105e-01, 1.26698949e-02], [1.04133699e-02, 9.89586630e-01], [5.36966027e-10, 9.99999999e-01]])