Intro to SKLearn

In [5]:
# import packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
import seaborn as sns; sns.set()
In [9]:
# Load the iris dataset bundled with scikit-learn. The returned object is used
# below through its .data, .target, .feature_names and .target_names attributes.
iris = datasets.load_iris()
# Alternative dataset, left disabled:
#digits = datasets.load_digits()
In [4]:
# Explore the iris data set.
# These are the names of the feature columns; the features are the inputs
# we will use to predict the target.
iris.feature_names
Out[4]:
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']
In [12]:
# What are the targets?
# target_names lists the species labels the model will try to predict.
iris.target_names
Out[12]:
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
In [15]:
#iris.data
In [13]:
from sklearn.model_selection import train_test_split
In [67]:
# Split the iris data into a training part and a held-out test part.
# Inputs : feature matrix, target vector, fraction reserved for testing,
#          and a seed so the same split is produced on every run.
# Outputs: train features, test features, train targets, test targets
#          (in that order).
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.33,
    random_state=42,
)
In [68]:
from sklearn.linear_model import LogisticRegression
In [71]:
# Configure a multinomial logistic regression classifier (SAGA solver,
# stopping tolerance 0.001, fixed seed) and train it on the training split only.
clf = LogisticRegression(
    solver='saga',
    multi_class='multinomial',
    tol=0.001,
    random_state=1,
)
# Kept as the last expression so the fitted estimator is echoed as the cell output.
clf.fit(X_train, y_train)
Out[71]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=1, solver='saga', tol=0.001, verbose=0,
                   warm_start=False)
In [72]:
# Evaluate the fitted classifier on the held-out test split
# (for scikit-learn classifiers, .score reports mean accuracy).
score = clf.score(X_test, y_test)
# Bare expression so the value is displayed as the cell output.
score
Out[72]:
1.0
In [30]:
# Visualise the fitted coefficients: one heat-map panel per class.
coef = clf.coef_.copy()
# Largest absolute weight; used as symmetric colour limits so every panel
# shares the same scale and zero sits at the centre of the diverging colormap.
scale = np.abs(coef).max()

fig, axes = plt.subplots(1, 3, figsize=(10, 5))
for class_idx, ax in enumerate(axes):
    # Each class has four weights (one per feature); they are arranged as a
    # 2x2 grid purely so they can be drawn as an image.
    weights_grid = coef[class_idx].reshape(2, 2)
    ax.imshow(
        weights_grid,
        interpolation='nearest',
        cmap=plt.cm.RdBu,
        vmin=-scale,
        vmax=scale,
    )
    # Tick marks carry no meaning for this layout, so hide them.
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlabel('Class %i' % class_idx)
fig.suptitle('Classification vector for...')
plt.show()
In [48]:
# Small example Series with explicit, unsorted string labels.
bop = pd.Series(data=[1, 0, 0], index=list('cad'))
In [50]:
# Relabel the Series positionally: assigning to .index swaps the labels
# while leaving the values and their order untouched.
bop.index = list('abc')
In [ ]: