# import packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
import seaborn as sns; sns.set()


# Load the iris data set that ships with scikit-learn.
iris = datasets.load_iris()
#digits = datasets.load_digits()


# Explore the iris data set.
# The features listed here are the inputs we will use to predict the target.
# (A bare expression only displays its value in a notebook / REPL.)
iris.feature_names

# Recorded notebook output -- kept as a comment so it is not executed:
# ['sepal length (cm)',
#  'sepal width (cm)',
#  'petal length (cm)',
#  'petal width (cm)']


# What are the targets?
# These are the species we are trying to predict.
iris.target_names

# Recorded notebook output -- kept as a comment; executed verbatim it would
# raise NameError because the bare name `array` is never imported:
# array(['setosa', 'versicolor', 'virginica'], dtype='<U10')


#iris.data


from sklearn.model_selection import train_test_split


# Split the samples into a training part and a held-out test part.
# Inputs: feature matrix, target vector, test size, and a fixed random_state
# so that the same split is produced on every run.
# Outputs, in order: train features, test features, train targets, test targets.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.33,
    random_state=42,
)


from sklearn.linear_model import LogisticRegression


# Fit a multinomial logistic regression model on the training data.
# random_state is fixed so repeated runs of the 'saga' solver give the same fit.
# NOTE(review): newer scikit-learn releases deprecate the `multi_class`
# argument -- confirm against the installed version before upgrading.
clf = LogisticRegression(multi_class='multinomial', solver='saga', tol=0.001, random_state=1)
clf.fit(X_train, y_train)

# Recorded notebook output (repr of the fitted estimator). Kept as a comment:
# executed verbatim it would build a second, unused estimator and would break
# as soon as any of these keyword arguments is removed from the library.
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
#                    intercept_scaling=1, l1_ratio=None, max_iter=100,
#                    multi_class='multinomial', n_jobs=None, penalty='l2',
#                    random_state=1, solver='saga', tol=0.001, verbose=0,
#                    warm_start=False)


# Evaluate the fitted model on the held-out test split.
score = clf.score(X_test, y_test)
score

# Recorded notebook output:
# 1.0


# Visualise the fitted coefficients: one panel per class, one cell per feature.
#
# The original version reshaped every coefficient vector to a 2x2 image and
# looped over a hard-coded range(3). Iris features have no 2-D layout, so the
# cells could not be matched to features, and any other feature/class count
# would fail or silently drop panels. Here both counts come from the
# coefficient array and the rows are labelled with the feature names.
coef = clf.coef_.copy()  # copy so nothing done here can touch the fitted model
n_classes, n_features = coef.shape

# Symmetric colour limits centre the diverging RdBu colormap on zero, and the
# shared limits make the panels directly comparable.
scale = np.abs(coef).max()

# squeeze=False always returns a 2-D array of axes, even for a single panel.
fig, axes = plt.subplots(1, n_classes, figsize=(10, 5), squeeze=False)
for i, (ax, class_coef) in enumerate(zip(axes[0], coef)):
    # Draw the coefficients as a single column: row j <-> feature j.
    ax.imshow(class_coef.reshape(-1, 1), interpolation='nearest',
              cmap=plt.cm.RdBu, vmin=-scale, vmax=scale, aspect='auto')
    ax.set_xticks(())
    if i == 0:
        # Label the rows once, on the left-most panel only.
        ax.set_yticks(range(n_features))
        ax.set_yticklabels(iris.feature_names)
    else:
        ax.set_yticks(())
    ax.set_xlabel('Class %i' % i)
# The title is completed by each panel's "Class i" label.
fig.suptitle('Classification vector for...')
plt.show()


# Small demo Series whose initial labels are 'c', 'a', 'd'.
bop = pd.Series(data=[1, 0, 0], index=list('cad'))

# Assigning to .index relabels the existing values position by position;
# the data itself is neither reordered nor re-aligned.
bop.index = list('abc')

# Intro to SKLearn