# import packages
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
import seaborn as sns; sns.set()
iris = datasets.load_iris()
#digits = datasets.load_digits()
# explore iris data set
# we are going to use the features to predict the target
iris.feature_names
# ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
# what are the targets?
# these are the species we are trying to predict
iris.target_names
# array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
#iris.data
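# (added sketch, not in the original notebook) since seaborn is imported above but
# otherwise unused, a quick way to eyeball class separability is a pairplot of the
# four features colored by species; the DataFrame and column names are my own choices
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
sns.pairplot(iris_df, hue='species')
plt.show()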
from sklearn.model_selection import train_test_split
# creating a training and test split
# train_test_split takes the data, the targets, a test-set fraction, and a random state
# and returns train features, test features, train targets, test targets
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.33, random_state=42)
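# (added check) quick sanity check on the split sizes; with 150 samples and
# test_size=0.33 this should come out to roughly 100 train / 50 test rows
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)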
from sklearn.linear_model import LogisticRegression
# fit a logistic regression model on training data
clf = LogisticRegression(multi_class='multinomial', solver='saga', tol=0.001, random_state=1)
clf.fit(X_train, y_train)
# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True, intercept_scaling=1, l1_ratio=None, max_iter=100, multi_class='multinomial', n_jobs=None, penalty='l2', random_state=1, solver='saga', tol=0.001, verbose=0, warm_start=False)
score = clf.score(X_test, y_test)
score
# 1.0
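# (added sketch) the single accuracy score hides per-class behavior; sklearn's
# metrics module gives a confusion matrix and per-class precision/recall
from sklearn.metrics import classification_report, confusion_matrix
y_pred = clf.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=iris.target_names))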
# visualize the fitted coefficients: for each of the three classes,
# show its four feature weights as a small 2x2 heatmap
coef = clf.coef_.copy()
plt.figure(figsize=(10, 5))
scale = np.abs(coef).max()
for i in range(3):
    l1_plot = plt.subplot(1, 3, i + 1)
    l1_plot.imshow(coef[i].reshape(2, 2), interpolation='nearest',
                   cmap=plt.cm.RdBu, vmin=-scale, vmax=scale)
    l1_plot.set_xticks(())
    l1_plot.set_yticks(())
    l1_plot.set_xlabel('Class %i' % i)
plt.suptitle('Classification vector for...')
plt.show()
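# (added sketch) the heatmaps above drop the feature labels, so as a companion
# view the same coefficients can be tabulated against the feature names
# (the DataFrame layout is my own choice, not part of the original notebook)
coef_table = pd.DataFrame(clf.coef_, index=iris.target_names, columns=iris.feature_names)
print(coef_table)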
# small pandas aside: assigning to .index relabels the positions in place,
# so the values keep their original order under the new labels
bop = pd.Series([1, 0, 0], index=['c', 'a', 'd'])
bop.index = ['a', 'b', 'c']
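# (added illustration) contrast with reindex, which aligns by label instead of
# relabelling positions in place; 'b' was never a label, so it comes back as NaN
bop2 = pd.Series([1, 0, 0], index=['c', 'a', 'd'])
print(bop2.reindex(['a', 'b', 'c']))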