Wang Haihua
🍈 🍉🍊 🍋 🍌
前馈神经网络(如MLP和CNN)功能强大,但它们不能处理“顺序”数据。换句话说,它们不具备对之前输入的“记忆”。例如,考虑翻译语料库的情况:你需要结合“上下文”来猜测下一个出现的单词。
循环神经网络(RNN)适合处理序列数据,因为它们具有循环结构——换句话说,它们保留了对序列中较早输入的记忆。同时,为了减少参数的数量,不同时间步的每一层都共享相同的参数。
LSTM(Long Short-Term Memory,长短期记忆)是一种改进的结构,用于解决长期依赖问题。LSTM网络是一种循环神经网络,它用LSTM单元来代替标准的神经网络层。这些单元有不同的组成部分,称为输入门、遗忘门和输出门——这些将在稍后详细解释。以下是LSTM单元的图形表示:
$$ \begin{gathered} f_{t}=\sigma\left(W_{f h} \cdot h_{t-1}+W_{f x} \cdot x_{t}+b_{f}\right) \\ i_{t}=\sigma\left(W_{i h} \cdot h_{t-1}+W_{i x} \cdot x_{t}+b_{i}\right) \\ \tilde{C}_{t}=\tanh \left(W_{C h} \cdot h_{t-1}+W_{C x} \cdot x_{t}+b_{C}\right) \\ C_{t}=f_{t} * C_{t-1}+i_{t} * \tilde{C}_{t} \\ o_{t}=\sigma\left(W_{o h} \cdot h_{t-1}+W_{o x} \cdot x_{t}+b_{o}\right) \\ h_{t}=o_{t} * \tanh \left(C_{t}\right) \end{gathered} $$
import numpy as np
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
# Configuration for loading the Reuters newswire dataset.
# NOTE(review): parameter meanings below follow the Keras loader docs - confirm
# against the installed version.
num_words = 30000   # vocabulary cap: keep only the most frequent words
maxlen = 50         # length filter: sequences of this length or longer are dropped
test_split = 0.3    # fraction of the samples held out as the test set

# Load the two splits first, then unpack each into features and integer labels.
train_split, held_out_split = reuters.load_data(
    num_words=num_words,
    maxlen=maxlen,
    test_split=test_split,
)
X_train, y_train = train_split
X_test, y_test = held_out_split
D:\software_install\anaconda\lib\site-packages\tensorflow\python\keras\datasets\reuters.py:148: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx]) D:\software_install\anaconda\lib\site-packages\tensorflow\python\keras\datasets\reuters.py:149: VisibleDeprecationWarning: Creating an ndarray from ragged nested sequences (which is a list-or-tuple of lists-or-tuples-or ndarrays with different lengths or shapes) is deprecated. If you meant to do this, you must specify 'dtype=object' when creating the ndarray. x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])
# Pad the sequences with zeros.
# padding='post' appends the zeros to the END of each sequence. Both splits are
# padded to one shared length so they always match the model's fixed input
# length, even if the longest test sequence differs from the longest training one.
seq_len = max(len(seq) for split in (X_train, X_test) for seq in split)
X_train = pad_sequences(X_train, maxlen=seq_len, padding='post')
X_test = pad_sequences(X_test, maxlen=seq_len, padding='post')

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1),
# the 3-D layout the recurrent layers are configured for (input_shape=(49, 1)).
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

# One-hot encode the labels of both splits together so they get the same
# number of class columns, then split them back at the real training-set size
# (previously a hard-coded 1395, which breaks as soon as the load parameters change).
n_train = len(y_train)
y_data = np.concatenate((y_train, y_test))
y_data = to_categorical(y_data)
y_train = y_data[:n_train]
y_test = y_data[n_train:]

# Sanity check: sample counts of X and y must agree within each split.
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
(1395, 49, 1) (599, 49, 1) (1395, 46) (599, 46)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN, Activation
from tensorflow.keras import optimizers
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
def vanilla_rnn():
    """Build and compile a single-layer SimpleRNN classifier.

    Architecture: SimpleRNN(50 units) -> Dense(46) -> softmax.
    The input is fixed to 49 timesteps with 1 feature per step, and the
    output has 46 units (one per one-hot label column).

    Returns:
        A compiled Keras ``Sequential`` model using categorical
        cross-entropy loss, the Adam optimizer (learning rate 0.001) and
        accuracy as the reported metric.
    """
    model = Sequential()
    # return_sequences=False: only the last timestep's output feeds the classifier.
    model.add(SimpleRNN(50, input_shape=(49, 1), return_sequences=False))
    model.add(Dense(46))
    model.add(Activation('softmax'))
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    adam = optimizers.Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model
# Wrap the SimpleRNN builder in the scikit-learn style classifier and train it
# on the training split.
rnn_fit_options = dict(epochs=200, batch_size=50, verbose=1)
model = KerasClassifier(build_fn=vanilla_rnn, **rnn_fit_options)
model.fit(X_train, y_train)
from tensorflow.keras.layers import LSTM
def lstm():
    """Build and compile a single-layer LSTM classifier.

    Architecture: LSTM(50 units) -> Dense(46) -> softmax.
    The input is fixed to 49 timesteps with 1 feature per step, and the
    output has 46 units (one per one-hot label column).

    Returns:
        A compiled Keras ``Sequential`` model using categorical
        cross-entropy loss, the Adam optimizer (learning rate 0.001) and
        accuracy as the reported metric.
    """
    model = Sequential()
    # return_sequences=False: only the last timestep's output feeds the classifier.
    model.add(LSTM(50, input_shape=(49, 1), return_sequences=False))
    model.add(Dense(46))
    model.add(Activation('softmax'))
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    adam = optimizers.Adam(learning_rate=0.001)
    model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
    return model
# Wrap the LSTM builder in the scikit-learn style classifier and train it on
# the training split (this rebinds `model`, replacing the earlier classifier).
lstm_fit_options = dict(epochs=200, batch_size=50, verbose=1)
model = KerasClassifier(build_fn=lstm, **lstm_fit_options)
model.fit(X_train, y_train)
# Notebook cell expression: show the installed NumPy version.
np.__version__
'1.21.6'