Natural Language Generation in Python
Biswanath Halder
Data Scientist
# Length of the longest sentence on each side; these set the size of the
# second (position) axis of the arrays allocated below.
max_len_eng_sent = max(map(len, english_sentences))
max_len_fra_sent = max(map(len, french_sentences))

# Array shapes are (number of sentences, longest sentence, vocabulary size).
eng_shape = (len(english_sentences), max_len_eng_sent, len(english_vocab))
fra_shape = (len(french_sentences), max_len_fra_sent, len(french_vocab))

# Zero-filled float32 arrays; the target array has the same shape as the
# French input array but is a separate allocation.
eng_input_data = np.zeros(eng_shape, dtype='float32')
fra_input_data = np.zeros(fra_shape, dtype='float32')
target_data = np.zeros(fra_shape, dtype='float32')
# Fill the pre-allocated arrays with one-hot vectors, one per character.
# NOTE(review): no_of_sentences is defined outside this snippet; it is
# presumably no larger than either sentence list -- confirm.
for i in range(no_of_sentences):
    # English sentence i: mark the vocabulary index of each character.
    for k, ch in enumerate(english_sentences[i]):
        eng_input_data[i, k, eng_char_to_idx[ch]] = 1.
    # French sentence i: mark the vocabulary index of each character.
    for k, ch in enumerate(french_sentences[i]):
        fra_idx = fra_char_to_idx[ch]
        fra_input_data[i, k, fra_idx] = 1.
        # The target is the French input shifted one timestep ahead:
        # the character at position k is the target for position k - 1,
        # so the first character (k == 0) has no target slot.
        if k > 0:
            target_data[i, k - 1, fra_idx] = 1.
# Minimal functional-API example.
# Symbolic input that accepts vectors with 784 features.
inputs = Input(shape=(784,))
# A dense layer with 64 units and ReLU activation; calling the layer on the
# input tensor returns the layer's output tensor.
dense_layer = Dense(64, activation='relu')
predictions = dense_layer(inputs)
# Wrap the input -> dense-output graph in a model.
model = Model(inputs=inputs, outputs=predictions)
# Encoder: reads sequences whose feature size is the English vocabulary
# size; the None leaves the timestep dimension unspecified.
encoder_input = Input(shape=(None, len(english_vocab)))
# return_state=True makes the layer return its final hidden and cell states
# in addition to its output.
encoder_LSTM = LSTM(256, return_state=True)
# The first returned value is the output; the remaining two are collected
# into the state list that is later used to initialise the decoder.
encoder_outputs, *encoder_states = encoder_LSTM(encoder_input)
encoder_h, encoder_c = encoder_states
# Decoder layers: an LSTM that returns its full output sequence (and its
# states), followed by a softmax layer over the French vocabulary.
decoder_input = Input(shape=(None, len(french_vocab)))
decoder_LSTM = LSTM(256, return_sequences=True, return_state=True)
decoder_dense = Dense(len(french_vocab), activation='softmax')

# Wire the decoder: it starts from the encoder's final states, and its own
# returned states are not needed here, so they are discarded.
decoder_out, _, _ = decoder_LSTM(
    decoder_input,
    initial_state=encoder_states,
)
decoder_out = decoder_dense(decoder_out)

# Two-input model: (encoder input, decoder input) -> decoder output.
model = Model(
    inputs=[encoder_input, decoder_input],
    outputs=[decoder_out],
)
# Print the layer-by-layer summary of the encoder-decoder model.
model.summary()
# Targets are one-hot vectors over the French vocabulary, hence categorical
# cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy')
# Feed the arrays that match the model's two inputs: the English one-hot
# array for the encoder and the French one-hot array for the decoder.
# target_data is the French array shifted one timestep ahead.
# The last 20% of the samples are held out for validation.
model.fit(
    x=[eng_input_data, fra_input_data],
    y=target_data,
    batch_size=64,
    epochs=1,
    validation_split=0.2,
)
Natural Language Generation in Python