Natural Language Processing with spaCy
Azadeh Mobasher
Principal Data Scientist
spaCy
# ... model.
# Visualize word vectors in two dimensions: look up the vector of each word
# in the spaCy vocabulary, reduce the vectors to 2 components with PCA, and
# draw a labelled scatter plot.
import matplotlib.pyplot as plt
import numpy as np
import spacy  # required by spacy.load below; the original snippet omitted it
from sklearn.decomposition import PCA

nlp = spacy.load("en_core_web_md")

words = [
    "wonderful", "horrible",
    "apple", "banana", "orange", "watermelon",
    "dog", "cat",
]

# One row per word: map the string to its vocabulary hash, then index the
# vectors table with that hash. A word with no entry in the vectors table
# raises KeyError here.
word_vectors = np.vstack(
    [nlp.vocab.vectors[nlp.vocab.strings[w]] for w in words]
)

# Project the word vectors down to 2 components so they can be plotted.
pca = PCA(n_components=2)
word_vectors_transformed = pca.fit_transform(word_vectors)

plt.figure(figsize=(10, 8))
plt.scatter(word_vectors_transformed[:, 0], word_vectors_transformed[:, 1])

# Label every point with the word it represents.
for word, (x, y) in zip(words, word_vectors_transformed):
    plt.text(x, y, word, size=10)

# Show the figure once, after all labels have been added.
plt.show()
spaCy
# Find semantically similar terms to a given term
import numpy as np
import spacy

nlp = spacy.load("en_core_web_md")
word = "covid"

# Vectors.most_similar takes a 2-D array of query vectors, so the single
# word vector is wrapped in a list before conversion to an array.
most_similar_words = nlp.vocab.vectors.most_similar(
    np.asarray([nlp.vocab.vectors[nlp.vocab.strings[word]]]),
    n=5,
)

# The first element of the result holds the keys (vocabulary hashes) of the
# matches; its row 0 belongs to our only query. Map each hash back to text.
words = [nlp.vocab.strings[w] for w in most_similar_words[0][0]]
print(words)
>>> ['Covi', 'CoVid', 'Covici', 'COVID-19', 'corona']
Natural Language Processing with spaCy