Deep Learning for Text with PyTorch
Shubham Jain
Data Scientist
Prerequisite course: Intermediate Deep Learning with PyTorch



torchtext provides ready-made tokenizers; the "basic_english" tokenizer lowercases the text and splits punctuation into separate tokens:

```python
from torchtext.data.utils import get_tokenizer

tokenizer = get_tokenizer("basic_english")
tokens = tokenizer("I am reading a book now. I love to read books!")
print(tokens)
```

```
['i', 'am', 'reading', 'a', 'book', 'now', '.', 'i', 'love', 'to', 'read', 'books', '!']
```
NLTK's stopword list removes high-frequency function words ("i", "am", "a", "to", ...) that carry little meaning on their own:

```python
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

stop_words = set(stopwords.words('english'))
tokens = ['i', 'am', 'reading', 'a', 'book', 'now', '.', 'i', 'love', 'to', 'read', 'books', '!']
filtered_tokens = [token for token in tokens if token.lower() not in stop_words]
print(filtered_tokens)
```

```
['reading', 'book', '.', 'love', 'read', 'books', '!']
```
The Porter stemmer reduces each token to its root form, mapping "reading" and "books" to "read" and "book":

```python
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()
filtered_tokens = ['reading', 'book', '.', 'love', 'read', 'books', '!']
stemmed_tokens = [stemmer.stem(token) for token in filtered_tokens]
print(stemmed_tokens)
```

```
['read', 'book', '.', 'love', 'read', 'book', '!']
```
Rare word removal keeps only tokens whose frequency meets a threshold; with a threshold of 2, only "read" and "book" (two occurrences each) survive:

```python
from nltk.probability import FreqDist

stemmed_tokens = ['read', 'book', '.', 'love', 'read', 'book', '!']
freq_dist = FreqDist(stemmed_tokens)
threshold = 2
common_tokens = [token for token in stemmed_tokens if freq_dist[token] >= threshold]
print(common_tokens)
```

```
['read', 'book', 'read', 'book']
```
Tokenization, stopword removal, stemming, and rare word removal
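Putting the four steps together, here is a minimal sketch of a single preprocessing helper (the function name `preprocess_text` and the `freq_threshold` parameter are illustrative, not from the course):

```python
import nltk
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.stem import PorterStemmer
from torchtext.data.utils import get_tokenizer

nltk.download('stopwords')

def preprocess_text(text, freq_threshold=2):
    """Tokenize, remove stopwords, stem, and drop rare tokens (hypothetical helper)."""
    tokenizer = get_tokenizer("basic_english")
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()

    tokens = tokenizer(text)                              # tokenization
    tokens = [t for t in tokens if t not in stop_words]   # stopword removal
    tokens = [stemmer.stem(t) for t in tokens]            # stemming
    freq_dist = FreqDist(tokens)                          # rare word removal
    return [t for t in tokens if freq_dist[t] >= freq_threshold]

print(preprocess_text("I am reading a book now. I love to read books!"))
# ['read', 'book', 'read', 'book']
```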