Deep Learning for Text with PyTorch
Shubham Jain
Instructor

Applications of text classification:
Types: binary, multi-class, multi-label (illustrated in the sketch below)
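As a hedged illustration (not from the slides), the classification type mainly changes the output layer and the matching loss; the hidden size of 32 and the 4 classes/labels below are made-up values for the example.

import torch
from torch import nn

hidden = torch.randn(2, 32)            # a batch of 2 feature vectors (sizes assumed)

binary_head = nn.Linear(32, 1)         # binary: one logit, paired with nn.BCEWithLogitsLoss
multi_class_head = nn.Linear(32, 4)    # multi-class: one logit per class, paired with nn.CrossEntropyLoss
multi_label_head = nn.Linear(32, 4)    # multi-label: one independent logit per label, paired with nn.BCEWithLogitsLoss

print(binary_head(hidden).shape)       # torch.Size([2, 1])
print(multi_class_head(hidden).shape)  # torch.Size([2, 4])
print(multi_label_head(hidden).shape)  # torch.Size([2, 4])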




torch.nn.Embedding:
Embedding for 'the': tensor([-0.4689, 0.3164, -0.2971, -0.1291, 0.4064])
Embedding for 'cat': tensor([-0.0978, -0.4764, 0.0476, 0.1044, -0.3976])
Embedding for 'sat': tensor([ 0.2731, 0.4431, 0.1275, 0.1434, -0.4721])
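A minimal sketch that could produce per-word lookups like the ones above; the three-word vocabulary and embedding_dim=5 are assumptions, and the printed values are random at initialization, so they will not match exactly.

import torch
from torch import nn

vocab = {"the": 0, "cat": 1, "sat": 2}   # assumed toy vocabulary
embedding = nn.Embedding(num_embeddings=len(vocab), embedding_dim=5)

for word, idx in vocab.items():
    vector = embedding(torch.tensor([idx]))[0]   # look up one row of the embedding table
    print(f"Embedding for '{word}': {vector.detach()}")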
import torch
from torch import nn

# Map each word to an integer index
words = ["The", "cat", "sat", "on", "the", "mat"]
word_to_idx = {word: i for i, word in enumerate(words)}
inputs = torch.LongTensor([word_to_idx[w] for w in words])

# Look up a 10-dimensional embedding for each word index
embedding = nn.Embedding(num_embeddings=len(words), embedding_dim=10)
output = embedding(inputs)
print(output)
tensor([[ 1.0624,  0.6792,  0.0459,  ..., -1.0828, -0.4475,  0.4868],
        ...,
        [ 1.5766,  0.0106,  0.1161,  ..., -0.0859,  1.3160,  1.3621]])
from torch.utils.data import Dataset

def preprocess_sentences(text):
    # Tokenization
    # Stemming
    ...
    # Word to index mapping

class TextDataset(Dataset):
    def __init__(self, encoded_sentences):
        self.data = encoded_sentences

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]
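The preprocessing body is left as placeholders above; one hedged way to fill it in is a plain whitespace tokenizer plus a word-to-index mapping. Lowercasing stands in for stemming here, and the vocabulary is built from the input itself; both are assumptions of this sketch rather than the course's exact pipeline.

import torch

def preprocess_sentences(text):
    # Tokenization: naive lowercase + whitespace split (stemming skipped in this sketch)
    tokens = text.lower().split()
    # Word to index mapping built from the tokens themselves
    word_to_idx = {word: i for i, word in enumerate(sorted(set(tokens)))}
    # Encode each token as its integer index
    return torch.LongTensor([word_to_idx[token] for token in tokens])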
from torch.utils.data import DataLoader

def text_processing_pipeline(text):
    # Encode the text, wrap it in a Dataset, and serve it in shuffled batches of 2
    tokens = preprocess_sentences(text)
    dataset = TextDataset(tokens)
    dataloader = DataLoader(dataset, batch_size=2, shuffle=True)
    return dataloader

text = "Your sample text here."
dataloader = text_processing_pipeline(text)

# Embed each batch of word indices into 50-dimensional vectors
embedding = nn.Embedding(num_embeddings=10, embedding_dim=50)
for batch in dataloader:
    output = embedding(batch)
    print(output)