Working with Hugging Face
Jacob H. Marquez
Lead Data Engineer
from transformers import pipeline

# Build a text-classification pipeline: downloads the model weights and
# tokenizer on first use, then wraps pre/post-processing around inference.
# NOTE: fixed unbalanced parentheses — the original had a stray extra `)`.
my_pipeline = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Returns a list of dicts like [{'label': 'NEGATIVE', 'score': 0.99}]
print(my_pipeline("Wi-Fi is slower than a snail today!"))
[{'label': 'NEGATIVE', 'score': 0.99}]
$$
$$
$$
from transformers import AutoModelForSequenceClassification

# Download a pre-trained text classification model.
# NOTE: the assignment was originally fused onto the comment line,
# which commented the code out entirely — separated here so it runs.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
$$
from transformers import AutoTokenizer

# Retrieve the tokenizer paired with the model — the tokenizer must match
# the checkpoint the model was trained with for correct token IDs.
# NOTE: the assignment was originally fused onto the comment line,
# which commented the code out entirely — separated here so it runs.
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
$$
# Assumes AutoTokenizer is already imported from transformers (see earlier slide).
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Tokenize input text into subword pieces (uncased WordPiece vocabulary).
# NOTE: the assignment and print were originally fused onto the comment
# line, which commented them out — separated here so they run.
tokens = tokenizer.tokenize("AI: Helping robots think and humans overthink:)")
print(tokens)
['ai', ':', 'helping', 'robots', 'think', 'and',
'humans', 'over', '##thi', '##nk', ':', ')']
Our model (distilbert-base-uncased):
['ai', ':', 'helping', 'robots', 'think', 'and', 'humans', 'over', '##thi',
'##nk', ':', ')']
BERT-Base-Cased Tokenizer:
['AI', ':', 'Help', '##ing', 'robots', 'think', 'and', 'humans', 'over',
'##thin', '##k', ':', ')']
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Download the model and tokenizer explicitly instead of letting
# pipeline() resolve them from a checkpoint name — this gives more
# control (e.g. swapping tokenizers or customizing the model first).
# NOTE: statements were originally fused onto the comment lines,
# which commented the code out — separated here so it runs.
my_model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
my_tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)

# Create the custom pipeline from the preloaded components.
my_pipeline = pipeline(
    task="sentiment-analysis",
    model=my_model,
    tokenizer=my_tokenizer,
)
$$
🔧 Use the Auto classes (model + tokenizer) for more control and customization
📝 Text Preprocessing: Clean and tokenize for specific use cases
$$
Working with Hugging Face