Trabajar con Hugging Face
Jacob H. Marquez
Lead Data Engineer
from transformers import pipeline

# Build a sentiment-analysis pipeline from a pretrained checkpoint.
# (Original slide had an extra closing parenthesis after the model name,
# which is a syntax error — removed here.)
my_pipeline = pipeline(
    "text-classification",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# NOTE(review): the model is English-only (SST-2), so Spanish input is
# out-of-domain; the slide still shows a NEGATIVE prediction.
print(my_pipeline("¡El Wi-Fi va más lento que un caracol hoy!"))
[{'label': 'NEGATIVE', 'score': 0.99}]
$$
$$

$$
from transformers import AutoModelForSequenceClassification

# Download a pretrained text-classification model.
# (The slide extraction collapsed this snippet onto one line, leaving the
# assignment fused into the comment — reconstructed as runnable code.)
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
$$
from transformers import AutoTokenizer

# Fetch the tokenizer paired with the model checkpoint.
# (Reconstructed from a one-line extraction where the assignment was
# swallowed by the fused comment.)
tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
$$
# NOTE(review): relies on AutoTokenizer imported on a previous slide.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

# Tokenize the input text (WordPiece: unknown pieces get "##" prefixes).
# (Reconstructed from a one-line extraction where the code after the
# fused comment never executed.)
tokens = tokenizer.tokenize("AI: Helping robots think and humans overthink:)")
print(tokens)
['ai', ':', 'helping', 'robots', 'think', 'and',
'humans', 'over', '##thi', '##nk', ':', ')']
Nuestro modelo (distilbert-base-uncased):
['ai', ':', 'helping', 'robots', 'think', 'and', 'humans', 'over', '##thi',
'##nk', ':', ')']
Tokenizador BERT-Base-Cased:
['AI', ':', 'Help', '##ing', 'robots', 'think', 'and', 'humans', 'over',
'##thin', '##k', ':', ')']
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Download the model and its matching tokenizer explicitly.
# (Reconstructed from a one-line extraction where every assignment after
# the fused comment was commented out.)
my_model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
my_tokenizer = AutoTokenizer.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)

# Build a custom pipeline from the explicit model + tokenizer objects,
# instead of letting pipeline() resolve them from a checkpoint name.
my_pipeline = pipeline(
    task="sentiment-analysis",
    model=my_model,
    tokenizer=my_tokenizer,
)
$$
🔧 Úsalas para más control y personalización
📝 Preprocesado de texto: Limpia y tokeniza para casos concretos
$$

Trabajar con Hugging Face