Working with Hugging Face
Jacob H. Marquez
Lead Data Engineer
$$
pip install datasets
from datasets import load_dataset_builder

# Load dataset metadata
data_builder = load_dataset_builder("imdb")
# Access dataset size
dataset_size_mb = data_builder.info.dataset_size / (1024 ** 2)
print(f"Dataset size: {round(dataset_size_mb, 2)} MB")
Dataset size: 127.02 MB
from datasets import load_dataset
data = load_dataset("imdb")
$$
Split parameter
data = load_dataset("imdb", split="train")
imdb = load_dataset("imdb", split="train")
# Filter imdb
filtered = imdb.filter(lambda row: row['label']==0)
Dataset({
    features: ['text', 'label'],
    num_rows: 12500
})
# Slicing
sliced = filtered.select(range(2))
print(sliced)
Dataset({features: ['text', 'label'], num_rows: 2})
print(sliced[0]['text'])
I rented I AM CURIOUS-YELLOW...
$$
$$
$$
$$
Working with Hugging Face