Scalable AI Models with PyTorch Lightning
Sergiy Tkachuk
Director, GenAI Productivity
$$

$$

$$

$$
$$
import torch
from torch.quantization import quantize_dynamic

# Apply dynamic quantization to `model`: the module types listed in the set
# (here only torch.nn.Linear) are targeted, with torch.qint8 as the
# quantized dtype.
# NOTE(review): `model` is not defined in this snippet; it is assumed to be
# an already-constructed torch.nn.Module from earlier in the deck.
model_quantized = quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8,
)

$$
$$
$$
import time

import torch


def measure_time(model, data_loader):
    """Return the seconds spent running `model` over every batch in `data_loader`.

    Args:
        model: Callable module exposing ``eval()``; invoked once per batch
            as ``model(inputs)``.
        data_loader: Iterable yielding the inputs passed to the model.

    Returns:
        Elapsed time in seconds as a float.
    """
    model.eval()  # Set model to evaluation mode
    # perf_counter is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() can jump if the system clock changes.
    start_time = time.perf_counter()
    # Inference only: disable autograd bookkeeping so it does not distort
    # the measured forward-pass time.
    with torch.no_grad():
        for inputs in data_loader:
            _ = model(inputs)
    return time.perf_counter() - start_time


# NOTE(review): `model`, `model_quantized`, and `test_loader` are not defined
# in this snippet; they are assumed to come from earlier in the deck.
original_time = measure_time(model, test_loader)
quant_time = measure_time(model_quantized, test_loader)
print(f"Original Model Time: {original_time:.2f}s")
print(f"Quantized Model Time: {quant_time:.2f}s")
Scalable AI Models with PyTorch Lightning