Scalable AI Models with PyTorch Lightning
Sergiy Tkachuk
Director, GenAI Productivity
$$
$$
$$
$$
$$
import torch
from torch.quantization import quantize_dynamic

# Build a dynamically quantized copy of `model`: only the `torch.nn.Linear`
# submodules are targeted, and `torch.qint8` is requested as the quantized
# dtype.
# NOTE(review): `model` is not defined in this snippet; it is assumed to be
# an already-constructed module created earlier -- confirm.
model_quantized = quantize_dynamic(
    model,
    {torch.nn.Linear},
    dtype=torch.qint8,
)
$$
$$
$$
import time

import torch


def measure_time(model, data_loader):
    """Return the seconds spent running ``model`` over every batch.

    Args:
        model: Callable object exposing ``eval()``; it is invoked once per
            item yielded by ``data_loader``.
        data_loader: Iterable of inputs. Each yielded item is passed to
            ``model`` unchanged.
            NOTE(review): if the loader yields ``(inputs, labels)`` pairs,
            the whole pair is forwarded as-is -- confirm against the caller.

    Returns:
        Elapsed time in seconds as a float (0.0 or slightly above for an
        empty loader).
    """
    model.eval()  # Set model to evaluation mode

    # Gradient tracking is irrelevant for an inference benchmark and only
    # adds overhead, so it is disabled for the duration of the loop.
    with torch.no_grad():
        # perf_counter() is monotonic and high-resolution, so the interval
        # cannot be skewed by system clock adjustments the way time.time()
        # can.
        start_time = time.perf_counter()
        for inputs in data_loader:
            _ = model(inputs)
        end_time = time.perf_counter()

    return end_time - start_time
# Time the original and the quantized model on the same loader and report
# both durations. The quantized model is bound to `model_quantized` where it
# is created, so that is the name that must be used here.
original_time = measure_time(model, test_loader)
quant_time = measure_time(model_quantized, test_loader)
print(f"Original Model Time: {original_time:.2f}s")
print(f"Quantized Model Time: {quant_time:.2f}s")
Scalable AI Models with PyTorch Lightning