Efficient AI Model Training with PyTorch
Dennis Lee
Data Engineer
# Hugging Face Trainer configuration: evaluate once per epoch, and accumulate
# gradients over 4 micro-batches to simulate a 4x larger effective batch size.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    gradient_accumulation_steps=4,
)
# Same configuration as above, plus gradient checkpointing: activations are
# recomputed during backprop instead of stored, trading compute for memory.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
)
# Wire the model, arguments, data splits, and metric callback into a Trainer,
# then launch fine-tuning.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["validation"],
    compute_metrics=compute_metrics,
)
trainer.train()
{'epoch': 1.0, 'eval_loss': 0.73, 'eval_accuracy': 0.03, 'eval_f1': 0.05}
# Manual training loop with Accelerate gradient accumulation: inside
# `accelerator.accumulate(model)`, gradients are summed across micro-batches
# and the optimizer/scheduler steps only take real effect every
# `gradient_accumulation_steps` iterations.
accelerator = Accelerator(gradient_accumulation_steps=2)
for batch in dataloader:
    with accelerator.accumulate(model):
        input_ids = batch["input_ids"]
        labels = batch["labels"]
        loss = model(input_ids, labels=labels).loss
        accelerator.backward(loss)
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
# Gradient accumulation combined with gradient checkpointing: enabling
# checkpointing on the model lowers activation memory, while `accumulate`
# batches up gradient updates as in the previous loop.
accelerator = Accelerator(gradient_accumulation_steps=2)
model.gradient_checkpointing_enable()
for batch in dataloader:
    with accelerator.accumulate(model):
        input_ids = batch["input_ids"]
        labels = batch["labels"]
        loss = model(input_ids, labels=labels).loss
        accelerator.backward(loss)
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
# Accumulation-aware training loop (reuses the `accelerator` configured
# above): backward passes accumulate until the configured step count is hit.
for step, batch in enumerate(dataloader):
    with accelerator.accumulate(model):
        features = batch["input_ids"]
        target_ids = batch["labels"]
        model_out = model(features, labels=target_ids)
        accelerator.backward(model_out.loss)
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
from accelerate.local_sgd import LocalSGD

# Local SGD: each worker trains independently and model parameters are
# averaged across workers only every `local_sgd_steps` optimizer steps,
# reducing inter-worker communication overhead.
with LocalSGD(accelerator=accelerator, model=model, local_sgd_steps=8,
              enabled=True) as local_sgd:
    for index, batch in enumerate(dataloader):
        with accelerator.accumulate(model):
            inputs, targets = batch["input_ids"], batch["labels"]
            outputs = model(inputs, labels=targets)
            loss = outputs.loss
            accelerator.backward(loss)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            # FIX: this call was missing. Without `local_sgd.step()` the
            # LocalSGD context never advances its step counter, so the
            # periodic cross-worker parameter averaging never happens and
            # workers silently diverge.
            local_sgd.step()
from accelerate.local_sgd import LocalSGD

# Local SGD training loop: workers step their local copies independently;
# calling `local_sgd.step()` each iteration lets the context average
# parameters across workers every `local_sgd_steps` optimizer steps.
with LocalSGD(accelerator=accelerator, model=model, local_sgd_steps=8,
              enabled=True) as local_sgd:
    for step, batch in enumerate(dataloader):
        with accelerator.accumulate(model):
            input_ids = batch["input_ids"]
            labels = batch["labels"]
            loss = model(input_ids, labels=labels).loss
            accelerator.backward(loss)
            optimizer.step()
            lr_scheduler.step()
            optimizer.zero_grad()
            local_sgd.step()
Efficient AI Model Training with PyTorch