Introduction to LLMs in Python
Jasmin Ludolf
Senior Data Science Content Developer, DataCamp
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./finetuned",
    evaluation_strategy="epoch",
    num_train_epochs=3,
    learning_rate=2e-5,
)

TrainingArguments(): customize training settings
output_dir: output directory
evaluation_strategy: when to evaluate ("epoch", "steps", or "none")
num_train_epochs: number of training epochs
learning_rate: learning rate for the optimizer

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./finetuned",
    evaluation_strategy="epoch",
    num_train_epochs=3,
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    weight_decay=0.01,
)

per_device_train_batch_size and per_device_eval_batch_size: define the batch size per device for training and evaluation
weight_decay: applied to the optimizer to avoid overfitting

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(...)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_training_data,
    eval_dataset=tokenized_test_data,
    tokenizer=tokenizer
)
trainer.train()

model: the model to fine-tune
args: the training arguments
train_dataset: the data used for training
eval_dataset: the data used for evaluation
tokenizer: the tokenizer

Number of training loops is determined by the dataset size, num_train_epochs, per_device_train_batch_size, and per_device_eval_batch_size, as sketched below
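
A minimal sketch of how these values combine into the number of optimizer steps (the training set size of 1,568 is an assumption chosen only for illustration):

import math

num_training_examples = 1568       # assumed size of the tokenized training set
per_device_train_batch_size = 8
num_train_epochs = 3

steps_per_epoch = math.ceil(num_training_examples / per_device_train_batch_size)
total_steps = steps_per_epoch * num_train_epochs
print(steps_per_epoch, total_steps)  # 196 steps per epoch, 588 steps in total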
{'eval_loss': 0.398524671792984, 'eval_runtime': 33.3145, 'eval_samples_per_second': 46.916,
'eval_steps_per_second': 5.883, 'epoch': 1.0}
{'eval_loss': 0.1745782047510147, 'eval_runtime': 33.5202, 'eval_samples_per_second': 46.629,
'eval_steps_per_second': 5.847, 'epoch': 2.0}
{'loss': 0.4272, 'grad_norm': 15.558795928955078, 'learning_rate': 2.993197278911565e-06,
'epoch': 2.5510204081632653}
{'eval_loss': 0.12216147780418396, 'eval_runtime': 33.2238, 'eval_samples_per_second': 47.045,
'eval_steps_per_second': 5.899, 'epoch': 3.0}
{'train_runtime': 673.0528, 'train_samples_per_second': 6.967, 'train_steps_per_second': 0.874,
'train_loss': 0.40028538347101533, 'epoch': 3.0}
TrainOutput(global_step=588, training_loss=0.40028538347101533, metrics={'train_runtime': 673.0528,
'train_samples_per_second': 6.967, 'train_steps_per_second': 0.874,
'train_loss': 0.40028538347101533, 'epoch': 3.0})
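
The evaluation metrics can also be requested on demand after training; a minimal sketch, assuming the trainer defined above:

eval_metrics = trainer.evaluate()   # runs one full pass over eval_dataset
print(eval_metrics["eval_loss"])    # final evaluation loss, as in the logs above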
import torch

new_data = ["This movie was disappointing!", "This is the best movie ever!"]
new_input = tokenizer(new_data, return_tensors="pt", padding=True, truncation=True, max_length=64)

with torch.no_grad():
    outputs = model(**new_input)

predicted_labels = torch.argmax(outputs.logits, dim=1).tolist()
label_map = {0: "NEGATIVE", 1: "POSITIVE"}

for i, predicted_label in enumerate(predicted_labels):
    sentiment = label_map[predicted_label]
    print(f"\nInput Text {i + 1}: {new_data[i]}")
    print(f"Predicted Sentiment: {sentiment}")
Input Text 1: This movie was disappointing!
Predicted Sentiment: NEGATIVE
Input Text 2: This is the best movie ever!
Predicted Sentiment: POSITIVE
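
If class probabilities are needed rather than only the argmax label, the same logits can be passed through a softmax; a small sketch building on the outputs computed above:

import torch

probabilities = torch.softmax(outputs.logits, dim=1)
for i, probs in enumerate(probabilities.tolist()):
    print(f"Input Text {i + 1}: P(NEGATIVE)={probs[0]:.3f}, P(POSITIVE)={probs[1]:.3f}")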
model.save_pretrained("my_finetuned_files")
tokenizer.save_pretrained("my_finetuned_files")
# Loading a saved model
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model = AutoModelForSequenceClassification.from_pretrained("my_finetuned_files")
tokenizer = AutoTokenizer.from_pretrained("my_finetuned_files")
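
One quick way to sanity-check the reloaded files is to wrap them in a pipeline; a minimal sketch (the "text-classification" task assumes a sequence-classification head, and the returned label names depend on the model config):

from transformers import pipeline

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("This is the best movie ever!"))  # e.g. [{'label': ..., 'score': ...}]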