Fine-Tuning with Llama 3
Francesca Donadoni
Curriculum Manager, DataCamp

model_name="Maykeye/TinyLLama-v0" model = AutoModelForCausalLM.from_pretrained(model_name) tokenizer = AutoTokenizer.from_pretrained(model_name)tokenizer.pad_token = tokenizer.eos_token
from transformers import TrainingArguments

# Key hyperparameters for supervised fine-tuning
training_arguments = TrainingArguments(
    per_device_train_batch_size=1,
    learning_rate=2e-3,
    max_grad_norm=0.3,
    max_steps=200,
    # ... (remaining arguments elided)
    gradient_accumulation_steps=2,
    save_steps=10,
)
from trl import SFTTrainer

# Supervised fine-tuning trainer from the TRL library
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="conversation",
    max_seq_length=250,
    args=training_arguments,
)
trainer.train()
TrainOutput(global_step=200, training_loss=1.9401231002807617,
metrics={'train_runtime': 142.5501,
'train_samples_per_second': 2.806,
'train_steps_per_second': 1.403,
'total_flos': 1461265827840.0,
'train_loss': 1.9401231002807617,
'epoch': 2.0})
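Once training completes, the fine-tuned weights can be persisted for later use. A minimal sketch; the output directory name is a hypothetical placeholder:

# Save the fine-tuned model and tokenizer (directory name is illustrative)
trainer.save_model("tinyllama-finetuned")
tokenizer.save_pretrained("tinyllama-finetuned")

# Per-step metrics recorded during training are kept in the log history
print(trainer.state.log_history[-1])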
import evaluate

# ROUGE measures n-gram overlap between predictions and references
rouge = evaluate.load('rouge')
predictions = ["hello there", "general kenobi"]
references = ["hello there", "master yoda"]
results = rouge.compute(predictions=predictions, references=references)
print(results)
{'rouge1': 0.5, 'rouge2': 0.5, 'rougeL': 0.5, 'rougeLsum': 0.5}
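Each score is 0.5 because aggregation averages a perfect match ("hello there" vs. "hello there", score 1.0) with a pair that shares no n-grams ("general kenobi" vs. "master yoda", score 0.0). A sketch of inspecting the per-example scores, assuming the metric's use_aggregator flag:

# Per-prediction ROUGE scores instead of a single aggregate
results = rouge.compute(
    predictions=predictions,
    references=references,
    use_aggregator=False,
)
print(results["rouge1"])  # [1.0, 0.0] -> aggregate of 0.5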
def generate_predictions_and_reference(dataset):
    # Generate a model output for each instruction and collect the reference responses
    predictions = []
    references = []
    for row in dataset:
        # Encode the instruction and generate a continuation
        inputs = tokenizer.encode(row["instruction"], return_tensors="pt")
        outputs = model.generate(inputs)
        # Decode only the newly generated tokens, skipping the prompt
        decoded_outputs = tokenizer.decode(outputs[0, inputs.shape[1]:], skip_special_tokens=True)
        references += [row["response"]]
        predictions += [decoded_outputs]
    return references, predictions
references, predictions = generate_predictions_and_reference(evaluation_dataset)
rouge = evaluate.load('rouge')
results = rouge.compute(predictions=predictions, references=references)
print(results)
Fine-tuned
{'rouge1': 0.22425812699023645,
'rouge2': 0.039502543246449,
'rougeL': 0.1501513006868983,
'rougeLsum': 0.18685597710721613}
No fine-tuning
{'rouge1': 0.1310928764315105,
'rouge2': 0.04581654122835097,
'rougeL': 0.08415351421221628,
'rougeLsum': 0.1224749866097021}
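The "No fine-tuning" baseline can be reproduced by reloading the original checkpoint and rerunning the same evaluation loop. A sketch, assuming the same evaluation_dataset as above:

# Reload the original base checkpoint (no fine-tuned weights)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Reuse the same generation and scoring steps on the untouched model
references, predictions = generate_predictions_and_reference(evaluation_dataset)
results = rouge.compute(predictions=predictions, references=references)
print(results)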