Working with Hugging Face
Jacob H. Marquez
Lead Data Engineer
$$
# Build an automatic-speech-recognition pipeline from the wav2vec2 checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# The pipeline accepts the audio in several forms.

# Path to audio file
transcriber("my_audio.wav")

# Numpy array
transcriber(numpy_audio_array)

# Dictionary: the raw samples together with the rate they were sampled at.
# "raw" must be the waveform array itself, not a file path.
transcriber({"sampling_rate": 16_000, "raw": numpy_audio_array})
# Cast the "audio" column so its examples are decoded at `sampling_rate`.
sampling_rate = 16_000
dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))

# Transcribe the waveform of the first example.
# (Named `audio_array` rather than `input` to avoid shadowing the builtin.)
audio_array = dataset[0]['audio']['array']
prediction = transcriber(audio_array)
print(prediction)
"what game do you want to play"
def data():
    """Yield ``(audio_array, sentence)`` pairs for every example in ``dataset``.

    Reads the module-level ``dataset``. Each example must provide
    ``example['audio']['array']`` and ``example['sentence']``; the sentence
    is lower-cased before it is yielded.
    """
    # Iterate the examples directly: ``range(dataset)`` raises TypeError
    # because the dataset itself is not an integer.
    for example in dataset:
        yield example['audio']['array'], example['sentence'].lower()
# Transcribe every example and keep each prediction next to its reference
# sentence so the two can be compared afterwards.
output = []
for audio, sentence in data():
    prediction = transcriber(audio)
    output.append((prediction, sentence))
[("what a nice black shirt", "what a nice blue shirt"), ...]
WER = (substitutions + deletions + insertions) / (number of words in the reference) = 2 / 6 ≈ 0.33
from evaluate import load
# Instantiate word error rate metric
wer = load("wer")

# Save true sentence as reference
# (index the dataset itself: `data` is the generator function, not a sequence)
reference = dataset[0]['sentence']
prediction = "I love DataCamp portraits on hay"

# Compute the WER between predictions and reference
wer_score = wer.compute(
    predictions=[prediction],
    references=[reference],
)
print(wer_score)
0.33
Working with Hugging Face