Working with Hugging Face
Jacob H. Marquez
Lead Data Engineer





$$



# Build a speech-recognition pipeline from the wav2vec2 checkpoint.
# Assumes `pipeline` was imported from `transformers` earlier -- TODO confirm.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)

# Path to audio file
transcriber("my_audio.wav")

# Numpy array
transcriber(numpy_audio_array)

# Dictionary: "raw" carries the audio samples themselves (not a file path),
# and "sampling_rate" states the rate those samples were recorded at.
transcriber({"sampling_rate": 16_000, "raw": numpy_audio_array})
# Cast the audio column so its examples use a 16 kHz sampling rate.
sampling_rate = 16_000
dataset = dataset.cast_column("audio", Audio(sampling_rate=sampling_rate))

# Index the dataset that was just cast (not `data`) and take the first
# example's audio array; the local name avoids shadowing the builtin `input`.
audio_array = dataset[0]["audio"]["array"]
prediction = transcriber(audio_array)
print(prediction)
"what game do you want to play"
def data():
    """Yield an (audio array, lower-cased sentence) pair per dataset example."""
    # Iterate the dataset directly: `range(dataset)` would raise a TypeError
    # because a dataset is not an integer.
    for example in dataset:
        yield example["audio"]["array"], example["sentence"].lower()


# Transcribe every example and pair each prediction with its reference text.
# NOTE(review): the transcriber may return a dict (e.g. {"text": ...}) rather
# than a plain string -- confirm before comparing it with the sentences.
output = []
for audio, sentence in data():
    prediction = transcriber(audio)
    output.append((prediction, sentence))
[("what a nice black shirt", "what a nice blue shirt"), ...]


WER = 2 errors / 6 reference words ≈ 0.33
from evaluate import load

# Instantiate word error rate metric
wer = load("wer")

# Save true sentence as reference. Index `dataset` here: `data` is the
# generator function and cannot be subscripted.
reference = dataset[0]["sentence"]
# The name must match the one handed to `wer.compute` below.
prediction = "I love DataCamp portraits on hay"

# Compute the WER between predictions and reference
wer_score = wer.compute(predictions=[prediction], references=[reference])
print(wer_score)
0.33
Working with Hugging Face