Reading audio files with SpeechRecognition

Spoken Language Processing in Python

Daniel Bourke

Machine Learning Engineer/YouTube Creator

The AudioFile class

import speech_recognition as sr

# Set up the recognizer instance
recognizer = sr.Recognizer()

# Read in audio file (this gives an AudioFile, not AudioData)
clean_support_call = sr.AudioFile("clean-support-call.wav")

# Check type of clean_support_call
type(clean_support_call)

<class 'speech_recognition.AudioFile'>

From AudioFile to AudioData

# Passing the AudioFile itself fails with the AssertionError shown below;
# recognize_google needs AudioData
recognizer.recognize_google(audio_data=clean_support_call)

AssertionError: ``audio_data`` must be audio data

# Convert from AudioFile to AudioData
# Open the AudioFile in a with-block so it can be used as the source
with clean_support_call as source:

    # Record the audio from the source into an AudioData object
    clean_support_call_audio = recognizer.record(source)

# Check the type
type(clean_support_call_audio)

<class 'speech_recognition.AudioData'>

Transcribing our AudioData

# Transcribe the clean support call from the recorded AudioData
recognizer.recognize_google(audio_data=clean_support_call_audio)

hello I'd like to get some help setting up my account please

Duration and offset

`duration` and `offset` are both `None` by default

# Leave duration and offset at their defaults (both None)
with clean_support_call as source:
    clean_support_call_audio = recognizer.record(source,
                                                 duration=None,
                                                 offset=None)

# Get the first 2 seconds of the clean support call
with clean_support_call as source:
    clean_support_call_audio = recognizer.record(source,
                                                 duration=2.0)

hello I'd like to get

Let's practice!

Spoken Language Processing in Python