Spoken Language Processing in Python
Daniel Bourke
Machine Learning Engineer/YouTube Creator
# Import os module (used here for directory listing)
import os
# Check the folder of audio files: list every entry in acme_audio_files
# to see which call recordings are available
os.listdir("acme_audio_files")
(['call_1.mp3',
'call_2.mp3',
'call_3.mp3',
'call_4.mp3'])
import speech_recognition as sr
from pydub import AudioSegment

# Import call 1 and convert to .wav
# (the transcription step that follows reads the .wav copy, not the .mp3)
call_1 = AudioSegment.from_file("acme_audio_files/call_1.mp3")
call_1.export("acme_audio_files/call_1.wav", format="wav")
# Transcribe call 1
recognizer = sr.Recognizer()
call_1_file = sr.AudioFile("acme_audio_files/call_1.wav")

# Open the audio file and read its whole content into audio data;
# record() reads from the source bound by the with-statement
with call_1_file as source:
    call_1_audio = recognizer.record(source)

# Send the audio data to the Google speech recognizer and get the text back
recognizer.recognize_google(call_1_audio)
convert_to_wav() converts non-.wav files to .wav files.
show_pydub_stats() shows the audio attributes of a .wav file.
transcribe_audio() uses recognize_google() to transcribe a .wav file.
# Create function to convert audio file to wav
def convert_to_wav(filename):
    """Convert an audio file of non-.wav format to .wav.

    The .wav file is written to the same path as the input, with only the
    final extension replaced by ``.wav``. A progress message is printed
    once the export has been issued.

    Args:
        filename: Path of the audio file to convert.
    """
    # Import audio file
    audio = AudioSegment.from_file(filename)

    # Create new filename. splitext() strips only the final extension, so
    # dots elsewhere in the path (e.g. "./calls/call.v2.mp3") are preserved
    # instead of truncating the path at the first dot.
    new_filename = os.path.splitext(filename)[0] + ".wav"

    # Export file as .wav
    audio.export(new_filename, format="wav")
    print(f"Converting {filename} to {new_filename}...")
# Convert call 1 from .mp3 to .wav (same folder that was listed earlier)
convert_to_wav("acme_audio_files/call_1.mp3")
Converting acme_audio_files/call_1.mp3 to acme_audio_files/call_1.wav...
def show_pydub_stats(filename):
    """Print different audio attributes related to an audio file.

    Loads the file as an AudioSegment and prints its channel count, sample
    width, frame rate, frame width, length in milliseconds and frame count.
    Nothing is returned.

    Args:
        filename: Path of the audio file to inspect.
    """
    # Create AudioSegment instance
    audio_segment = AudioSegment.from_file(filename)

    # Print attributes
    print(f"Channels: {audio_segment.channels}")
    print(f"Sample width: {audio_segment.sample_width}")
    print(f"Frame rate (sample rate): {audio_segment.frame_rate}")
    print(f"Frame width: {audio_segment.frame_width}")
    print(f"Length (ms): {len(audio_segment)}")
    print(f"Frame count: {audio_segment.frame_count()}")
# Print the audio attributes of the call 1 .wav file
show_pydub_stats("acme_audio_files/call_1.wav")
Channels: 2
Sample width: 2
Frame rate (sample rate): 32000
Frame width: 4
Length (ms): 54888
Frame count: 1756416.0
# Create a function to transcribe audio
def transcribe_audio(filename):
    """Take a .wav format audio file and transcribe it to text.

    Args:
        filename: Path of the .wav file to transcribe.

    Returns:
        The transcription returned by ``recognize_google()``.
    """
    # Setup a recognizer instance
    recognizer = sr.Recognizer()

    # Import the audio file and convert to audio data; record() reads from
    # the source bound by the with-statement
    audio_file = sr.AudioFile(filename)
    with audio_file as source:
        audio_data = recognizer.record(source)

    # Return the transcribed text
    return recognizer.recognize_google(audio_data)
# Transcribe the call 1 .wav file to text
transcribe_audio("acme_audio_files/call_1.wav")
"hello welcome to Acme studio support line my name is Daniel how can I best help
you hey Daniel this is John I've recently bought a smart from you guys and I know
that's not good to hear John let's let's get your cell number and then we
can we can set up a way to fix it for you one number for 1757 varies how long do
you reckon this is going to take about an hour now while John we're going to try
our best hour I will we get the sealing member will start up this support case
I'm just really really really really I've been trying to contact 34 been put on
hold more than an hour and half so I'm not really happy I kind of wanna get this
issue 6 is fossil"
Spoken Language Processing in Python