Spoken Language Processing in Python
Daniel Bourke
Machine Learning Engineer/YouTube Creator
$ pip install pydub
If using files other than .wav, install ffmpeg via ffmpeg.org
# Import PyDub main class
from pydub import AudioSegment
# Import an audio file
wav_file = AudioSegment.from_file(file="wav_file.wav", format="wav")
# Format parameter only for readability
wav_file = AudioSegment.from_file(file="wav_file.wav")
type(wav_file)
pydub.audio_segment.AudioSegment
# Install simpleaudio for wav playback
$ pip install simpleaudio
# Import play function
from pydub.playback import play
# Import audio file
wav_file = AudioSegment.from_file(file="wav_file.wav")
# Play audio file
play(wav_file)
# Import audio files
wav_file = AudioSegment.from_file(file="wav_file.wav")
two_speakers = AudioSegment.from_file(file="two_speakers.wav")
# Check number of channels
wav_file.channels, two_speakers.channels
1, 2
wav_file.frame_rate
48000
# Find the number of bytes per sample
wav_file.sample_width
2
# Find the max amplitude
wav_file.max
8488
# Duration of audio file in milliseconds
len(wav_file)
3284
# Change ATTRIBUTENAME of AudioSegment to x
changed_audio_segment = audio_segment.set_ATTRIBUTENAME(x)
# Change sample width to 1
wav_file_width_1 = wav_file.set_sample_width(1)
wav_file_width_1.sample_width
1
# Change sample rate
wav_file_16k = wav_file.set_frame_rate(16000)
wav_file_16k.frame_rate
16000
# Change number of channels
wav_file_1_channel = wav_file.set_channels(1)
wav_file_1_channel.channels
1
Spoken Language Processing in Python