Pemrograman Paralel dengan Dask di Python
James Fulton
Climate Informatics Researcher

import glob
video_filenames = glob.glob("*.mp4")
print(video_filenames)
['me_at_the_zoo.mp4', 'life_goes_on.mp4', 'guitar.mp4', 'hurt.mp4', ...]
import glob
video_filenames = glob.glob("*.mp4")
import dask.bag as db
filename_bag = db.from_sequence(video_filenames)
filename_bag.take(1)[0]
'me_at_the_zoo.mp4'
# Memuat satu video
load_mp4("video.mp4")
{'video': array(
[[[ 51, 57, 37, ..., 227, 238, 168],
...,
[ 83, 125, 129, ..., 222, 148, 208]]]),
'audio': array([ 7. , 9. , 9.5, ..., -544.5, -551. , -558. ]),
'filename': 'video.mp4'}
data_bag = filename_bag.map(load_mp4)
data_bag.take(1)[0]
{'video': array(
[[[126, 162, 203, ..., 63, 58, 8],
...,
[ 58, 222, 170, ..., 234, 63, 81]]]),
'audio': array([-203.5, -209. , -207. , ..., -222.5, -233. , -248.5]),
'filename': 'me_at_the_zoo.mp4'}
data_bag = filename_bag.map(load_mp4)
# Buat list kosong
data_list = []
# Tambahkan file yang dimuat tertunda ke list
for file in video_filenames:
    data_list.append(dask.delayed(load_mp4)(file))
# Ubah list objek tertunda menjadi Dask bag
data_bag = db.from_delayed(data_list)
# Ubah Dask bag menjadi list objek tertunda
data_list = data_bag.to_delayed()
transcribed_bag = data_bag.map(transcribe_audio)
transcribed_bag.take(1)[0]
{'video': array(
[[[126, 162, 203, ..., 63, 58, 8],
...,
[ 58, 222, 170, ..., 234, 63, 81]]]),
'audio': array([-203.5, -209. , -207. , ..., -222.5, -233. , -248.5]),
'filename': 'me_at_the_zoo.mp4',
'transcript': "All right, so here we are in front of the, uh, elephants ...",
}
# Terapkan fungsi kustom untuk menghapus video tanpa ucapan
clean_bag = transcribed_bag.filter(transcript_is_not_blank)
# Terapkan analisis sentimen pada transkrip
sentiment_bag = clean_bag.map(analyze_transcript_sentiment)
# Hapus elemen yang tidak diperlukan dari bag
keys_to_drop = ['video', 'audio']
final_bag = sentiment_bag.map(filter_dictionary, keys_to_drop=keys_to_drop)
# Konversi ke Dask DataFrame
df = final_bag.to_dataframe()
df.compute()
              filename              transcript  sentiment
0    me_at_the_zoo.mp4  All right, so here ...   positive
...                ...                     ...        ...
# Impor modul scipy untuk file .wav
from scipy.io import wavfile
# Muat frekuensi sampling dan array audio
sample_freq, audio = wavfile.read(filename)
# Sampel per detik
print(sample_freq)
44100
# Data audio
print(audio)
array([ 148, 142, 150, ..., -542, -546, -559], dtype=int16)
Pemrograman Paralel dengan Dask di Python