Multi-Modal Systems with the OpenAI API
James Chapman
Curriculum Manager, DataCamp
$$
$$
$$
$$
$$
$$
from openai import OpenAI

# NOTE(security): avoid hard-coding API keys in source; prefer reading it from
# an environment variable (e.g. os.environ["OPENAI_API_KEY"]).
client = OpenAI(api_key="ENTER YOUR KEY HERE")

# Open the mp3 file with a context manager so the handle is closed even if
# the API call raises.
with open("recording.mp3", "rb") as audio_file:
    # Create a transcript using the Whisper speech-to-text model
    response = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file)

# Extract and print the transcript
transcript = response.text
print(transcript)
$$
# Detect the transcript's language; max_completion_tokens=5 keeps the reply
# limited to the short code itself.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    max_completion_tokens=5,
    messages=[{"role": "user", "content": f"""Identify the language of the following text and respond only with the country code (e.g., 'en', 'uk', 'fr'): {transcript}"""}])

# Extract detected language
# (original line was garbled into a single comment, leaving `language`
# unassigned and breaking the later translation step)
language = response.choices[0].message.content
print(language)
uk
# Translate the transcript into English using the detected language code.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    max_completion_tokens=300,
    messages=[
        {"role": "user", "content": f"""Translate this customer transcript
from country code {language} to English: {transcript}"""}])

# Extract translated text (duplicate print removed)
translated_text = response.choices[0].message.content
print(translated_text)
# Ask the model to clean up the translated transcript (misheard words,
# names, terminology) before it is used downstream.
refine_prompt = f"""You are an AI assistant that corrects transcripts by fixing
misinterpretations, names, and terminology. Please refine the following
transcript:\n\n{translated_text}"""

response = client.chat.completions.create(
    model="gpt-4o-mini",
    max_completion_tokens=300,
    messages=[{"role": "user", "content": refine_prompt}],
)

# Extract and display the corrected transcript
corrected_text = response.choices[0].message.content
print(corrected_text)
$$
$$
Multi-Modal Systems with the OpenAI API