Introduction to Embeddings with the OpenAI API
Emmanuel Pire
Senior Software Engineer, DataCamp

Cosine distance
# Cosine distance between two orthogonal 2-D vectors.
from scipy.spatial import distance
distance.cosine([0, 1], [1, 0])
# Result shown on the slide for the call above:
1.0


def create_embeddings(texts):
    """Return the embedding vector(s) for ``texts``.

    Sends ``texts`` to the embeddings endpoint using the
    ``text-embedding-3-small`` model and extracts the ``'embedding'``
    entry from every item of the response's ``'data'`` list.

    Args:
        texts: The input to embed. The usage examples in this material
            pass either a single string or a list of strings.

    Returns:
        A list with one embedding per item in the response's ``'data'``
        list. A single-string input therefore still yields a list
        (index ``[0]`` to get the vector itself).

    Note:
        Relies on a module-level ``client`` that is not defined in this
        excerpt -- presumably an ``openai.OpenAI`` instance; confirm
        against the surrounding setup code.
    """
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    # Convert the response object to a plain dict so it can be indexed by key.
    response_dict = response.model_dump()
    return [data['embedding'] for data in response_dict['data']]
# A list of strings yields one embedding per string.
print(create_embeddings(["Python is the best!", "R is the best!"]))
# A single string still yields a list, so take element [0] for the vector.
print(create_embeddings("DataCamp is awesome!")[0])
[[0.0050565884448587894, ..., -0.04000323638319969], [-0.0018890155479311943, ..., -0.04085670784115791]]
[0.00037010075175203383, ..., -0.021759100258350372]
from scipy.spatial import distance
import numpy as np

# Embed the search query; create_embeddings returns a list, so take [0].
search_text = "computer"
search_embedding = create_embeddings(search_text)[0]

# Cosine distance from the query to each article's stored embedding.
# NOTE(review): `articles` is defined outside this excerpt -- it is used here
# as an iterable of dicts with "embedding" and 'headline' keys; confirm.
distances = []
for article in articles:
    dist = distance.cosine(search_embedding, article["embedding"])
    distances.append(dist)

# The smallest distance identifies the closest article to the query.
min_dist_ind = np.argmin(distances)
print(articles[min_dist_ind]['headline'])
Tech Company Launches Innovative Product to Improve Online Accessibility
Introduction to Embeddings with the OpenAI API