Introduction to Embeddings with the OpenAI API
Emmanuel Pire
Senior Software Engineer, DataCamp
Cosine distance
from scipy.spatial import distance
# [0, 1] and [1, 0] are perpendicular, so their cosine similarity is 0 and
# their cosine distance (1 - similarity) is 1.
distance.cosine([0, 1], [1, 0])
# Result shown on the slide:
1.0
def create_embeddings(texts):
    """Return the embedding vector for each input text.

    Sends ``texts`` to the embeddings endpoint of the module-level ``client``
    using the ``text-embedding-3-small`` model.

    Args:
        texts: The text(s) to embed. The examples in this file pass either a
            list of strings or a single string.

    Returns:
        A list with the ``'embedding'`` value of every entry in the
        response's ``'data'`` list. A list is returned even when a single
        string is passed, so callers take element ``[0]`` in that case.
    """
    response = client.embeddings.create(
        model="text-embedding-3-small",
        input=texts,
    )
    # Dump the response object to plain dicts so embeddings can be read by key.
    response_dict = response.model_dump()
    return [data['embedding'] for data in response_dict['data']]
# A list of texts produces a list with one embedding per text.
print(create_embeddings(["Python is the best!", "R is the best!"]))
# A single string still produces a list, so take element 0 to get its embedding.
print(create_embeddings("DataCamp is awesome!")[0])
[[0.0050565884448587894, ..., -0.04000323638319969], [-0.0018890155479311943, ..., -0.04085670784115791]]
[0.00037010075175203383, ..., -0.021759100258350372]
from scipy.spatial import distance
import numpy as np

# Embed the query text; create_embeddings returns a list, so take the
# single vector at index 0.
search_text = "computer"
search_embedding = create_embeddings(search_text)[0]

# Cosine distance from the query to each article's stored embedding, in the
# same order as `articles`.
# NOTE(review): assumes every article["embedding"] comes from the same
# embedding model as the query -- confirm where `articles` is built.
distances = [
    distance.cosine(search_embedding, article["embedding"])
    for article in articles
]

# The smallest cosine distance identifies the closest article to the query.
min_dist_ind = np.argmin(distances)
print(articles[min_dist_ind]['headline'])
Tech Company Launches Innovative Product to Improve Online Accessibility
Introduction to Embeddings with the OpenAI API