Image Search¶
Upload images, store their embeddings, and find similar images with vector search.
What you'll build¶
- Upload images to `/files` and get UUIDs
- Store metadata + embeddings in a table
- Create a vector index
- Search by text embedding
Prerequisites¶
You also need an embedding model. This example uses generic `encode_image` / `encode_text` functions — plug in any encoder (OpenAI, Cohere, ColQwen, etc.).
Code¶
Setup¶
import io
import requests
from PIL import Image
# Connection settings: replace the placeholder values with your own.
API_URL = "https://api.deeplake.ai"
TOKEN = "YOUR_TOKEN"
WORKSPACE = "YOUR_WORKSPACE"
TABLE = "image_catalog"

# Bearer-token header on its own, for requests that do not send a JSON body.
auth_headers = {"Authorization": f"Bearer {TOKEN}"}

# Same authorization header plus an explicit JSON content type.
headers = {**auth_headers, "Content-Type": "application/json"}
def query(sql, timeout=60):
    """Run a SQL statement against the workspace and return the decoded JSON.

    Args:
        sql: The SQL text to send in the ``query`` field of the request body.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    res = requests.post(
        f"{API_URL}/workspaces/{WORKSPACE}/tables/query",
        headers=headers,
        json={"query": sql},
        # Without a timeout, requests waits indefinitely on a stalled connection.
        timeout=timeout,
    )
    # Fail loudly on HTTP errors instead of handing an error payload
    # (or a JSON decode failure) back to the caller.
    res.raise_for_status()
    return res.json()
Create the table¶
# Build the DDL first, then send it. IF NOT EXISTS makes the statement safe to re-run.
create_table_sql = f"""
CREATE TABLE IF NOT EXISTS "{WORKSPACE}"."{TABLE}" (
id SERIAL PRIMARY KEY,
filename TEXT,
description TEXT,
embedding FLOAT4[],
file_id UUID,
metadata JSONB
) USING deeplake
"""
query(create_table_sql)
Upload an image¶
def upload_image(image_bytes, filename, content_type="image/png"):
    """Upload raw image bytes to the workspace ``/files`` endpoint.

    Args:
        image_bytes: The file content to upload.
        filename: Name sent alongside the content in the multipart form.
        content_type: MIME type declared for the upload. Defaults to
            ``"image/png"``; pass e.g. ``"image/jpeg"`` for other formats.

    Returns:
        The ``id`` field of the JSON response when the server answers
        200 or 201, otherwise ``None``.
    """
    res = requests.post(
        f"{API_URL}/workspaces/{WORKSPACE}/files",
        headers=auth_headers,
        files={"file": (filename, image_bytes, content_type)},
        timeout=60,
    )
    if res.status_code not in (200, 201):
        return None
    return res.json()["id"]
Ingest images with embeddings¶
images = [
    {"path": "beach.png", "desc": "Sunset over the ocean"},
    {"path": "city.png", "desc": "Downtown skyline at night"},
    {"path": "forest.png", "desc": "Misty forest trail"},
]


def _sql_quote(text):
    """Return *text* as a single-quoted SQL literal with embedded quotes doubled."""
    return "'" + str(text).replace("'", "''") + "'"


for img_info in images:
    # Read image
    with open(img_info["path"], "rb") as f:
        img_bytes = f.read()

    # Upload to /files
    file_id = upload_image(img_bytes, img_info["path"])
    if file_id is None:
        # upload_image returns None on failure; interpolating it below would
        # produce the invalid literal 'None'::uuid.
        print(f"Upload failed for {img_info['path']}; skipping")
        continue

    # Compute embedding (replace with your encoder)
    embedding = encode_image(img_bytes)  # returns list[float]

    # Insert row. Text values are quoted via _sql_quote so an apostrophe in a
    # filename or description cannot terminate the SQL string early.
    emb_literal = "ARRAY[" + ",".join(str(v) for v in embedding) + "]::float4[]"
    filename_sql = _sql_quote(img_info["path"])
    description_sql = _sql_quote(img_info["desc"])
    query(f"""
INSERT INTO "{WORKSPACE}"."{TABLE}" (filename, description, embedding, file_id)
VALUES ({filename_sql}, {description_sql}, {emb_literal}, '{file_id}'::uuid)
""")
Create the index¶
# Build the index DDL first, then send it. IF NOT EXISTS makes the statement safe to re-run.
create_index_sql = f"""
CREATE INDEX IF NOT EXISTS idx_image_vec
ON "{WORKSPACE}"."{TABLE}" USING deeplake_index (embedding DESC)
"""
query(create_index_sql)
Search by text¶
# Embed the text query with the same kind of encoder used for the images.
search_text = "ocean waves at sunset"
query_embedding = encode_text(search_text)  # returns list[float]

# Render the vector as a SQL float4 array literal.
emb_values = ",".join(map(str, query_embedding))
emb_literal = f"ARRAY[{emb_values}]::float4[]"

# Score every row against the query vector and keep the five highest scores.
search_sql = f"""
SELECT filename, description, file_id,
embedding <#> {emb_literal} AS score
FROM "{WORKSPACE}"."{TABLE}"
ORDER BY score DESC
LIMIT 5
"""
result = query(search_sql)

# A response without a "rows" key prints nothing.
matched_rows = result.get("rows", [])
for row in matched_rows:
    print(row)
Download a result¶
# The search may have returned no rows (or no "rows" key at all), so check
# before indexing into the first result.
rows = result.get("rows", [])
if not rows:
    print("No matches to download")
else:
    top_file_id = rows[0][2]  # file_id is the third selected column
    res = requests.get(
        f"{API_URL}/workspaces/{WORKSPACE}/files/{top_file_id}/content",
        headers=auth_headers,
        timeout=60,
    )
    # Stop on an HTTP error so an error body is never saved as a .png file.
    res.raise_for_status()
    with open("top_match.png", "wb") as f:
        f.write(res.content)
What to try next¶
- Multi-vector search — for fine-grained image matching
- Video retrieval — same pattern, with video chunks
- Hybrid RAG — combine vector with keyword search