Agent Memory

Store agent conversation traces in a table. Index them for fast retrieval. Query memory before your agents respond.

What you'll build

  • A traces table for storing full agent conversations as JSONB
  • A memory table with embeddings for semantic retrieval
  • BM25 + vector + hybrid search over agent history
  • A reusable search_memory function for multi-agent tools

Code

Setup

import requests
import json

API_URL = "https://api.deeplake.ai"
TOKEN = "YOUR_TOKEN"
WORKSPACE = "YOUR_WORKSPACE"
TRACES_TABLE = "agent_traces"
MEMORY_TABLE = "agent_memory"

headers = {
    "Authorization": f"Bearer {TOKEN}",
    "Content-Type": "application/json",
}

def query(sql):
    """POST a SQL statement to the query endpoint and return the parsed JSON."""
    res = requests.post(
        f"{API_URL}/workspaces/{WORKSPACE}/tables/query",
        headers=headers,
        json={"query": sql},
    )
    res.raise_for_status()  # surface HTTP errors instead of failing silently
    return res.json()
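
A quick smoke test confirms the endpoint and token before going further (a sketch; whether a bare SELECT works and the exact response shape depend on your deployment):

print(query("SELECT 1"))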

Create the traces table

Store full conversations as JSONB. This is your append-only source of truth.

query(f"""
    CREATE TABLE IF NOT EXISTS "{WORKSPACE}"."{TRACES_TABLE}" (
        id BIGSERIAL PRIMARY KEY,
        agent TEXT,
        model TEXT,
        task TEXT,
        run_id TEXT,
        conversations JSONB,
        created_at TIMESTAMPTZ DEFAULT NOW()
    ) USING deeplake
""")

Ingest traces

traces = [
    {
        "agent": "code-assistant",
        "model": "claude-sonnet-4-5-20250929",
        "task": "Fix authentication timeout in login flow",
        "run_id": "run_001",
        "conversations": [
            {"role": "user", "content": "The login times out after 30s"},
            {"role": "assistant", "content": "I see the issue in auth_middleware.py..."},
        ],
    },
    {
        "agent": "debug-oracle",
        "model": "claude-sonnet-4-5-20250929",
        "task": "Investigate OOM error in batch pipeline",
        "run_id": "run_002",
        "conversations": [
            {"role": "user", "content": "Workers crash with OOM on large batches"},
            {"role": "assistant", "content": "The batch size is unbounded..."},
        ],
    },
]

def esc(s):
    """Escape single quotes so values are safe inside SQL string literals."""
    return s.replace("'", "''")

for trace in traces:
    conv_json = esc(json.dumps(trace["conversations"]))
    query(f"""
        INSERT INTO "{WORKSPACE}"."{TRACES_TABLE}" (agent, model, task, run_id, conversations)
        VALUES (
            '{esc(trace["agent"])}', '{esc(trace["model"])}',
            '{esc(trace["task"])}', '{esc(trace["run_id"])}',
            '{conv_json}'::jsonb
        )
    """)

Create the memory table (with embeddings)

Copy traces into a memory table and add an embedding column for semantic search.

query(f"""
    CREATE TABLE IF NOT EXISTS "{WORKSPACE}"."{MEMORY_TABLE}" (
        id BIGSERIAL PRIMARY KEY,
        agent TEXT,
        model TEXT,
        task TEXT,
        run_id TEXT,
        conversations JSONB,
        embedding FLOAT4[],
        created_at TIMESTAMPTZ DEFAULT NOW()
    ) USING deeplake
""")

Generate and store embeddings

Embed the task field. Use any embedding API (OpenAI, Cohere, OpenRouter, etc.).

def embed_texts(texts):
    """Replace with your embedding provider."""
    # Example using OpenRouter:
    res = requests.post(
        "https://openrouter.ai/api/v1/embeddings",
        headers={
            "Authorization": "Bearer YOUR_OPENROUTER_KEY",
            "Content-Type": "application/json",
        },
        json={"model": "openai/text-embedding-3-small", "input": texts},
    )
    return [item["embedding"] for item in res.json()["data"]]


# Fetch tasks without embeddings
result = query(f'SELECT id, task FROM "{WORKSPACE}"."{MEMORY_TABLE}" WHERE embedding IS NULL LIMIT 100')

if result.get("rows"):
    ids = [row[0] for row in result["rows"]]
    tasks = [row[1] or "" for row in result["rows"]]
    embeddings = embed_texts(tasks)

    for row_id, emb in zip(ids, embeddings):
        emb_literal = "ARRAY[" + ",".join(str(v) for v in emb) + "]::float4[]"
        query(f'UPDATE "{WORKSPACE}"."{MEMORY_TABLE}" SET embedding = {emb_literal} WHERE id = {row_id}')
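
The snippet above handles a single batch of 100. A loop like this sketch repeats the fetch, embed, and update cycle until no rows are missing embeddings:

while True:
    result = query(f'SELECT id, task FROM "{WORKSPACE}"."{MEMORY_TABLE}" WHERE embedding IS NULL LIMIT 100')
    rows = result.get("rows", [])
    if not rows:
        break  # everything is embedded
    embeddings = embed_texts([row[1] or "" for row in rows])
    for (row_id, _task), emb in zip(rows, embeddings):
        emb_literal = "ARRAY[" + ",".join(str(v) for v in emb) + "]::float4[]"
        query(f'UPDATE "{WORKSPACE}"."{MEMORY_TABLE}" SET embedding = {emb_literal} WHERE id = {row_id}')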

Create indexes

# Vector index for semantic search
query(f"""
    CREATE INDEX IF NOT EXISTS idx_memory_vec
    ON "{WORKSPACE}"."{MEMORY_TABLE}" USING deeplake_index (embedding DESC)
""")

# BM25 index for keyword search on task
query(f"""
    CREATE INDEX IF NOT EXISTS idx_memory_bm25
    ON "{WORKSPACE}"."{MEMORY_TABLE}" USING deeplake_index (task)
    WITH (index_type = 'bm25')
""")

Search: BM25 (keyword precision)

Find traces by exact error names, function names, or identifiers.

result = query(f"""
    SELECT task, agent, run_id,
           task <#> 'authentication timeout' AS score
    FROM "{WORKSPACE}"."{MEMORY_TABLE}"
    ORDER BY score ASC
    LIMIT 5
""")

Search: vector (semantic similarity)

Find conceptually similar tasks even with different wording.

search_text = "login takes too long and fails"
emb = embed_texts([search_text])[0]
emb_literal = "ARRAY[" + ",".join(str(v) for v in emb) + "]::float4[]"

result = query(f"""
    SELECT task, agent, run_id,
           embedding <#> {emb_literal} AS score
    FROM "{WORKSPACE}"."{MEMORY_TABLE}"
    WHERE embedding IS NOT NULL
    ORDER BY score DESC
    LIMIT 5
""")

Search: hybrid (best default)

Combine both signals with tunable weights.

search_text = "fix authentication timeout"
emb = embed_texts([search_text])[0]
emb_literal = "ARRAY[" + ",".join(str(v) for v in emb) + "]::float4[]"

result = query(f"""
    SELECT task, agent, run_id,
           (embedding, task) <#> deeplake_hybrid_record(
               {emb_literal},
               '{search_text}',
               0.5, 0.5
           ) AS score
    FROM "{WORKSPACE}"."{MEMORY_TABLE}"
    WHERE embedding IS NOT NULL
    ORDER BY score ASC
    LIMIT 5
""")

Reusable memory tool

Wrap search into a function your agents can call:

def search_memory(question, top_k=5, vector_weight=0.5):
    """Search agent memory with hybrid BM25 + vector."""
    emb = embed_texts([question])[0]
    emb_literal = "ARRAY[" + ",".join(str(v) for v in emb) + "]::float4[]"
    bm25_weight = 1.0 - vector_weight

    result = query(f"""
        SELECT task, agent, conversations, run_id,
               (embedding, task) <#> deeplake_hybrid_record(
                   {emb_literal},
                   '{question.replace("'", "''")}',
                   {vector_weight}, {bm25_weight}
               ) AS score
        FROM "{WORKSPACE}"."{MEMORY_TABLE}"
        WHERE embedding IS NOT NULL
        ORDER BY score ASC
        LIMIT {top_k}
    """)
    return result.get("rows", [])

Now any agent — Code Assistant, Debug Oracle, Strategy Learner — can call search_memory() before responding.
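
For example, a pre-response hook might fold the top hits into the prompt. A minimal sketch (the prompt format here is illustrative, not part of the API):

def build_context(question):
    """Format the top memory hits as context for the next model call."""
    hits = search_memory(question, top_k=3)
    lines = [f"- [{agent}/{run_id}] {task}" for task, agent, _conv, run_id, _score in hits]
    return "Relevant past work:\n" + "\n".join(lines)

print(build_context("login timeout"))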

Deterministic replay

Every trace stores full conversations as JSONB. Replay any session:

result = query(f"SELECT conversations FROM \"{WORKSPACE}\".\"{TRACES_TABLE}\" WHERE run_id = 'run_001'")
conversation = result["rows"][0][0]  # full JSON conversation
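
From there you can re-render the session turn by turn (assuming the JSONB column deserializes to the list of message dicts ingested above):

for msg in conversation:
    print(f"{msg['role']}: {msg['content']}")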

Same inputs, same context, repeatable investigations.

What to try next