# llamaindex-dakera

Drop-in LlamaIndex components backed by Dakera — persistent agent memory and server-side vector indexing with no local embedding model.
## Start a Dakera server

Run the server with Docker and confirm it is healthy:

```bash
docker run -d \
  --name dakera \
  -p 3300:3300 \
  -e DAKERA_ROOT_API_KEY=dk-mykey \
  ghcr.io/dakera-ai/dakera:latest

curl http://localhost:3300/health
```
## Installation

```bash
pip install llamaindex-dakera
```
Requirements: Python ≥ 3.10, a running Dakera server.
## Quick start

```python
from llama_index_dakera import DakeraMemoryStore, DakeraIndexStore

# Agent memory
memory = DakeraMemoryStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    agent_id="my-agent",
)

# RAG index — server handles embedding
vector_store = DakeraIndexStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    namespace="my-docs",
)
```
## DakeraMemoryStore

Persistent conversation memory for LlamaIndex agents. Drop-in replacement for the default in-memory chat store.
```python
from llama_index.core.agent import ReActAgent
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.openai import OpenAI
from llama_index_dakera import DakeraMemoryStore

store = DakeraMemoryStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    agent_id="react-agent",
)

memory = ChatMemoryBuffer.from_defaults(
    token_limit=3000,
    chat_store=store,
    chat_store_key="user-1",
)

agent = ReActAgent.from_tools(
    tools=[...],
    llm=OpenAI(model="gpt-4o"),
    memory=memory,
    verbose=True,
)

# First session
response = agent.chat("My project is called NeuralBridge.")

# Later session — memory persists
response = agent.chat("What's the name of my project?")
print(response)  # "Your project is called NeuralBridge."
```
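Because the history lives on the server, a separate process can read it back. A minimal sketch, assuming `DakeraMemoryStore` implements LlamaIndex's `BaseChatStore` interface (which `ChatMemoryBuffer` requires of any `chat_store`):

```python
from llama_index_dakera import DakeraMemoryStore

# A fresh store pointed at the same agent_id sees the earlier conversation.
store = DakeraMemoryStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    agent_id="react-agent",
)

# get_messages is part of LlamaIndex's BaseChatStore interface.
for message in store.get_messages("user-1"):
    print(f"{message.role}: {message.content}")
```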
### Parameters

| Parameter | Default |
|-----------|---------|
| `api_url` | required |
| `api_key` | `""` |
| `agent_id` | required |
| `top_k` | `5` |
| `min_importance` | `0.0` |
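A sketch with the retrieval knobs tuned, assuming `top_k` caps how many stored memories are retrieved per query and `min_importance` filters out low-scoring ones (the values below are illustrative):

```python
from llama_index_dakera import DakeraMemoryStore

# Retrieve up to 10 memories per query, keeping only those the
# server scores at importance 0.5 or higher. Illustrative values.
memory = DakeraMemoryStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    agent_id="my-agent",
    top_k=10,
    min_importance=0.5,
)
```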
## DakeraIndexStore

Server-side embedded vector store for RAG. Dakera embeds documents on the server — no local model, no OpenAI embeddings API needed.
```python
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
from llama_index_dakera import DakeraIndexStore

documents = SimpleDirectoryReader("./docs").load_data()

vector_store = DakeraIndexStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    namespace="product-docs",
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

query_engine = index.as_query_engine(similarity_top_k=4)
response = query_engine.query("How does the billing work?")
print(response)
```
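Because the vectors live on the Dakera server, a later process can attach to an existing namespace without re-reading or re-embedding the documents. A minimal sketch using LlamaIndex's standard `from_vector_store` constructor:

```python
from llama_index.core import VectorStoreIndex
from llama_index_dakera import DakeraIndexStore

# Reconnect to the already-populated namespace; nothing is re-embedded.
vector_store = DakeraIndexStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    namespace="product-docs",
)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

query_engine = index.as_query_engine(similarity_top_k=4)
print(query_engine.query("How does the billing work?"))
```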
### RAG chat with memory

Combine both stores to build a chat engine that retrieves from the indexed docs and keeps conversation history on the server:

```python
from llama_index.core import VectorStoreIndex
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index_dakera import DakeraIndexStore, DakeraMemoryStore

vector_store = DakeraIndexStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    namespace="product-docs",
)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

memory_store = DakeraMemoryStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    agent_id="doc-chat",
)

chat_engine = CondensePlusContextChatEngine.from_defaults(
    retriever=index.as_retriever(similarity_top_k=4),
    memory=ChatMemoryBuffer.from_defaults(chat_store=memory_store),
)

response = chat_engine.chat("What are the pricing tiers?")
print(response)
```
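A follow-up turn shows why the condense step matters: the engine rewrites the question against the stored history into a standalone query before retrieval.

```python
# Condensed with the chat history into a standalone question
# (roughly "Which pricing tier includes support?") before retrieval.
response = chat_engine.chat("Which of those tiers includes support?")
print(response)
```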
### Parameters

`DakeraIndexStore` takes the same `api_url` and `api_key` as `DakeraMemoryStore`, plus:

- `namespace`: the server-side collection that documents are indexed into
- `embedding_model`: the embedding model the server uses for this namespace
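To pin a specific server-side model rather than relying on the server's default, pass `embedding_model` (the model name below is a placeholder, not a documented Dakera model):

```python
from llama_index_dakera import DakeraIndexStore

# "bge-small-en" is a placeholder; use a model your Dakera server serves.
vector_store = DakeraIndexStore(
    api_url="http://localhost:3300",
    api_key="dk-mykey",
    namespace="product-docs",
    embedding_model="bge-small-en",
)
```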