Getting started
Installation
pip install pocket-mem

Quick start
Install Ollama and pull the default model:
# Linux/WSL
curl -fsSL https://ollama.com/install.sh | sh
# macOS
brew install ollama
ollama pull qwen2.5:7b
ollama serve

from pocket_mem import MemoryAgent
agent = MemoryAgent(project="my-app")
agent.observe(
user_input="My boss David recommended Cursor IDE",
agent_response="Got it, I'll keep that in mind."
)
context = agent.recall("What did David recommend?", mode="context")
print(context)
# → "Entity: David (boss) — recommended Cursor IDE for coding."

Core API
observe()
agent.observe(
user_input="The user's message",
agent_response="The agent's response"
)
# Non-blocking. Returns immediately. Runs in background thread.

recall()
agent.recall(query: str, mode: str = "context") -> str | dict
# mode options: "context" | "answer" | "raw"

topics()
agent.topics() -> list[str]
# Returns all topic labels currently in memory.
# ["People I Know", "Dev Tools", "Decisions"]

stats()
agent.stats() -> dict
# {"nodes": 142, "edges": 89, "topics": 7, "sessions": 4}

forget()
agent.forget(query: str) -> None
# Removes nodes matching the query from the graph.
agent.forget("David")

as_tool()
agent.as_tool() -> dict
# Returns an OpenAI-compatible function schema for tool-call pattern.
tools = [agent.as_tool()]

export()
agent.export(path: str) -> None
agent.export("my_project.mempack")
# Creates a .mempack zip file (SQLite db + manifest).

import_pack()
agent.import_pack(path: str) -> None
agent.import_pack("colleague_project.mempack")
# Merges a .mempack into current memory. No duplicates.

Recall modes
context — best for most use cases
context = agent.recall("What database did we decide on?", mode="context")
# Returns: formatted string ready to inject into a system prompt.
# No LLM call. Fast. Deterministic.
system = f"You are a helpful assistant.\n\n## Memory\n{context}"

answer — when you want pocket-mem to answer directly
answer = agent.recall("Who recommended httpx?", mode="answer")
# Returns: natural language answer synthesized by LLM.
# Makes one LLM call.
# Best results with Claude Haiku or GPT-4o-mini.

raw — for debugging
import json
data = agent.recall("David", mode="raw")
print(json.dumps(data, indent=2))
# Returns: {"nodes": [...], "edges": [...]}
# No LLM call. Live graph query.

Wiring patterns
Pattern A — proactive injection (recommended)
memory = MemoryAgent(project="my-app")
def chat(user_message: str) -> str:
context = memory.recall(user_message, mode="context")
system = f"""You are a helpful assistant.
## What you remember
{context}
"""
response = your_llm.chat(system=system, user=user_message)
memory.observe(user_input=user_message, agent_response=response)
    return response

Pattern B — tool call
memory = MemoryAgent(project="my-app")
tools = [memory.as_tool()]
def handle_tool_call(name: str, args: dict) -> str:
if name == "recall_memory":
return memory.recall(args["query"], mode=args.get("mode", "context"))LLM config
Local Ollama (default)
from pocket_mem import MemoryAgent
# No config needed — uses qwen2.5:7b at localhost:11434 by default
agent = MemoryAgent(project="my-app")

Cloud model
import os
from pocket_mem import MemoryAgent, LLMConfig
agent = MemoryAgent(
project="my-app",
llm=LLMConfig(
base_url="https://api.anthropic.com/v1",
model="claude-haiku-4-5-20251001",
api_key=os.environ["ANTHROPIC_API_KEY"]
)
)

Split config — local for ingestion, cloud for answers
import os
from pocket_mem import MemoryAgent, LLMConfig
agent = MemoryAgent(
project="my-app",
llm=LLMConfig(
base_url="http://localhost:11434/v1",
model="qwen2.5:7b",
answer_base_url="https://api.anthropic.com/v1",
answer_model="claude-haiku-4-5-20251001",
answer_api_key=os.environ["ANTHROPIC_API_KEY"],
)
)

Identity
Basic setup
from pocket_mem import MemoryAgent, IdentityConfig
agent = MemoryAgent(
project="my-app",
identity=IdentityConfig(
description="Sales representative at Acme Corp managing enterprise B2B deals"
)
)

With Gemini for higher quality derivation
import os
from pocket_mem import MemoryAgent, IdentityConfig
agent = MemoryAgent(
project="my-app",
identity=IdentityConfig(
description="Hedge fund analyst covering emerging markets",
derivation_api_key=os.environ["GEMINI_API_KEY"]
)
)
# Identity derivation runs once and is cached locally. Restarts load from cache instantly.

Storage
Default (./memory/)
# Creates ./memory/my-app.db in current directory automatically
agent = MemoryAgent(project="my-app")

Custom path
agent = MemoryAgent(project="my-app", path="./data/memory/")

Sharing memory
Export
agent.export("project_memory.mempack")
# Creates a portable zip: SQLite db + manifest.json

Import
their_agent = MemoryAgent(project="my-app")
their_agent.import_pack("project_memory.mempack")
# Their agent now has all your memory

Visualizer
Open graph explorer
pocket-mem show
pocket-mem show --project my-app

Filters
# By topic
pocket-mem show --topic "People I Know"
pocket-mem show --topic "People I Know" --topic "Dev Tools"
# By node type
pocket-mem show --type entity
pocket-mem show --type entity --type event
# By date
pocket-mem show --since 7d
pocket-mem show --since 2025-01-01
# Search
pocket-mem show --search David

Something missing? Open an issue on GitHub.