Building Agents
Agent Memory
Give your agents persistent memory using short-term context and long-term vector storage.
Types of Agent Memory
| Memory Type | Description | Storage |
|---|---|---|
| Short-term | Current conversation context | LLM context window |
| Long-term | Persistent facts and experiences | Vector DB / SQL |
| Episodic | Past conversation summaries | Vector DB |
| Semantic | Knowledge and facts | Vector DB |
| Working | Intermediate reasoning steps | In-context |
Context Window Limitations
LLMs have limited context windows. For long conversations:
- Summarize old messages and compress them
- Store important information in a vector DB
- Retrieve relevant memories at query time
Memory Design Patterns
- Sliding window: Keep last N messages
- Summarization: Compress old messages into a summary
- RAG memory: Store facts in vector DB, retrieve relevant ones
- Hierarchical: Short-term → compress → long-term
Example
python
from anthropic import Anthropic
from collections import deque
import json
# Shared Anthropic API client (presumably picks up credentials from the
# environment, e.g. ANTHROPIC_API_KEY — confirm in deployment config).
client = Anthropic()
class AgentWithMemory:
    """Conversational agent combining short-term and long-term memory.

    Short-term memory is a bounded deque of raw chat messages; long-term
    memory is a plain list of fact strings (a stand-in for a vector DB).
    Old messages are periodically compressed into a running summary that
    is injected into the system prompt.
    """

    def __init__(self, max_short_term: int = 10):
        # Each exchange is a user/assistant pair, hence maxlen = 2 * pairs.
        self.short_term = deque(maxlen=max_short_term * 2)
        self.long_term_facts = []  # simple list; use a vector DB in production
        self.conversation_summary = ""

    def remember_fact(self, fact: str):
        """Store an important fact in long-term memory."""
        self.long_term_facts.append(fact)

    def build_system_prompt(self) -> str:
        """Assemble the system prompt from the summary and recent facts."""
        parts = ["You are a helpful assistant with memory capabilities."]
        if self.conversation_summary:
            parts.append(f"\nConversation summary so far:\n{self.conversation_summary}")
        if self.long_term_facts:
            # Only surface the 10 most recent facts to keep the prompt small.
            facts = "\n".join(f"- {f}" for f in self.long_term_facts[-10:])
            parts.append(f"\nImportant facts I remember:\n{facts}")
        return "\n".join(parts)

    def should_summarize(self) -> bool:
        """True when short-term memory is close enough to its cap that the
        deque would soon evict messages silently.

        Derived from the deque's maxlen rather than a hardcoded 18 so it
        stays correct for any max_short_term (18 at the default of 10).
        """
        return len(self.short_term) >= self.short_term.maxlen - 2

    def summarize_conversation(self):
        """Compress the oldest messages into the running summary."""
        old_messages = list(self.short_term)[:10]
        summary_response = client.messages.create(
            model="claude-3-5-haiku-20241022",
            max_tokens=300,
            messages=[
                {
                    "role": "user",
                    "content": f"Summarize this conversation in 2-3 sentences:\n{json.dumps(old_messages)}",
                }
            ],
        )
        new_summary = summary_response.content[0].text
        if self.conversation_summary:
            self.conversation_summary += " " + new_summary
        else:
            self.conversation_summary = new_summary
        # Remove the messages we just summarized from short-term memory.
        # 10 is an even count, so user/assistant alternation is preserved.
        for _ in range(10):
            if self.short_term:
                self.short_term.popleft()

    @staticmethod
    def _extract_name(user_message: str):
        """Return a title-cased name from "my name is X", or None.

        Strips trailing punctuation so "My name is Alex." yields "Alex"
        rather than "Alex.".
        """
        lowered = user_message.lower()
        if "my name is" not in lowered:
            return None
        tokens = lowered.split("my name is")[1].strip().split()
        if not tokens:
            return None
        name = tokens[0].strip(".,!?;:")
        return name.title() if name else None

    def chat(self, user_message: str) -> str:
        """Send one user message and return the assistant's reply,
        summarizing old context first when short-term memory is nearly full.
        """
        if self.should_summarize():
            self.summarize_conversation()
        self.short_term.append({"role": "user", "content": user_message})
        response = client.messages.create(
            model="claude-3-5-haiku-20241022",
            max_tokens=1024,
            system=self.build_system_prompt(),
            messages=list(self.short_term),
        )
        assistant_message = response.content[0].text
        self.short_term.append({"role": "assistant", "content": assistant_message})
        # Auto-extract facts (in production, use an LLM to detect important facts)
        name = self._extract_name(user_message)
        if name:
            self.remember_fact(f"User's name is {name}")
        return assistant_message
# Demo: exercise the agent's memory across a short conversation.
# Guarded so importing this module does not trigger live API calls.
if __name__ == "__main__":
    agent = AgentWithMemory()
    print(agent.chat("Hi! My name is Alex."))
    print(agent.chat("I'm learning about AI agents."))
    print(agent.chat("What's my name again?"))