Building Agents

Agent Memory

Give your agents persistent memory using short-term context and long-term vector storage.

Types of Agent Memory

| Memory Type | Description                      | Storage            |
|-------------|----------------------------------|--------------------|
| Short-term  | Current conversation context     | LLM context window |
| Long-term   | Persistent facts and experiences | Vector DB / SQL    |
| Episodic    | Past conversation summaries      | Vector DB          |
| Semantic    | Knowledge and facts              | Vector DB          |
| Working     | Intermediate reasoning steps     | In-context         |

Context Window Limitations

LLMs have limited context windows. For long conversations:

  • Summarize old messages and compress them
  • Store important information in a vector DB
  • Retrieve relevant memories at query time

Memory Design Patterns

  • Sliding window: Keep last N messages
  • Summarization: Compress old messages into a summary
  • RAG memory: Store facts in vector DB, retrieve relevant ones
  • Hierarchical: Short-term → compress → long-term

Example

python
from anthropic import Anthropic
from collections import deque
import json

# Module-level API client; reads ANTHROPIC_API_KEY from the environment by default.
client = Anthropic()

class AgentWithMemory:
    """Conversational agent with layered memory.

    Three layers:
      - short-term: a bounded deque of raw chat messages (sliding window),
      - summary: a running compression of messages evicted from the window,
      - long-term: a list of remembered facts (use a vector DB in production).

    The summary and recent facts are injected into the system prompt on
    every turn, so context survives beyond the raw message window.
    """

    def __init__(self, max_short_term: int = 10):
        # Each exchange is one user + one assistant message, hence * 2.
        self.short_term = deque(maxlen=max_short_term * 2)
        self.long_term_facts = []  # simple list; use a vector DB in production
        self.conversation_summary = ""

    def remember_fact(self, fact: str):
        """Store an important fact in long-term memory."""
        self.long_term_facts.append(fact)

    def build_system_prompt(self) -> str:
        """Assemble the system prompt from the base role, the running
        conversation summary, and the most recent long-term facts."""
        parts = ["You are a helpful assistant with memory capabilities."]

        if self.conversation_summary:
            parts.append(f"\nConversation summary so far:\n{self.conversation_summary}")

        if self.long_term_facts:
            # Cap prompt growth: only the 10 most recent facts are included.
            facts = "\n".join(f"- {f}" for f in self.long_term_facts[-10:])
            parts.append(f"\nImportant facts I remember:\n{facts}")

        return "\n".join(parts)

    def should_summarize(self) -> bool:
        """Return True when short-term memory is within one exchange of
        its cap.

        Derived from the deque's maxlen (rather than a hard-coded 18) so
        the threshold stays correct for any max_short_term value.
        """
        return len(self.short_term) >= self.short_term.maxlen - 2

    def summarize_conversation(self):
        """Compress the oldest half of short-term memory into the summary."""
        # Half the window (was a hard-coded 10, i.e. half the default cap).
        n = self.short_term.maxlen // 2
        old_messages = list(self.short_term)[:n]

        summary_response = client.messages.create(
            model="claude-3-5-haiku-20241022",
            max_tokens=300,
            messages=[
                {
                    "role": "user",
                    "content": f"Summarize this conversation in 2-3 sentences:\n{json.dumps(old_messages)}",
                }
            ],
        )

        new_summary = summary_response.content[0].text
        # Append rather than replace, so earlier summaries are not lost.
        if self.conversation_summary:
            self.conversation_summary += " " + new_summary
        else:
            self.conversation_summary = new_summary

        # Drop the messages we just summarized from short-term memory.
        for _ in range(n):
            if self.short_term:
                self.short_term.popleft()

    def chat(self, user_message: str) -> str:
        """Send one user turn to the model and return the assistant's reply.

        Summarizes first when the window is nearly full, then records both
        sides of the exchange in short-term memory.
        """
        if self.should_summarize():
            self.summarize_conversation()

        self.short_term.append({"role": "user", "content": user_message})

        response = client.messages.create(
            model="claude-3-5-haiku-20241022",
            max_tokens=1024,
            system=self.build_system_prompt(),
            messages=list(self.short_term),
        )

        assistant_message = response.content[0].text
        self.short_term.append({"role": "assistant", "content": assistant_message})

        # Auto-extract facts (in production, use an LLM to detect important facts).
        if "my name is" in user_message.lower():
            tokens = user_message.lower().split("my name is")[1].split()
            if tokens:  # guard: message could end right after "my name is"
                # Strip trailing punctuation so "Alex." is stored as "Alex".
                name = tokens[0].strip(".,!?;:")
                self.remember_fact(f"User's name is {name.title()}")

        return assistant_message

# Demo: the agent recalls the user's name across turns via long-term facts.
agent = AgentWithMemory()
demo_turns = [
    "Hi! My name is Alex.",
    "I'm learning about AI agents.",
    "What's my name again?",
]
for turn in demo_turns:
    print(agent.chat(turn))
Try it yourself — PYTHON