Memory and Chat History

Add persistent memory to your LangChain applications for multi-turn conversations.

Memory in LangChain

LangChain provides several approaches for conversation memory:

Message History (Recommended)

Store messages in a chat history object and inject them into the prompt on each turn.
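
The idea can be sketched without any framework: keep a list of messages per conversation and splice it into the prompt on every turn. This is a minimal illustration of the store-and-inject pattern; the helper names here are made up, not LangChain APIs.

```python
# Minimal sketch of the store-and-inject pattern (no LangChain required).
# Each turn: append the user message, build a prompt from the full history,
# then append the model's reply so the next turn sees it.

history: list[tuple[str, str]] = []  # (role, content) pairs

def build_prompt(system: str, user_message: str) -> list[tuple[str, str]]:
    history.append(("human", user_message))
    return [("system", system)] + history

def record_reply(reply: str) -> None:
    history.append(("ai", reply))

prompt = build_prompt("You are helpful.", "My name is Alice.")
record_reply("Nice to meet you, Alice!")
prompt = build_prompt("You are helpful.", "What's my name?")
# The second prompt now contains the earlier exchange,
# so the model has what it needs to answer "Alice".
```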

Memory Types (Legacy)

These classes are deprecated in recent LangChain releases in favor of message history:

  • ConversationBufferMemory: Store every message verbatim
  • ConversationSummaryMemory: Summarize older messages to keep the history compact
  • ConversationBufferWindowMemory: Keep only the last N messages
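
To make the trade-off concrete, here is a rough sketch of what a buffer-window memory does: keep only the last N messages and silently drop older ones. This is an illustration of the idea, not the actual ConversationBufferWindowMemory implementation.

```python
from collections import deque

# Keep only the last k messages, like ConversationBufferWindowMemory(k=3).
# Older turns fall out of the window automatically.

def make_window_memory(k: int):
    buffer: deque[str] = deque(maxlen=k)

    def add(message: str) -> None:
        buffer.append(message)

    def messages() -> list[str]:
        return list(buffer)

    return add, messages

add, messages = make_window_memory(k=3)
for m in ["m1", "m2", "m3", "m4", "m5"]:
    add(m)
print(messages())  # ['m3', 'm4', 'm5'] — only the last 3 survive
```

The buffer window is cheap and predictable, but anything outside the window is forgotten entirely, which is why summary-based memory exists as an alternative.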

RunnableWithMessageHistory

The modern approach wraps any runnable with automatic message-history management. You provide:

  • get_session_history: A function that returns a message store for a given session ID
  • input_messages_key: The input key that contains the new user message
  • history_messages_key: The prompt variable into which past messages are injected

Example

```python
from langchain_anthropic import ChatAnthropic
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.output_parsers import StrOutputParser
from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

llm = ChatAnthropic(model="claude-3-5-haiku-20241022")

# In-memory store (use Redis or a database in production)
store: dict[str, InMemoryChatMessageHistory] = {}

def get_session_history(session_id: str) -> BaseChatMessageHistory:
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]

# Create a chain with memory
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant. Remember details from the conversation."),
    MessagesPlaceholder(variable_name="history"),
    ("human", "{input}"),
])

chain = prompt | llm | StrOutputParser()

# Wrap with message history
chain_with_memory = RunnableWithMessageHistory(
    chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="history",
)

# Use with session IDs for multi-user support
def chat(session_id: str, message: str) -> str:
    return chain_with_memory.invoke(
        {"input": message},
        config={"configurable": {"session_id": session_id}},
    )

# Session 1: Alice
print(chat("alice", "Hi! My name is Alice and I love Python."))
print(chat("alice", "What programming language did I just mention?"))

# Session 2: Bob (separate memory)
print(chat("bob", "Hi! I'm Bob and I prefer JavaScript."))
print(chat("bob", "What's my name and preferred language?"))
```

Trimming Message History

Long conversations can overflow the model's context window. trim_messages keeps the history within a token budget before it reaches the model:

```python
from langchain_core.messages import trim_messages

# Keep the most recent messages within a 2000-token budget;
# the system message is always retained
trimmer = trim_messages(
    max_tokens=2000,
    strategy="last",
    token_counter=llm,
    include_system=True,
)

trimmed_chain = (
    {"input": lambda x: x["input"], "history": lambda x: trimmer.invoke(x["history"])}
    | prompt
    | llm
    | StrOutputParser()
)
```
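
What strategy="last" does can be sketched in plain Python: walk backward from the newest message, keep messages while the token budget allows, and optionally always keep the system message. This is a simplified illustration, not the real trim_messages; the word-count "tokenizer" stands in for a real one.

```python
# Simplified sketch of "last"-strategy trimming (not the real trim_messages).
# Token counts here are just word counts, standing in for a real tokenizer.

def count_tokens(message: dict) -> int:
    return len(message["content"].split())

def trim_last(messages: list[dict], max_tokens: int, include_system: bool = True) -> list[dict]:
    system = [m for m in messages if m["role"] == "system"] if include_system else []
    rest = [m for m in messages if m["role"] != "system"] if include_system else list(messages)
    budget = max_tokens - sum(count_tokens(m) for m in system)
    kept: list[dict] = []
    for m in reversed(rest):              # newest first
        cost = count_tokens(m)
        if cost > budget:
            break
        kept.append(m)
        budget -= cost
    return system + list(reversed(kept))  # restore chronological order

msgs = [
    {"role": "system", "content": "be brief"},
    {"role": "human", "content": "one two three four five"},
    {"role": "ai", "content": "six seven"},
    {"role": "human", "content": "eight nine ten"},
]
print([m["content"] for m in trim_last(msgs, max_tokens=8)])
# → ['be brief', 'six seven', 'eight nine ten']
```

Note that the oldest user message is dropped once the budget runs out, while the system message survives regardless of its position.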