chunking
Splitting documents into smaller, retrievable segments. Chunk size (typically 256–1024 tokens) affects retrieval precision and context quality.
Syntax
rag
chunks = splitter.split(document, chunk_size=512, overlap=50)
Example
rag
# Fixed-size chunking with a recursive, separator-aware splitter.
# `long_document` is the input text; it must be defined before this runs.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# NOTE: chunk_size and chunk_overlap are measured with the splitter's length
# function, which counts characters by default (not tokens).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=64,
    # Tried in order: paragraph breaks, line breaks, sentence ends, spaces.
    separators=["\n\n", "\n", ".", " "],
)
chunks = splitter.split_text(long_document)
print(f"Created {len(chunks)} chunks")
# An empty document yields no chunks; guard the preview so that indexing
# chunks[0] cannot raise IndexError.
if chunks:
    print(f"First chunk: {chunks[0][:100]}...")