Tools

pgvector with PostgreSQL

Add vector search capabilities to PostgreSQL using the pgvector extension.

pgvector

pgvector is a PostgreSQL extension that adds vector similarity search to your existing Postgres database. If you're already using PostgreSQL, this is often the simplest path to adding vector search.

Key Features

Store vectors as a native PostgreSQL type
Index vectors with HNSW or IVFFlat for fast search
Three similarity operators: L2 distance (<->), cosine distance (<=>), inner product (<#>)
Works with all PostgreSQL features: transactions, joins, filters

When to Use pgvector

You already use PostgreSQL
Your vector data is alongside relational data
You need transactional consistency
Your scale is millions (not billions) of vectors

Setup

sql

CREATE EXTENSION IF NOT EXISTS vector;

Example

sql

-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;

-- Create a table with a vector column
CREATE TABLE documents (
    id          BIGSERIAL PRIMARY KEY,
    content     TEXT NOT NULL,
    embedding   VECTOR(1536),  -- OpenAI text-embedding-3-small dimension
    source      TEXT,
    created_at  TIMESTAMPTZ DEFAULT NOW()
);

-- Create HNSW index for fast similarity search
-- (better query performance, slower indexing)
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);

-- Or IVFFlat index (faster to build, slightly slower queries)
-- CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops)
-- WITH (lists = 100);

-- Insert a document with embedding (pass as array)
INSERT INTO documents (content, embedding, source)
VALUES (
    'Vector databases are optimized for similarity search',
    '[0.1, 0.2, 0.3, ...]'::vector,  -- actual 1536-dim vector
    'documentation'
);

-- Semantic search: find most similar documents
-- <-> = L2 distance, <=> = cosine distance, <#> = negative inner product
SELECT
    id,
    content,
    source,
    1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
FROM documents
ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector
LIMIT 10;

-- Filter + semantic search (hybrid)
SELECT
    id,
    content,
    1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
FROM documents
WHERE source = 'documentation'   -- metadata filter
  AND created_at > '2024-01-01'
ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector
LIMIT 5;

Try it yourself — SQL

-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;

-- Create a table with a vector column
CREATE TABLE documents (
    id          BIGSERIAL PRIMARY KEY,
    content     TEXT NOT NULL,
    embedding   VECTOR(1536),  -- OpenAI text-embedding-3-small dimension
    source      TEXT,
    created_at  TIMESTAMPTZ DEFAULT NOW()
);

-- Create HNSW index for fast similarity search
-- (better query performance, slower indexing)
CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);

-- Or IVFFlat index (faster to build, slightly slower queries)
-- CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops)
-- WITH (lists = 100);

-- Insert a document with embedding (pass as array)
INSERT INTO documents (content, embedding, source)
VALUES (
    'Vector databases are optimized for similarity search',
    '[0.1, 0.2, 0.3, ...]'::vector,  -- actual 1536-dim vector
    'documentation'
);

-- Semantic search: find most similar documents
-- <-> = L2 distance, <=> = cosine distance, <#> = negative inner product
SELECT
    id,
    content,
    source,
    1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
FROM documents
ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector
LIMIT 10;

-- Filter + semantic search (hybrid)
SELECT
    id,
    content,
    1 - (embedding <=> '[0.1, 0.2, ...]'::vector) AS similarity
FROM documents
WHERE source = 'documentation'   -- metadata filter
  AND created_at > '2024-01-01'
ORDER BY embedding <=> '[0.1, 0.2, ...]'::vector
LIMIT 5;