pgvector support for Python
Supports Django, SQLAlchemy, SQLModel, Psycopg 3, Psycopg 2, asyncpg, and Peewee
Run:
```sh
pip install pgvector
```
And follow the instructions for your database library:
Or check out some examples:
- Embeddings with OpenAI
- Sentence embeddings with SentenceTransformers
- Hybrid search with SentenceTransformers (Reciprocal Rank Fusion)
- Hybrid search with SentenceTransformers (cross-encoder)
- Image search with PyTorch
- Implicit feedback recommendations with Implicit
- Explicit feedback recommendations with Surprise
- Recommendations with LightFM
- Horizontal scaling with Citus
Create a migration to enable the extension
```python
from pgvector.django import VectorExtension

class Migration(migrations.Migration):
    operations = [
        VectorExtension()
    ]
```
Add a vector field to your model
```python
from pgvector.django import VectorField

class Item(models.Model):
    embedding = VectorField(dimensions=3)
```
Insert a vector
```python
item = Item(embedding=[1, 2, 3])
item.save()
```
Get the nearest neighbors to a vector
```python
from pgvector.django import L2Distance

Item.objects.order_by(L2Distance('embedding', [3, 1, 2]))[:5]
```
Also supports MaxInnerProduct and CosineDistance
Get the distance
```python
Item.objects.annotate(distance=L2Distance('embedding', [3, 1, 2]))
```
Get items within a certain distance
```python
Item.objects.alias(distance=L2Distance('embedding', [3, 1, 2])).filter(distance__lt=5)
```
Average vectors
```python
from django.db.models import Avg

Item.objects.aggregate(Avg('embedding'))
```
Also supports Sum
Add an approximate index
```python
from pgvector.django import HnswIndex, IvfflatIndex

class Item(models.Model):
    class Meta:
        indexes = [
            HnswIndex(
                name='my_index',
                fields=['embedding'],
                m=16,
                ef_construction=64,
                opclasses=['vector_l2_ops']
            ),
            # or
            IvfflatIndex(
                name='my_index',
                fields=['embedding'],
                lists=100,
                opclasses=['vector_l2_ops']
            )
        ]
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Enable the extension
```python
session.execute(text('CREATE EXTENSION IF NOT EXISTS vector'))
```
Add a vector column
```python
from pgvector.sqlalchemy import Vector

class Item(Base):
    embedding = mapped_column(Vector(3))
```
Insert a vector
```python
item = Item(embedding=[1, 2, 3])
session.add(item)
session.commit()
```
Get the nearest neighbors to a vector
```python
session.scalars(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))
```
Also supports max_inner_product and cosine_distance
Get the distance
```python
session.scalars(select(Item.embedding.l2_distance([3, 1, 2])))
```
Get items within a certain distance
```python
session.scalars(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5))
```
Average vectors
```python
from sqlalchemy.sql import func

session.scalars(select(func.avg(Item.embedding))).first()
```
Also supports sum
Add an approximate index
```python
index = Index(
    'my_index',
    Item.embedding,
    postgresql_using='hnsw',
    postgresql_with={'m': 16, 'ef_construction': 64},
    postgresql_ops={'embedding': 'vector_l2_ops'}
)
# or
index = Index(
    'my_index',
    Item.embedding,
    postgresql_using='ivfflat',
    postgresql_with={'lists': 100},
    postgresql_ops={'embedding': 'vector_l2_ops'}
)

index.create(engine)
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Enable the extension
```python
session.exec(text('CREATE EXTENSION IF NOT EXISTS vector'))
```
Add a vector column
```python
from pgvector.sqlalchemy import Vector
from sqlalchemy import Column

class Item(SQLModel, table=True):
    embedding: List[float] = Field(sa_column=Column(Vector(3)))
```
Insert a vector
```python
item = Item(embedding=[1, 2, 3])
session.add(item)
session.commit()
```
Get the nearest neighbors to a vector
```python
session.exec(select(Item).order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5))
```
Also supports max_inner_product and cosine_distance
Get the distance
```python
session.exec(select(Item.embedding.l2_distance([3, 1, 2])))
```
Get items within a certain distance
```python
session.exec(select(Item).filter(Item.embedding.l2_distance([3, 1, 2]) < 5))
```
Average vectors
```python
from sqlalchemy.sql import func

session.exec(select(func.avg(Item.embedding))).first()
```
Also supports sum
Add an approximate index
```python
from sqlalchemy import Index

index = Index(
    'my_index',
    Item.embedding,
    postgresql_using='hnsw',
    postgresql_with={'m': 16, 'ef_construction': 64},
    postgresql_ops={'embedding': 'vector_l2_ops'}
)
# or
index = Index(
    'my_index',
    Item.embedding,
    postgresql_using='ivfflat',
    postgresql_with={'lists': 100},
    postgresql_ops={'embedding': 'vector_l2_ops'}
)

index.create(engine)
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Enable the extension
```python
conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
```
Register the vector type with your connection
```python
from pgvector.psycopg import register_vector

register_vector(conn)
```
For async connections, use
```python
from pgvector.psycopg import register_vector_async

await register_vector_async(conn)
```
Create a table
```python
conn.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))')
```
Insert a vector
```python
embedding = np.array([1, 2, 3])
conn.execute('INSERT INTO items (embedding) VALUES (%s)', (embedding,))
```
Get the nearest neighbors to a vector
```python
conn.execute('SELECT * FROM items ORDER BY embedding <-> %s LIMIT 5', (embedding,)).fetchall()
```
Add an approximate index
```python
conn.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)')
# or
conn.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)')
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Enable the extension
```python
cur = conn.cursor()
cur.execute('CREATE EXTENSION IF NOT EXISTS vector')
```
Register the vector type with your connection or cursor
```python
from pgvector.psycopg2 import register_vector

register_vector(conn)
```
Create a table
```python
cur.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))')
```
Insert a vector
```python
embedding = np.array([1, 2, 3])
cur.execute('INSERT INTO items (embedding) VALUES (%s)', (embedding,))
```
Get the nearest neighbors to a vector
```python
cur.execute('SELECT * FROM items ORDER BY embedding <-> %s LIMIT 5', (embedding,))
cur.fetchall()
```
Add an approximate index
```python
cur.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)')
# or
cur.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)')
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Enable the extension
```python
await conn.execute('CREATE EXTENSION IF NOT EXISTS vector')
```
Register the vector type with your connection
```python
from pgvector.asyncpg import register_vector

await register_vector(conn)
```
or your pool
```python
async def init(conn):
    await register_vector(conn)

pool = await asyncpg.create_pool(..., init=init)
```
Create a table
```python
await conn.execute('CREATE TABLE items (id bigserial PRIMARY KEY, embedding vector(3))')
```
Insert a vector
```python
embedding = np.array([1, 2, 3])
await conn.execute('INSERT INTO items (embedding) VALUES ($1)', embedding)
```
Get the nearest neighbors to a vector
```python
await conn.fetch('SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5', embedding)
```
Add an approximate index
```python
await conn.execute('CREATE INDEX ON items USING hnsw (embedding vector_l2_ops)')
# or
await conn.execute('CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 100)')
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
Add a vector column
```python
from pgvector.peewee import VectorField

class Item(BaseModel):
    embedding = VectorField(dimensions=3)
```
Insert a vector
```python
item = Item.create(embedding=[1, 2, 3])
```
Get the nearest neighbors to a vector
```python
Item.select().order_by(Item.embedding.l2_distance([3, 1, 2])).limit(5)
```
Also supports max_inner_product and cosine_distance
Get the distance
```python
Item.select(Item.embedding.l2_distance([3, 1, 2]).alias('distance'))
```
Get items within a certain distance
```python
Item.select().where(Item.embedding.l2_distance([3, 1, 2]) < 5)
```
Average vectors
```python
from peewee import fn

Item.select(fn.avg(Item.embedding)).scalar()
```
Also supports sum
Add an approximate index
```python
Item.add_index('embedding vector_l2_ops', using='hnsw')
```
Use vector_ip_ops for inner product and vector_cosine_ops for cosine distance
View the changelog
Everyone is encouraged to help improve this project. Here are a few ways you can help:
- Report bugs
- Fix bugs and submit pull requests
- Write, clarify, or fix documentation
- Suggest or add new features
To get started with development:
```sh
git clone https://github.com/pgvector/pgvector-python.git
cd pgvector-python
pip install -r requirements.txt
createdb pgvector_python_test
pytest
```