(self)
| 122 | ) |
| 123 | |
def _setup_table(self) -> None:
    """Define the embeddings table and make sure its HNSW index exists.

    Steps, in order:

    1. If ``config.replace_collection`` is set, call ``delete_collection``
       for ``config.collection_name``.
    2. Declare the table (``id``, ``embedding``, ``document``, ``cmetadata``)
       on ``self.metadata`` and emit it with ``metadata.create_all``.
    3. Reflect the table back from the database.
    4. Create an HNSW index (cosine ops) on the ``embedding`` column unless
       ``index_exists`` reports that it is already there.

    Raises:
        LangroidImportError: If the ``pgvector`` package cannot be imported.
    """
    try:
        from pgvector.sqlalchemy import Vector
    except ImportError as e:
        # Chain explicitly so the original import failure stays attached.
        raise LangroidImportError(extra="postgres", error=str(e)) from e

    collection_name = self.config.collection_name

    if self.config.replace_collection:
        self.delete_collection(collection_name)

    self.embeddings_table = Table(
        collection_name,
        self.metadata,
        Column("id", String, primary_key=True, nullable=False, unique=True),
        Column("embedding", Vector(self.embedding_dim)),
        Column("document", String),
        Column("cmetadata", JSONB),
        # Allow re-declaring a table already registered on this MetaData.
        extend_existing=True,
    )

    self.metadata.create_all(self.engine)
    self.metadata.reflect(bind=self.engine, only=[collection_name])

    # Create an HNSW index on the embedding column if it doesn't exist.
    # The index enables efficient nearest-neighbor search using cosine
    # distance (vector_cosine_ops). PostgreSQL builds the index as part of
    # CREATE INDEX; no manual step is required afterwards.
    # Read more about the pgvector HNSW index here:
    # https://github.com/pgvector/pgvector?tab=readme-ov-file#hnsw
    index_name = f"hnsw_index_{collection_name}_embedding"
    with self.engine.connect() as connection:
        # CREATE INDEX CONCURRENTLY cannot run inside a transaction block.
        # Use driver-level autocommit instead of sending a raw COMMIT, which
        # would leave SQLAlchemy's transaction tracking out of sync with the
        # server. This must be set before any statement is executed on the
        # connection, hence before the index_exists() lookup.
        autocommit_conn = connection.execution_options(
            isolation_level="AUTOCOMMIT"
        )
        if not self.index_exists(autocommit_conn, index_name):
            create_index_query = text(
                f"""
                CREATE INDEX CONCURRENTLY IF NOT EXISTS {_quote_ident(index_name)}
                ON {_quote_ident(collection_name)}
                USING hnsw (embedding vector_cosine_ops)
                WITH (
                    m = {self.config.hnsw_m},
                    ef_construction = {self.config.hnsw_ef_construction}
                );
                """
            )
            autocommit_conn.execute(create_index_query)
| 169 | |
| 170 | def index_exists(self, connection: Connection, index_name: str) -> bool: |
| 171 | """Check if an index exists.""" |
no test coverage detected