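Create and index a SembleIndex from a directory. :param path: Root directory to index. :param content: Content types to index, e.g. ContentType.CODE or [ContentType.CODE, ContentType.DOCS]. :param include_text_files: Deprecated. Pass a content sequence directly instead. :param model_path: Path to the model to use. If None, the default model will be used. :return: An indexed SembleIndex. Chunk file paths are relative to ``path``. :raises FileNotFoundError: If `path` does not exist. :raises NotADirectoryError: If `path` exists but is not a directory.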
(
cls,
path: str | Path,
content: ContentType | Sequence[ContentType] = _DEFAULT_CONTENT,
include_text_files: bool | None = None,
model_path: str | None = None,
)
| 123 | |
@classmethod
def from_path(
    cls,
    path: str | Path,
    content: ContentType | Sequence[ContentType] = _DEFAULT_CONTENT,
    include_text_files: bool | None = None,
    model_path: str | None = None,
) -> SembleIndex:
    """Create and index a SembleIndex from a directory.

    If ``get_validated_cache`` returns a cache path for the given path, model
    path and content selection, the index is loaded from it through
    ``cls.load_from_disk`` and no model is loaded here. Otherwise the model is
    loaded and a fresh index is built from the resolved directory.

    :param path: Root directory to index.
    :param content: Content types to index, e.g. ContentType.CODE or [ContentType.CODE, ContentType.DOCS].
    :param include_text_files: Deprecated. Pass a content sequence directly instead.
    :param model_path: Path to the model to use. If None, the default model will be used.
    :return: An indexed SembleIndex. Chunk file paths are relative to ``path``.
    :raises FileNotFoundError: If `path` does not exist.
    :raises NotADirectoryError: If `path` exists but is not a directory.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Path does not exist: {path}")
    if not path.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {path}")

    # Fold the deprecated ``include_text_files`` flag into the content selection.
    normalized = _apply_include_text_files(content, include_text_files)

    # NOTE(review): the cache is looked up with the path as given, while the
    # index below is built from the resolved path. Confirm that
    # ``get_validated_cache`` normalizes the path itself, otherwise the same
    # relative path used from different working directories may share a key.
    cache_path = get_validated_cache(str(path), model_path, normalized)
    if cache_path:
        return cls.load_from_disk(cache_path)

    # ``load_model`` also returns the model path, which replaces the (possibly
    # None) argument before it is stored on the index.
    model, model_path = load_model(model_path)

    # The resolved path serves as both the directory to index and the display root.
    path = path.resolve()
    bm25, vicinity, chunks = create_index_from_path(
        path,
        model=model,
        content=normalized,
        display_root=path,
    )

    # Construct through ``cls`` (not a hard-coded ``SembleIndex``) so both the
    # cache-hit and cache-miss branches dispatch on the class the caller used.
    return cls(model, bm25, vicinity, chunks, model_path, root=path, content=normalized)
| 163 | |
| 164 | @classmethod |
| 165 | def from_git( |