MCPcopy
hub / github.com/MinishLab/semble / from_path

Method from_path

src/semble/index/index.py:125–162  ·  view source on GitHub ↗

Create and index a SembleIndex from a directory.

- :param path: Root directory to index.
- :param content: Content types to index, e.g. ContentType.CODE or [ContentType.CODE, ContentType.DOCS].
- :param include_text_files: Deprecated. Pass a content sequence directly instead.
- :param model_path: Path to the model to use. If None, the default model will be used.

(
        cls,
        path: str | Path,
        content: ContentType | Sequence[ContentType] = _DEFAULT_CONTENT,
        include_text_files: bool | None = None,
        model_path: str | None = None,
    )

Source from the content-addressed store, hash-verified

123
@classmethod
def from_path(
    cls,
    path: str | Path,
    content: ContentType | Sequence[ContentType] = _DEFAULT_CONTENT,
    include_text_files: bool | None = None,
    model_path: str | None = None,
) -> SembleIndex:
    """Create and index a SembleIndex from a directory.

    If ``get_validated_cache`` returns a cache path for this directory, model and
    content selection, the index is loaded from that cache and no model is loaded.
    Otherwise the model is loaded and a new index is built from the resolved
    directory.

    :param path: Root directory to index.
    :param content: Content types to index, e.g. ContentType.CODE or [ContentType.CODE, ContentType.DOCS].
    :param include_text_files: Deprecated. Pass a content sequence directly instead.
    :param model_path: Path to the model to use. If None, the default model will be used.
    :return: An indexed SembleIndex. Chunk file paths are relative to ``path``.
    :raises FileNotFoundError: If `path` does not exist.
    :raises NotADirectoryError: If `path` exists but is not a directory.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Path does not exist: {path}")
    if not path.is_dir():
        raise NotADirectoryError(f"Path is not a directory: {path}")

    normalized = _apply_include_text_files(content, include_text_files)

    # NOTE(review): the cache lookup receives the path as given (not yet resolved),
    # while the index built below uses the resolved path as its root. Confirm that
    # get_validated_cache normalizes the path itself.
    cache_path = get_validated_cache(str(path), model_path, normalized)
    if cache_path:
        return cls.load_from_disk(cache_path)

    # Cache miss: only now pay for loading the model. load_model also returns the
    # model path that is stored on the index.
    model, model_path = load_model(model_path)

    # The resolved directory is both the indexing root and the display root.
    path = path.resolve()
    bm25, vicinity, chunks = create_index_from_path(
        path,
        model=model,
        content=normalized,
        display_root=path,
    )

    # Construct through ``cls`` (as the cache-hit branch does) so that subclasses
    # get an instance of their own type on both code paths.
    return cls(model, bm25, vicinity, chunks, model_path, root=path, content=normalized)
163
164 @classmethod
165 def from_git(

Callers 15

_build_indexFunction · 0.45
_run_clearFunction · 0.45
get_validated_cacheFunction · 0.45
load_from_diskMethod · 0.45
saveMethod · 0.45
indexed_indexFunction · 0.45
_bench_sembleFunction · 0.45
_bench_coderankembedFunction · 0.45

Calls 6

get_validated_cacheFunction · 0.90
load_modelFunction · 0.90
create_index_from_pathFunction · 0.90
SembleIndexClass · 0.85
load_from_diskMethod · 0.80