MCPcopy
hub / github.com/maziyarpanahi/openmed / analyze_text

Function analyze_text

openmed/__init__.py:150–604  ·  view source on GitHub ↗

Run a token-classification model on ``text`` and format the predictions. Args: text: Clinical or biomedical text to analyse. model_name: Registry key, fully-qualified Hugging Face model id, or local model path. model_id: Alias for ``model_name``. Useful for APIs and examples that name model identifiers as ``model_id``. …

(
    text: str,
    model_name: str = "disease_detection_superclinical",
    *,
    model_id: Optional[str] = None,
    config: Optional[OpenMedConfig] = None,
    loader: Optional[ModelLoader] = None,
    aggregation_strategy: Optional[str] = "simple",
    output_format: str = "dict",
    include_confidence: bool = True,
    confidence_threshold: Optional[float] = 0.0,
    group_entities: bool = False,
    formatter_kwargs: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, Any]] = None,
    use_fast_tokenizer: bool = True,
    sentence_detection: bool = True,
    sentence_language: str = "en",
    sentence_clean: bool = False,
    sentence_segmenter: Optional[Any] = None,
    cache_results: bool = False,
    max_cache_entries: int = 128,
    **pipeline_kwargs: Any,
)

Source from the content-addressed store, hash-verified

148
149
150def analyze_text(
151 text: str,
152 model_name: str = "disease_detection_superclinical",
153 *,
154 model_id: Optional[str] = None,
155 config: Optional[OpenMedConfig] = None,
156 loader: Optional[ModelLoader] = None,
157 aggregation_strategy: Optional[str] = "simple",
158 output_format: str = "dict",
159 include_confidence: bool = True,
160 confidence_threshold: Optional[float] = 0.0,
161 group_entities: bool = False,
162 formatter_kwargs: Optional[Dict[str, Any]] = None,
163 metadata: Optional[Dict[str, Any]] = None,
164 use_fast_tokenizer: bool = True,
165 sentence_detection: bool = True,
166 sentence_language: str = "en",
167 sentence_clean: bool = False,
168 sentence_segmenter: Optional[Any] = None,
169 cache_results: bool = False,
170 max_cache_entries: int = 128,
171 **pipeline_kwargs: Any,
172) -> Union[AnalyzeResult, str, List[Dict[str, Any]]]:
173 """Run a token-classification model on ``text`` and format the predictions.
174
175 Args:
176 text: Clinical or biomedical text to analyse.
177 model_name: Registry key, fully-qualified Hugging Face model id, or
178 local model path.
179 model_id: Alias for ``model_name``. Useful for APIs and examples that
180 name model identifiers as ``model_id``.
181 config: Optional :class:`~openmed.core.config.OpenMedConfig` instance.
182 loader: Reuse an existing :class:`~openmed.core.models.ModelLoader`.
183 aggregation_strategy: Hugging Face aggregation strategy (``"simple"`` by
184 default). Set to ``None`` to work with raw token outputs.
185 output_format: ``"dict"`` (default), ``"json"``, ``"html"`` or ``"csv"``.
186 include_confidence: Whether to include confidence scores in formatted output.
187 confidence_threshold: Minimum confidence for entities. ``None`` keeps all.
188 group_entities: Merge adjacent entities of the same label in the formatted
189 output.
190 formatter_kwargs: Extra keyword arguments forwarded to
191 :func:`openmed.processing.format_predictions`.
192 metadata: Optional metadata to attach to the result.
193 use_fast_tokenizer: Prefer fast tokenizers when available.
194 sentence_detection: Enable pySBD-powered sentence detection (default: True).
195 sentence_language: Language hint for the sentence detector.
196 sentence_clean: Whether to enable pySBD's cleaning heuristics.
197 sentence_segmenter: Optional preconstructed pySBD segmenter to reuse.
198 cache_results: Whether to cache this result in the in-process LRU cache. Cached results may contain PHI, but are never saved to disk.
199 max_cache_entries: Maximum number of cached results.
200 **pipeline_kwargs: Additional arguments passed to
201 :meth:`openmed.core.models.ModelLoader.create_pipeline`.
202
203 Returns:
204 Analyze result for ``"dict"`` output, otherwise the requested rendered
205 format.
206
207 Example:

Calls 15

validate_input · Function · 0.85
validate_model_name · Function · 0.85
make_cache_key · Function · 0.85
get_result_cache · Function · 0.85
ModelLoader · Class · 0.85
_normalize_predictions · Function · 0.85
medical_tokenize · Function · 0.85
validate_output_format · Function · 0.85
format_predictions · Function · 0.85