Add patterns to the span ruler. A pattern can either be a token pattern (list of dicts) or a phrase pattern (string). For example: {'label': 'ORG', 'pattern': 'Apple'} {'label': 'ORG', 'pattern': 'Apple', 'id': 'apple'} {'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}
(self, patterns: List[PatternType])
| 320 | return self._patterns |
| 321 | |
def add_patterns(self, patterns: List[PatternType]) -> None:
    """Add patterns to the span ruler. A pattern can either be a token
    pattern (list of dicts) or a phrase pattern (string). For example:
    {'label': 'ORG', 'pattern': 'Apple'}
    {'label': 'ORG', 'pattern': 'Apple', 'id': 'apple'}
    {'label': 'GPE', 'pattern': [{'lower': 'san'}, {'lower': 'francisco'}]}

    Each entry is registered under a match key built from its label and
    optional id, and is appended to the ruler's stored patterns.

    patterns (list): The patterns to add.
    RAISES (ValueError): If an entry's "pattern" is neither a string nor a
        list (Errors.E097). Entries processed before the invalid one have
        already been recorded when this is raised.

    DOCS: https://spacy.io/api/spanruler#add_patterns
    """

    # Disable this component and every component after it while the phrase
    # patterns are run through the pipeline, in case they haven't been
    # initialized / deserialized yet. If this component is not part of the
    # pipeline at all, nothing is disabled. (Slicing with a "not found"
    # index of -1 would wrongly disable the last, unrelated component.)
    subsequent_pipes: List[str] = []
    for i, (_name, pipe) in enumerate(self.nlp.pipeline):
        if self == pipe:
            subsequent_pipes = list(self.nlp.pipe_names[i:])
            break

    with self.nlp.select_pipes(disable=subsequent_pipes):
        phrase_pattern_labels = []
        phrase_pattern_texts = []
        for entry in patterns:
            p_label = cast(str, entry["label"])
            p_id = cast(str, entry.get("id", ""))
            # The matcher key encodes both label and id so they can be
            # recovered from a match via _match_label_id_map.
            label = repr((p_label, p_id))
            self._match_label_id_map[self.nlp.vocab.strings.as_int(label)] = {
                "label": p_label,
                "id": p_id,
            }
            if isinstance(entry["pattern"], str):
                # Phrase patterns are collected and processed in one batch
                # below rather than one pipeline call per pattern.
                phrase_pattern_labels.append(label)
                phrase_pattern_texts.append(entry["pattern"])
            elif isinstance(entry["pattern"], list):
                self.matcher.add(label, [entry["pattern"]])
            else:
                raise ValueError(Errors.E097.format(pattern=entry["pattern"]))
            self._patterns.append(entry)
        for label, pattern in zip(
            phrase_pattern_labels,
            self.nlp.pipe(phrase_pattern_texts),
        ):
            self.phrase_matcher.add(label, [pattern])
| 369 | |
| 370 | def clear(self) -> None: |
| 371 | """Reset all patterns. |