Register a new pipeline component. Can be used for stateless function components that don't require a separate factory. Can be used as a decorator on a function or classmethod, or called as a function with the factory provided as the func keyword argument. To create a component and add it to the pipeline, you can use nlp.add_pipe(name).
(
cls,
name: str,
*,
assigns: Iterable[str] = SimpleFrozenList(),
requires: Iterable[str] = SimpleFrozenList(),
retokenizes: bool = False,
func: Optional[PipeCallable] = None,
)
| 554 | |
| 555 | @classmethod |
| 556 | def component( |
| 557 | cls, |
| 558 | name: str, |
| 559 | *, |
| 560 | assigns: Iterable[str] = SimpleFrozenList(), |
| 561 | requires: Iterable[str] = SimpleFrozenList(), |
| 562 | retokenizes: bool = False, |
| 563 | func: Optional[PipeCallable] = None, |
| 564 | ) -> Callable[..., Any]: |
| 565 | """Register a new pipeline component. Can be used for stateless function |
| 566 | components that don't require a separate factory. Can be used as a |
| 567 | decorator on a function or classmethod, or called as a function with the |
| 568 | factory provided as the func keyword argument. To create a component and |
| 569 | add it to the pipeline, you can use nlp.add_pipe(name). |
| 570 | |
| 571 | name (str): The name of the component factory. |
| 572 | assigns (Iterable[str]): Doc/Token attributes assigned by this component, |
| 573 | e.g. "token.ent_id". Used for pipeline analysis. |
| 574 | requires (Iterable[str]): Doc/Token attributes required by this component, |
| 575 | e.g. "token.ent_id". Used for pipeline analysis. |
| 576 | retokenizes (bool): Whether the component changes the tokenization. |
| 577 | Used for pipeline analysis. |
| 578 | func (Optional[Callable[[Doc], Doc]]): Factory function if not used as a decorator. |
| 579 | |
| 580 | DOCS: https://spacy.io/api/language#component |
| 581 | """ |
| 582 | if name is not None: |
| 583 | if not isinstance(name, str): |
| 584 | raise ValueError(Errors.E963.format(decorator="component")) |
| 585 | if "." in name: |
| 586 | raise ValueError(Errors.E853.format(name=name)) |
| 587 | component_name = name if name is not None else util.get_object_name(func) |
| 588 | |
| 589 | def add_component(component_func: PipeCallable) -> Callable: |
| 590 | if isinstance(func, type): # function is a class |
| 591 | raise ValueError(Errors.E965.format(name=component_name)) |
| 592 | |
| 593 | def factory_func(nlp, name: str) -> PipeCallable: |
| 594 | return component_func |
| 595 | |
| 596 | internal_name = cls.get_factory_name(name) |
| 597 | if internal_name in registry.factories: |
| 598 | # We only check for the internal name here – it's okay if it's a |
| 599 | # subclass and the base class has a factory of the same name. We |
| 600 | # also only raise if the function is different to prevent raising |
| 601 | # if module is reloaded. It's hacky, but we need to check the |
| 602 | # existing function for a closure and whether that's identical |
| 603 | # to the component function (because factory_func created above |
| 604 | # will always be different, even for the same function) |
| 605 | existing_func = registry.factories.get(internal_name) |
| 606 | closure = existing_func.__closure__ |
| 607 | wrapped = [c.cell_contents for c in closure][0] if closure else None |
| 608 | if util.is_same_func(wrapped, component_func): |
| 609 | factory_func = existing_func # noqa: F811 |
| 610 | |
| 611 | cls.factory( |
| 612 | component_name, |
| 613 | assigns=assigns, |