Begin a block where all resources allocated during the block will be freed at the end of it. If a resource was created within the memory zone block, accessing it outside the block is invalid. Behaviour of this invalid access is undefined. Memory zones should not be nested.
(self, mem: Optional[Pool] = None)
| 2099 | |
| 2100 | @contextmanager |
| 2101 | def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]: |
| 2102 | """Begin a block where all resources allocated during the block will |
| 2103 | be freed at the end of it. If a resource was created within the |
| 2104 | memory zone block, accessing it outside the block is invalid. |
| 2105 | Behaviour of this invalid access is undefined. Memory zones should |
| 2106 | not be nested. |
| 2107 | |
| 2108 | The memory zone is helpful for services that need to process large |
| 2109 | volumes of text with a defined memory budget. |
| 2110 | |
| 2111 | Example |
| 2112 | ------- |
| 2113 | >>> with nlp.memory_zone(): |
| 2114 | ... for doc in nlp.pipe(texts): |
| 2115 | ... process_my_doc(doc) |
| 2116 | >>> # use_doc(doc) <-- Invalid: doc was allocated in the memory zone |
| 2117 | """ |
| 2118 | if mem is None: |
| 2119 | mem = Pool()  # no pool was supplied: create one for this zone |
| 2120 | # ExitStack lets us enter a variable number of context managers: the |
| 2121 | # vocab's zone always, plus the tokenizer's and each pipe's when they |
| 2122 | # have a memory_zone attribute. Fixed nested blocks can't express that. |
| 2123 | with ExitStack() as stack: |
| 2124 | contexts = [stack.enter_context(self.vocab.memory_zone(mem))] |
| 2125 | if hasattr(self.tokenizer, "memory_zone"): |
| 2126 | contexts.append(stack.enter_context(self.tokenizer.memory_zone(mem))) |
| 2127 | for _, pipe in self.pipeline: |
| 2128 | if hasattr(pipe, "memory_zone"): |
| 2129 | contexts.append(stack.enter_context(pipe.memory_zone(mem))) |
| 2130 | yield mem  # the pool (given or newly created) is what the caller's with-statement binds |
| 2131 | |
| 2132 | def to_disk( |
| 2133 | self, path: Union[str, Path], *, exclude: Iterable[str] = SimpleFrozenList() |