MCPcopy
hub / github.com/explosion/spaCy / initialize

Method initialize

spacy/language.py:1293–1367  ·  view source on GitHub ↗

Initialize the pipe for training, using data examples if available. get_examples (Callable[[], Iterable[Example]]): Optional function that returns gold-standard Example objects. sgd (Optional[Optimizer]): An optimizer to use for updates. If not provided, will be created using the .create_optimizer() method. RETURNS (thinc.api.Optimizer): The optimizer.

(
        self,
        get_examples: Optional[Callable[[], Iterable[Example]]] = None,
        *,
        sgd: Optional[Optimizer] = None,
    )

Source from the content-addressed store, hash-verified

1291 return self.initialize(get_examples, sgd=sgd)
1292
1293 def initialize(
1294 self,
1295 get_examples: Optional[Callable[[], Iterable[Example]]] = None,
1296 *,
1297 sgd: Optional[Optimizer] = None,
1298 ) -> Optimizer:
1299 """Initialize the pipe for training, using data examples if available.
1300
1301 get_examples (Callable[[], Iterable[Example]]): Optional function that
1302 returns gold-standard Example objects.
1303 sgd (Optional[Optimizer]): An optimizer to use for updates. If not
1304 provided, will be created using the .create_optimizer() method.
1305 RETURNS (thinc.api.Optimizer): The optimizer.
1306
1307 DOCS: https://spacy.io/api/language#initialize
1308 """
1309 if get_examples is None:
1310 util.logger.debug(
1311 "No 'get_examples' callback provided to 'Language.initialize', creating dummy examples"
1312 )
1313 doc = Doc(self.vocab, words=["x", "y", "z"])
1314
1315 def get_examples():
1316 return [Example.from_dict(doc, {})]
1317
1318 if not hasattr(get_examples, "__call__"):
1319 err = Errors.E930.format(
1320 method="Language.initialize", obj=type(get_examples)
1321 )
1322 raise TypeError(err)
1323 # Make sure the config is interpolated so we can resolve subsections
1324 config = self.config.interpolate()
1325 # These are the settings provided in the [initialize] block in the config
1326 I = registry.resolve(config["initialize"], schema=ConfigSchemaInit) # type: ignore[arg-type]
1327 before_init = I["before_init"]
1328 if before_init is not None:
1329 before_init(self)
1330 try:
1331 init_vocab(
1332 self, data=I["vocab_data"], lookups=I["lookups"], vectors=I["vectors"]
1333 )
1334 except IOError:
1335 raise IOError(Errors.E884.format(vectors=I["vectors"]))
1336 if self.vocab.vectors.shape[1] >= 1:
1337 ops = get_current_ops()
1338 self.vocab.vectors.to_ops(ops)
1339 if hasattr(self.tokenizer, "initialize"):
1340 tok_settings = validate_init_settings(
1341 self.tokenizer.initialize, # type: ignore[union-attr]
1342 I["tokenizer"],
1343 section="tokenizer",
1344 name="tokenizer",
1345 )
1346 self.tokenizer.initialize(get_examples, nlp=self, **tok_settings) # type: ignore[union-attr]
1347 for name, proc in self.pipeline:
1348 if isinstance(proc, ty.InitializableComponent):
1349 p_settings = I["components"].get(name, {})
1350 p_settings = validate_init_settings(

Calls 8

_link_components · Method · 0.95
create_optimizer · Method · 0.95
before_init · Function · 0.85
init_vocab · Function · 0.85
validate_init_settings · Function · 0.85
init_tok2vec · Function · 0.85
after_init · Function · 0.85
get · Method · 0.45