MCPcopy
hub / github.com/explosion/spaCy / init_config

Function init_config

spacy/cli/init_config.py:204–264  ·  view source on GitHub ↗
(
    *,
    lang: str = InitValues.lang,
    pipeline: List[str] = InitValues.pipeline,
    optimize: str = InitValues.optimize,
    gpu: bool = InitValues.gpu,
    pretraining: bool = InitValues.pretraining,
    silent: bool = True,
)

Source from the content-addressed store, hash-verified

202
203
204def init_config(
205 *,
206 lang: str = InitValues.lang,
207 pipeline: List[str] = InitValues.pipeline,
208 optimize: str = InitValues.optimize,
209 gpu: bool = InitValues.gpu,
210 pretraining: bool = InitValues.pretraining,
211 silent: bool = True,
212) -> Config:
213 msg = Printer(no_print=silent)
214 with TEMPLATE_PATH.open("r") as f:
215 template = Template(f.read())
216 # Filter out duplicates since tok2vec and transformer are added by template
217 pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")]
218 defaults = RECOMMENDATIONS["__default__"]
219 reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).model_dump()
220 variables = {
221 "lang": lang,
222 "components": pipeline,
223 "optimize": optimize,
224 "hardware": "gpu" if gpu else "cpu",
225 "transformer_data": reco["transformer"],
226 "word_vectors": reco["word_vectors"],
227 "has_letters": reco["has_letters"],
228 }
229 if variables["transformer_data"] and not has_spacy_transformers():
230 msg.warn(
231 "To generate a more effective transformer-based config (GPU-only), "
232 "install the spacy-transformers package and re-run this command. "
233 "The config generated now does not use transformers."
234 )
235 variables["transformer_data"] = None
236 base_template = template.render(variables).strip()
237 # Giving up on getting the newlines right in jinja for now
238 base_template = re.sub(r"\n\n\n+", "\n\n", base_template)
239 # Access variables declared in templates
240 template_vars = template.make_module(variables)
241 use_case = {
242 "Language": lang,
243 "Pipeline": ", ".join(pipeline),
244 "Optimize for": optimize,
245 "Hardware": variables["hardware"].upper(),
246 "Transformer": (
247 template_vars.transformer.get("name") # type: ignore[attr-defined]
248 if template_vars.use_transformer # type: ignore[attr-defined]
249 else None
250 ),
251 }
252 msg.info("Generated config template specific for your use case")
253 for label, value in use_case.items():
254 msg.text(f"- {label}: {value}")
255 with show_validation_error(hint_fill=False):
256 config = util.load_config_from_str(base_template)
257 nlp = util.load_model_from_config(config, auto_fill=True)
258 config = nlp.config
259 if pretraining:
260 validate_config_for_pretrain(config, msg)
261 pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)

Callers 4

test_init_config · Function · 0.90
test_init_labels · Function · 0.90
init_config_cli · Function · 0.85
debug_diff · Function · 0.85

Calls 7

has_spacy_transformers · Function · 0.85
show_validation_error · Function · 0.85
merge · Method · 0.80
get · Method · 0.45
render · Method · 0.45

Tested by 2

test_init_config · Function · 0.72
test_init_labels · Function · 0.72

Used in the wild: real call sites across dependent graphs

searching dependent graphs…