(
*,
lang: str = InitValues.lang,
pipeline: List[str] = InitValues.pipeline,
optimize: str = InitValues.optimize,
gpu: bool = InitValues.gpu,
pretraining: bool = InitValues.pretraining,
silent: bool = True,
)
| 202 | |
| 203 | |
| 204 | def init_config( |
| 205 | *, |
| 206 | lang: str = InitValues.lang, |
| 207 | pipeline: List[str] = InitValues.pipeline, |
| 208 | optimize: str = InitValues.optimize, |
| 209 | gpu: bool = InitValues.gpu, |
| 210 | pretraining: bool = InitValues.pretraining, |
| 211 | silent: bool = True, |
| 212 | ) -> Config: |
| 213 | msg = Printer(no_print=silent) |
| 214 | with TEMPLATE_PATH.open("r") as f: |
| 215 | template = Template(f.read()) |
| 216 | # Filter out duplicates since tok2vec and transformer are added by template |
| 217 | pipeline = [pipe for pipe in pipeline if pipe not in ("tok2vec", "transformer")] |
| 218 | defaults = RECOMMENDATIONS["__default__"] |
| 219 | reco = RecommendationSchema(**RECOMMENDATIONS.get(lang, defaults)).model_dump() |
| 220 | variables = { |
| 221 | "lang": lang, |
| 222 | "components": pipeline, |
| 223 | "optimize": optimize, |
| 224 | "hardware": "gpu" if gpu else "cpu", |
| 225 | "transformer_data": reco["transformer"], |
| 226 | "word_vectors": reco["word_vectors"], |
| 227 | "has_letters": reco["has_letters"], |
| 228 | } |
| 229 | if variables["transformer_data"] and not has_spacy_transformers(): |
| 230 | msg.warn( |
| 231 | "To generate a more effective transformer-based config (GPU-only), " |
| 232 | "install the spacy-transformers package and re-run this command. " |
| 233 | "The config generated now does not use transformers." |
| 234 | ) |
| 235 | variables["transformer_data"] = None |
| 236 | base_template = template.render(variables).strip() |
| 237 | # Giving up on getting the newlines right in jinja for now |
| 238 | base_template = re.sub(r"\n\n\n+", "\n\n", base_template) |
| 239 | # Access variables declared in templates |
| 240 | template_vars = template.make_module(variables) |
| 241 | use_case = { |
| 242 | "Language": lang, |
| 243 | "Pipeline": ", ".join(pipeline), |
| 244 | "Optimize for": optimize, |
| 245 | "Hardware": variables["hardware"].upper(), |
| 246 | "Transformer": ( |
| 247 | template_vars.transformer.get("name") # type: ignore[attr-defined] |
| 248 | if template_vars.use_transformer # type: ignore[attr-defined] |
| 249 | else None |
| 250 | ), |
| 251 | } |
| 252 | msg.info("Generated config template specific for your use case") |
| 253 | for label, value in use_case.items(): |
| 254 | msg.text(f"- {label}: {value}") |
| 255 | with show_validation_error(hint_fill=False): |
| 256 | config = util.load_config_from_str(base_template) |
| 257 | nlp = util.load_model_from_config(config, auto_fill=True) |
| 258 | config = nlp.config |
| 259 | if pretraining: |
| 260 | validate_config_for_pretrain(config, msg) |
| 261 | pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH) |
searching dependent graphs…