(
config,
resolved_train_config,
nlp,
pipe,
*,
print_settings: Optional[Dict[str, Any]] = None,
)
| 100 | |
| 101 | |
| 102 | def debug_model( |
| 103 | config, |
| 104 | resolved_train_config, |
| 105 | nlp, |
| 106 | pipe, |
| 107 | *, |
| 108 | print_settings: Optional[Dict[str, Any]] = None, |
| 109 | ): |
| 110 | if not hasattr(pipe, "model"): |
| 111 | msg.fail( |
| 112 | f"The component '{pipe}' does not specify an object that holds a Model.", |
| 113 | exits=1, |
| 114 | ) |
| 115 | model = pipe.model |
| 116 | if not isinstance(model, Model): |
| 117 | msg.fail( |
| 118 | f"Requires a Thinc Model to be analysed, but found {type(model)} instead.", |
| 119 | exits=1, |
| 120 | ) |
| 121 | if print_settings is None: |
| 122 | print_settings = {} |
| 123 | |
| 124 | # STEP 0: Printing before training |
| 125 | msg.info(f"Analysing model with ID {model.id}") |
| 126 | if print_settings.get("print_before_training"): |
| 127 | msg.divider(f"STEP 0 - before training") |
| 128 | _print_model(model, print_settings) |
| 129 | |
| 130 | # STEP 1: Initializing the model and printing again |
| 131 | with data_validation(False): |
| 132 | try: |
| 133 | dot_names = [resolved_train_config["train_corpus"]] |
| 134 | with show_validation_error(): |
| 135 | (train_corpus,) = resolve_dot_names(config, dot_names) |
| 136 | nlp.initialize(lambda: train_corpus(nlp)) |
| 137 | msg.info("Initialized the model with the training corpus.") |
| 138 | examples = list(itertools.islice(train_corpus(nlp), 5)) |
| 139 | except ValueError: |
| 140 | try: |
| 141 | _set_output_dim(nO=7, model=model) |
| 142 | with show_validation_error(): |
| 143 | examples = [Example.from_dict(x, {}) for x in _get_docs()] |
| 144 | nlp.initialize(lambda: examples) |
| 145 | msg.info("Initialized the model with dummy data.") |
| 146 | except Exception: |
| 147 | msg.fail( |
| 148 | "Could not initialize the model: you'll have to provide a valid 'train_corpus' argument in the config file.", |
| 149 | exits=1, |
| 150 | ) |
| 151 | |
| 152 | if print_settings.get("print_after_init"): |
| 153 | msg.divider(f"STEP 1 - after initialization") |
| 154 | _print_model(model, print_settings) |
| 155 | |
| 156 | # STEP 2: Updating the model and printing again |
| 157 | set_dropout_rate(model, 0.2) |
| 158 | # ugly hack to deal with Tok2Vec/Transformer listeners |
| 159 | upstream_component = None |
no test coverage detected
searching dependent graphs…