(model: str, inputs: Optional[Path] = None, n_texts: int = 10000)
| 50 | |
| 51 | |
def profile(model: str, inputs: Optional[Path] = None, n_texts: int = 10000) -> None:
    """Profile a pipeline while it processes a batch of texts.

    The texts are read from `inputs` when it is given; otherwise the IMDB
    training set is loaded through the optional `ml_datasets` package. The
    raw profile is written to "Profile.prof" in the current working
    directory and the statistics, sorted by the "time" key, are printed.

    model: Name or path of the pipeline, passed to `load_model`.
    inputs: Optional location of the input texts, read via `_read_inputs`.
    n_texts: Maximum number of texts to profile on.
    """
    if inputs is not None:
        # At most n_texts items are consumed from whatever _read_inputs yields.
        texts = list(itertools.islice(_read_inputs(inputs, msg), n_texts))
    else:
        try:
            import ml_datasets
        except ImportError:
            # NOTE(review): exits=1 is expected to terminate the process here,
            # so the code below never runs without ml_datasets — confirm.
            msg.fail(
                "This command, when run without an input file, "
                "requires the ml_datasets library to be installed: "
                "pip install ml_datasets",
                exits=1,
            )

        with msg.loading("Loading IMDB dataset via ml_datasets..."):
            imdb_train, _ = ml_datasets.imdb(train_limit=n_texts, dev_limit=0)
            # Keep only the text of each (text, label) pair. Unlike
            # `zip(*imdb_train)`, this does not fail on an empty dataset.
            texts = [text for text, _label in imdb_train]
        # Report the number of texts actually loaded, which may be lower than
        # the requested n_texts.
        msg.info(f"Loaded IMDB dataset and using {len(texts)} examples")
    with msg.loading(f"Loading pipeline '{model}'..."):
        nlp = load_model(model)
    msg.good(f"Loaded pipeline '{model}'")
    # The profiled statement is evaluated as a string against locals(), so the
    # local names `nlp` and `texts` must not be renamed.
    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(), "Profile.prof")
    stats = pstats.Stats("Profile.prof")
    msg.divider("Profile stats")
    stats.strip_dirs().sort_stats("time").print_stats()
| 78 | |
| 79 | |
| 80 | def parse_texts(nlp: Language, texts: Sequence[str]) -> None: |
no test coverage detected
searching dependent graphs…