Apply a trained pipeline to documents to get predictions. Expects a loadable spaCy pipeline and path to the data, which can be a directory or a file. The data files can be provided in multiple formats: 1. .spacy files 2. .jsonl files with a specified "field" to read the text from. 3. Files with any other extension are assumed to contain a single document.
(
# fmt: off
model: str = Arg(..., help="Model name or path"),
data_path: Path = Arg(..., help=path_help, exists=True),
output_file: Path = Arg(..., help=out_help, dir_okay=False),
code_path: Optional[Path] = Opt(None, "--code", "-c", help=code_help),
text_key: str = Opt(
"text", "--text-key", "-tk", help="Key containing text string for JSONL"
),
force_overwrite: bool = Opt(
False, "--force", "-F", help="Force overwriting the output file"
),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU."),
batch_size: int = Opt(1, "--batch-size", "-b", help="Batch size."),
n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use."),
)
| 67 | |
@app.command("apply")
def apply_cli(
    # fmt: off
    model: str = Arg(..., help="Model name or path"),
    data_path: Path = Arg(..., help=path_help, exists=True),
    output_file: Path = Arg(..., help=out_help, dir_okay=False),
    code_path: Optional[Path] = Opt(None, "--code", "-c", help=code_help),
    text_key: str = Opt("text", "--text-key", "-tk", help="Key containing text string for JSONL"),
    force_overwrite: bool = Opt(False, "--force", "-F", help="Force overwriting the output file"),
    use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU."),
    batch_size: int = Opt(1, "--batch-size", "-b", help="Batch size."),
    n_process: int = Opt(1, "--n-process", "-n", help="number of processors to use."),
    # fmt: on
):
    """
    Apply a trained pipeline to documents to get predictions.
    Expects a loadable spaCy pipeline and path to the data, which
    can be a directory or a file.
    The data files can be provided in multiple formats:
    1. .spacy files
    2. .jsonl files with a specified "field" to read the text from.
    3. Files with any other extension are assumed to be containing
    a single document.
    DOCS: https://spacy.io/api/cli#apply
    """
    # NOTE: the docstring above is left word-for-word as it was; it is
    # presumably surfaced as the command's help text, so treat it as
    # user-facing output rather than an internal comment.

    # Pass every path-like argument through ensure_path before use
    # (presumably normalises them to Path objects; confirm in the helper).
    source = ensure_path(data_path)
    destination = ensure_path(output_file)
    extra_code = ensure_path(code_path)

    # Refuse to clobber an existing output file unless --force was given.
    destination_taken = destination.exists()
    if destination_taken and not force_overwrite:
        msg.fail(force_msg, exits=1)

    # The input location has to exist before any work is attempted.
    source_found = source.exists()
    if not source_found:
        msg.fail(f"Couldn't find data path: {source}", exits=1)

    # Hand the optional code path to import_code and the GPU id to
    # setup_gpu, in that order, before running the pipeline.
    import_code(extra_code)
    setup_gpu(use_gpu)

    # Delegate the actual processing to apply(); arguments stay positional
    # because its parameter names are not visible from this block.
    apply(source, destination, model, text_key, batch_size, n_process)
| 106 | |
| 107 | |
| 108 | def apply( |
nothing calls this directly
no test coverage detected
searching dependent graphs…