Loads the model from the specified path or directory. Arguments: path: Either load a BERTopic model from a file (`.pickle`) or a folder containing `.safetensors` or `.bin` files. embedding_model: Additionally load in an embedding model if it was not saved in the BERTopic model file or directory.
(cls, path: str, embedding_model=None)
| 3532 | |
@classmethod
def load(cls, path: str, embedding_model=None):
    """Loads the model from the specified path or directory.

    Resolution order for `path`:
      1. An existing file is loaded with `joblib.load` (pickle format).
      2. An existing directory is read with `save_utils.load_local_files`.
      3. Any other value containing a `/` is treated as an HF model
         identifier and read with `save_utils.load_files_from_hf`.

    Arguments:
        path: Either load a BERTopic model from a file (`.pickle`) or a folder containing
              `.safetensors` or `.bin` files.
        embedding_model: Additionally load in an embedding model if it was not saved
                         in the BERTopic model file or directory. When given (i.e. not
                         `None`), it replaces the embedding model of the loaded topic
                         model, whichever source the model was loaded from.

    Returns:
        topic_model: The loaded topic model.

    Raises:
        ValueError: If `path` is neither an existing file, an existing directory,
                    nor a string containing `/`.

    Examples:
    ```python
    BERTopic.load("model_dir")
    ```

    or if you did not save the embedding model:

    ```python
    BERTopic.load("model_dir", embedding_model="all-MiniLM-L6-v2")
    ```
    """
    file_or_dir = Path(path)

    # Load from Pickle
    if file_or_dir.is_file():
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. Only load model files from a trusted source.
        with open(file_or_dir, "rb") as file:
            topic_model = joblib.load(file)

        # Use the same `is not None` test as the directory/HF branch below so
        # that an explicitly passed backend is never skipped merely because
        # the object happens to evaluate as falsy.
        if embedding_model is not None:
            topic_model.embedding_model = select_backend(embedding_model, verbose=topic_model.verbose)
        return topic_model

    # Load from directory or HF
    if file_or_dir.is_dir():
        topics, params, tensors, ctfidf_tensors, ctfidf_config, images = save_utils.load_local_files(file_or_dir)
    elif "/" in str(path):
        topics, params, tensors, ctfidf_tensors, ctfidf_config, images = save_utils.load_files_from_hf(path)
    else:
        raise ValueError("Make sure to either pass a valid directory or HF model.")

    topic_model = _create_model_from_files(
        topics,
        params,
        tensors,
        ctfidf_tensors,
        ctfidf_config,
        images,
        # The flag is True only when the caller did not supply an embedding model.
        warn_no_backend=(embedding_model is None),
    )

    # Replace embedding model if one is specifically chosen
    if embedding_model is not None:
        topic_model.embedding_model = select_backend(embedding_model, verbose=topic_model.verbose)

    return topic_model
| 3588 | |
| 3589 | @classmethod |
| 3590 | def merge_models(cls, models, min_similarity: float = 0.7, embedding_model=None): |