()
| 156 | |
@lru_cache()
def load_spacy_model():
    """Load the "reuters" spaCy model, downloading it on first use.

    The model is expected at ``<home>/models/reuters``, where ``<home>`` is
    whatever ``get_home_folder()`` returns. If that directory is missing or
    empty, the crawl4ai repository (branch ``MODEL_REPO_BRANCH``) is cloned
    with ``git`` into ``<home>/crawl4ai``, its ``models/reuters`` folder is
    copied into place, and the clone is deleted again.

    Returns:
        The object returned by ``spacy.load`` for the model directory, or
        ``None`` if cleanup of stale folders, the clone, the copy, or the
        load itself fails (a message is printed in each case).

    Note:
        The function is wrapped in ``lru_cache`` and takes no arguments, so
        its first result -- including a ``None`` produced by a failure -- is
        reused for every later call until ``load_spacy_model.cache_clear()``
        is called.
    """
    # Imported lazily so spaCy is only required when the model is requested.
    import spacy

    name = "models/reuters"
    home_folder = get_home_folder()
    model_folder = Path(home_folder) / name

    # Check if the model directory already exists
    if not (model_folder.exists() and any(model_folder.iterdir())):
        repo_url = "https://github.com/unclecode/crawl4ai.git"
        branch = MODEL_REPO_BRANCH
        repo_folder = Path(home_folder) / "crawl4ai"

        print("[LOG] ⏬ Downloading Spacy model for the first time...")

        # Remove leftovers from a previous attempt. The two folders are
        # checked independently: an existing-but-empty model folder with no
        # leftover clone would otherwise survive and make copytree() below
        # fail with FileExistsError.
        try:
            if repo_folder.exists():
                shutil.rmtree(repo_folder)
            if model_folder.exists():
                shutil.rmtree(model_folder)
        except PermissionError:
            print("[WARNING] Unable to remove existing folders. Please manually delete the following folders and try again:")
            print(f"- {repo_folder}")
            print(f"- {model_folder}")
            return None

        try:
            # Clone the repository
            subprocess.run(
                ["git", "clone", "-b", branch, repo_url, str(repo_folder)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True
            )

            # Create the models directory if it doesn't exist
            models_folder = Path(home_folder) / "models"
            models_folder.mkdir(parents=True, exist_ok=True)

            # Copy the reuters model folder to the models directory
            source_folder = repo_folder / "models" / "reuters"
            shutil.copytree(source_folder, model_folder)

            # Remove the cloned repository
            shutil.rmtree(repo_folder)

            print("[LOG] ✅ Spacy Model downloaded successfully")
        except subprocess.CalledProcessError as e:
            print(f"An error occurred while cloning the repository: {e}")
            return None
        except Exception as e:
            # Best-effort download: report the problem and let the caller
            # deal with a missing model instead of crashing.
            print(f"An error occurred: {e}")
            return None

    try:
        return spacy.load(str(model_folder))
    except Exception as e:
        print(f"Error loading spacy model: {e}")
        return None
nothing calls this directly
no test coverage detected
searching dependent graphs…