MCPcopy
hub / github.com/unclecode/crawl4ai / load_spacy_model

Function load_spacy_model

crawl4ai/model_loader.py:158–216  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

156
@lru_cache()
def load_spacy_model():
    """Load the "reuters" spaCy model, downloading it on first use.

    The model is expected under ``<home>/models/reuters`` where ``<home>`` is
    whatever ``get_home_folder()`` returns. If that directory is missing or
    empty, the crawl4ai repository (branch ``MODEL_REPO_BRANCH``) is cloned
    into ``<home>/crawl4ai``, its ``models/reuters`` folder is copied into
    place, and the clone is deleted again.

    Returns:
        The object returned by ``spacy.load`` on success, or ``None`` when the
        download, the copy, or the load fails (a message is printed in each
        case).

    Note:
        The function is wrapped in ``lru_cache``, so the first result --
        including a ``None`` from a failed attempt -- is reused for every
        later call in the same process.
    """
    import spacy

    name = "models/reuters"
    home_folder = get_home_folder()
    model_folder = Path(home_folder) / name

    # Check if the model directory already exists
    if not (model_folder.exists() and any(model_folder.iterdir())):
        repo_url = "https://github.com/unclecode/crawl4ai.git"
        branch = MODEL_REPO_BRANCH
        repo_folder = Path(home_folder) / "crawl4ai"

        print("[LOG] ⏬ Downloading Spacy model for the first time...")

        # Remove existing repo folder if it exists (leftover from an earlier,
        # interrupted download); git refuses to clone into a non-empty folder.
        if repo_folder.exists():
            try:
                shutil.rmtree(repo_folder)
                if model_folder.exists():
                    shutil.rmtree(model_folder)
            except PermissionError:
                print("[WARNING] Unable to remove existing folders. Please manually delete the following folders and try again:")
                print(f"- {repo_folder}")
                print(f"- {model_folder}")
                return None

        try:
            # Clone the repository
            subprocess.run(
                ["git", "clone", "-b", branch, repo_url, str(repo_folder)],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True
            )

            # Create the models directory if it doesn't exist
            models_folder = Path(home_folder) / "models"
            models_folder.mkdir(parents=True, exist_ok=True)

            # copytree refuses to copy onto an existing directory, so an
            # empty model folder left on disk must be removed first; otherwise
            # every run would fail here until the folder is deleted by hand.
            if model_folder.exists():
                shutil.rmtree(model_folder)

            # Copy the reuters model folder to the models directory
            source_folder = repo_folder / "models" / "reuters"
            shutil.copytree(source_folder, model_folder)

            print("[LOG] ✅ Spacy Model downloaded successfully")
        except subprocess.CalledProcessError as e:
            print(f"An error occurred while cloning the repository: {e}")
            return None
        except Exception as e:
            print(f"An error occurred: {e}")
            return None
        finally:
            # Remove the cloned repository whether or not the download
            # succeeded, so a failed attempt does not leave the clone behind.
            # Best effort: a cleanup failure must not mask the real outcome.
            shutil.rmtree(repo_folder, ignore_errors=True)

    try:
        return spacy.load(str(model_folder))
    except Exception as e:
        print(f"Error loading spacy model: {e}")
        return None

Callers

nothing calls this directly

Calls 2

get_home_folder — Function · 0.70
run — Method · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…