MCPcopy
hub / github.com/MaartenGr/BERTopic / generate_readme

Function generate_readme

bertopic/_save_utils.py:271–315  ·  view source on GitHub ↗

Generate README for HuggingFace model card.

(model, repo_id: str)

Source from the content-addressed store, hash-verified

269
270
def generate_readme(model, repo_id: str):
    """Generate README for HuggingFace model card.

    Fills the placeholders of ``MODEL_CARD_TEMPLATE`` ({MODEL_NAME}, {PATH},
    {NR_TOPICS}, {TOPICS}, {NR_DOCUMENTS}, {HYPERPARAMS}, {FRAMEWORKS} and
    {PIPELINE_TAG}) with values read from ``model``.

    Args:
        model: Object exposing ``get_params()``, ``get_topic()``,
            ``get_topic_freq()``, ``topics_``, ``topic_sizes_``,
            ``custom_labels_`` and ``topic_labels_``.
        repo_id: Repository identifier; the part after the last "/" is used
            as the model name, the full value as the path.

    Returns:
        The model card text with all placeholders substituted.
    """
    model_card = MODEL_CARD_TEMPLATE
    topic_table_head = "| Topic ID | Topic Keywords | Topic Frequency | Label | \n|----------|----------------|-----------------|-------| \n"

    # Get Statistics
    model_name = repo_id.split("/")[-1]
    # Parameters whose name contains "model" are left out of the listing.
    hyperparams = "\n".join(
        f"* {param}: {value}" for param, value in model.get_params().items() if "model" not in param
    )
    topics = sorted(set(model.topics_))
    nr_topics = str(len(topics))

    if model.topic_sizes_ is not None:
        nr_documents = str(sum(model.topic_sizes_.values()))
    else:
        nr_documents = ""

    # Topic information
    # First element of each (up to five) leading entries of the topic
    # representation; an empty representation yields an empty keyword string.
    topic_keywords = [" - ".join([entry[0] for entry in model.get_topic(topic)][:5]) for topic in topics]
    topic_freq = [model.get_topic_freq(topic) for topic in topics]
    topic_labels = model.custom_labels_ if model.custom_labels_ else [model.topic_labels_[topic] for topic in topics]

    # All per-topic lists above are positional (built in the order of
    # `topics`), so they must be indexed by position, never by topic id: ids
    # are not guaranteed to equal their position in the sorted list (e.g. an
    # id of -1 would wrap around to the last entry).
    topic_rows = [
        f"| {topic} | {topic_keywords[index]} | {topic_freq[index]} | {topic_labels[index]} | \n"
        for index, topic in enumerate(topics)
    ]
    topic_table = topic_table_head + "".join(topic_rows)
    frameworks = "\n".join([f"* {param}: {value}" for param, value in get_package_versions().items()])

    # Fill Statistics into model card
    model_card = model_card.replace("{MODEL_NAME}", model_name)
    model_card = model_card.replace("{PATH}", repo_id)
    model_card = model_card.replace("{NR_TOPICS}", nr_topics)
    model_card = model_card.replace("{TOPICS}", topic_table.strip())
    model_card = model_card.replace("{NR_DOCUMENTS}", nr_documents)
    model_card = model_card.replace("{HYPERPARAMS}", hyperparams)
    model_card = model_card.replace("{FRAMEWORKS}", frameworks)

    # Fill Pipeline tag
    has_visual_aspect = check_has_visual_aspect(model)
    if not has_visual_aspect:
        model_card = model_card.replace("{PIPELINE_TAG}", "text-classification")
    else:
        # No tag is written for this case: the whole pipeline_tag line is removed.
        model_card = model_card.replace("pipeline_tag: {PIPELINE_TAG}\n", "")  # TODO add proper tag for this instance

    return model_card
316
317
318def save_hf(model, save_directory, serialization: str):

Callers 1

push_to_hf_hubFunction · 0.85

Calls 5

get_package_versionsFunction · 0.85
check_has_visual_aspectFunction · 0.85
get_paramsMethod · 0.80
get_topicMethod · 0.80
get_topic_freqMethod · 0.80

Tested by

no test coverage detected