统计markdown文件的字数 :param file_path: 文件路径 :return: 字数统计 注:不包括链接URL和元数据部分
(file_path)
| 468 | return header + content_body + footer + channel |
| 469 | |
def count_words(file_path):
    """
    Count the words in a markdown file.

    The file is read as UTF-8, markdown syntax is stripped, the remaining
    text is segmented with ``jieba.lcut`` and every token that is not pure
    whitespace is counted.

    :param file_path: path of the markdown file
    :return: number of non-whitespace tokens
    Note: link URLs and the leading metadata (front matter) block are not
    counted.
    """
    print("Counting words in the file...")
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Remove the metadata block only when it opens the file: a '---' line,
    # the metadata lines, and a closing '---' line. An unanchored
    # '---.*?---' would also delete body text lying between horizontal
    # rules or table separator rows, which made the count too low.
    # The lazy optional group lets an empty block ('---' directly followed
    # by '---') match without running on to a later '---' line.
    content = re.sub(
        r'\A---[ \t]*\n(?:.*?\n)??---[ \t]*(?:\n|\Z)',
        '',
        content,
        count=1,
        flags=re.DOTALL,
    )

    # Keep the link text, drop the URL.
    content = re.sub(r'\[([^\]]+)\]\([^\)]+\)', r'\1', content)

    # Remove the remaining markdown markers: #, *, ` and >.
    content = re.sub(r'[#*`>]', '', content)
    # Collapse runs of newlines into a single newline.
    content = re.sub(r'\n+', '\n', content)

    # Segment the text with jieba.
    words = jieba.lcut(content)
    # Drop whitespace-only tokens. Any punctuation tokens jieba returns are
    # still counted, as before.
    words = [word for word in words if word.strip()]

    return len(words)
| 497 | |
| 498 | def update_word_count(file_path, word_count): |
| 499 | """ |