(directory)
| 29 | |
| 30 | |
def find_dead_links(directory):
    """Find links to non-existent local files in Markdown/reST documents.

    Walks *directory* recursively and scans every ``.md`` and ``.rst`` file
    for inline Markdown links (``[text](target)``) and reStructuredText
    embedded-URI links (```text <target>`_``). Both patterns are applied to
    every scanned file, regardless of its extension.

    A target is checked against the filesystem, relative to the directory of
    the file that contains it, unless it is:

    * an external reference (``http:``, ``https:``, ``ftp:``, ``mailto:``), or
    * a pure in-page anchor (``#section``).

    A trailing ``#fragment`` is stripped before the check, so
    ``missing.md#intro`` is reported when ``missing.md`` does not exist.

    Any directory whose path contains ``third_party`` is skipped together
    with everything below it. Files that cannot be read or decoded as UTF-8
    are reported on stdout and skipped.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of ``(file_path, link_text, description)`` tuples, where
        ``description`` is ``"Markdown Link: <abs path>"`` or
        ``"reStructuredText Link: <abs path>"``.
    """
    # Inline Markdown link; group 1 is the link text, group 2 the target.
    markdown_link_pattern = re.compile(r"\[([^\[\]]+)\]\(([^)]+)\)")
    # reST embedded-URI link. Standard syntax opens with a single backtick;
    # a doubled opening backtick is still accepted for backward compatibility.
    rst_link_pattern = re.compile(r"`{1,2}([^`]+) <([^>]+)>`_")
    external_prefixes = ("http:", "https:", "ftp:", "mailto:")
    dead_links = []

    def _record_if_dead(file_path, link_text, target, label):
        """Append a report entry when *target* names a missing local file."""
        if target.startswith(external_prefixes):
            return
        # Only the part before '#' names a file; the fragment is in-page.
        local_target = target.split("#", 1)[0]
        if not local_target:
            # Pure anchor link: nothing to check on the filesystem.
            return
        abs_path = os.path.abspath(
            os.path.join(os.path.dirname(file_path), local_target)
        )
        if not os.path.exists(abs_path):
            dead_links.append((file_path, link_text, label + abs_path))

    for root, dirs, files in os.walk(directory):
        if "third_party" in root:
            # Every descendant path would contain "third_party" too, so
            # prune the walk here instead of visiting and skipping each one.
            del dirs[:]
            continue
        for file in files:
            if not file.endswith((".md", ".rst")):
                continue
            file_path = os.path.join(root, file)
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error reading {file_path}: {e}")
                continue

            for link_text, target in markdown_link_pattern.findall(content):
                _record_if_dead(file_path, link_text, target, "Markdown Link: ")

            for link_text, target in rst_link_pattern.findall(content):
                _record_if_dead(
                    file_path, link_text, target, "reStructuredText Link: "
                )

    return dead_links
| 72 | |
| 73 | |
| 74 | def create_symlinks(root_dir, src_dir, tgt_dir, file_extension=".md"): |
no test coverage detected