MCPcopy
hub / github.com/PaddlePaddle/PaddleFormers / find_dead_links

Function find_dead_links

scripts/codestyle/check_dead_links.py:31–71  ·  view source on GitHub ↗
(directory)

Source from the content-addressed store, hash-verified

29
30
def find_dead_links(directory):
    """Find local links in Markdown / reStructuredText files whose target is missing.

    Walks ``directory`` recursively, reads every ``.md`` and ``.rst`` file,
    and resolves each inline link relative to the directory of the file that
    contains it. Links that point outside the file system (any URI scheme such
    as ``https:`` or ``mailto:``, or protocol-relative ``//host/...``) are not
    checked. Markdown links containing ``#`` are treated as anchor links and
    are not checked either.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of ``(file_path, link_text, description)`` tuples, one per dead
        link. ``description`` is ``"Markdown Link: <abs path>"`` or
        ``"reStructuredText Link: <abs path>"``.
    """
    # Inline Markdown link ``[text](target)``; group 1 is the link text.
    markdown_link_pattern = re.compile(r"\[([^\[\]]+)\]\(([^)]+)\)")
    # Inline reStructuredText link: a single backtick, ``text <target>``,
    # a closing backtick and a trailing underscore.
    rst_link_pattern = re.compile(r"`([^`]+) <([^>]+)>`_")
    dead_links = []

    for root, _dirs, files in os.walk(directory):
        # Substring match on the whole path: anything under (or named like)
        # a third_party directory is skipped.
        if "third_party" in root:
            continue
        for file in files:
            if not file.endswith((".md", ".rst")):
                continue
            file_path = os.path.join(root, file)

            # Only the read can legitimately fail (permissions, bad encoding);
            # keep the try body minimal so real bugs are not reported as
            # read errors.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeError) as e:
                print(f"Error reading {file_path}: {e}")
                continue

            base_dir = os.path.dirname(file_path)

            # Markdown links
            for link_text, raw_target in markdown_link_pattern.findall(content):
                target = _strip_markdown_title(raw_target)
                if _is_external_target(target):
                    # External link: nothing to check on disk.
                    continue
                if "#" in target:
                    # Anchor link: skip the file system check.
                    continue
                abs_path = os.path.abspath(os.path.join(base_dir, target))
                if not os.path.exists(abs_path):
                    dead_links.append((file_path, link_text, "Markdown Link: " + abs_path))

            # reStructuredText links
            for text, url in rst_link_pattern.findall(content):
                if _is_external_target(url):
                    continue
                abs_path = os.path.abspath(os.path.join(base_dir, url))
                if not os.path.exists(abs_path):
                    dead_links.append((file_path, text, "reStructuredText Link: " + abs_path))

    return dead_links


def _is_external_target(target):
    """Return True if ``target`` is a URI with a scheme or a protocol-relative URL."""
    # Require at least two characters before the colon so a Windows drive
    # letter ("C:\\docs") is not mistaken for a URI scheme.
    return target.startswith("//") or re.match(r"[A-Za-z][A-Za-z0-9+.-]+:", target) is not None


def _strip_markdown_title(raw_target):
    """Return the destination of a Markdown link, dropping an optional quoted title."""
    target = raw_target.strip()
    # ``path "Title"`` / ``path 'Title'``: keep only the path part.
    with_title = re.match(r"""(\S+)\s+["'].*["']$""", target)
    return with_title.group(1) if with_title else target
72
73
74def create_symlinks(root_dir, src_dir, tgt_dir, file_extension=".md"):

Callers 1

process_fileFunction · 0.85

Calls 3

readMethod · 0.45
existsMethod · 0.45
appendMethod · 0.45

Tested by

no test coverage detected