hub / github.com/PaddlePaddle/PaddleFormers / find_dead_links

Function find_dead_links

scripts/codestyle/check_dead_links.py:31–71 · view source on GitHub ↗

(directory)

Source from the content-addressed store, hash-verified

29
30
# Inline Markdown link: [text](target). Group 1 is the link text, group 2 the target.
_MARKDOWN_LINK_RE = re.compile(r"\[([^\[\]]+)\]\(([^)]+)\)")
# reStructuredText hyperlink with an embedded target: `text <target>`_
# (a single leading backtick; a double backtick starts an inline literal instead).
_RST_LINK_RE = re.compile(r"`([^`]+) <([^>]+)>`_")
# Optional quoted title that may follow a Markdown link target: (path "Title").
_MARKDOWN_TITLE_RE = re.compile(r"""\s+(?:"[^"]*"|'[^']*')\s*$""")
# URI scheme prefix such as "https:" or "mailto:". At least two characters are
# required so a Windows drive letter ("C:") is not mistaken for a scheme.
_URL_SCHEME_RE = re.compile(r"[A-Za-z][A-Za-z0-9+.-]+:")


def _resolve_local_target(file_path, target):
    """Return the absolute path a local link points to, or None if it is not checked.

    Links are not checked against the filesystem when they are empty, contain
    an anchor ("#"), carry a URI scheme, or are protocol-relative ("//host/...").
    """
    target = target.strip()
    if not target or "#" in target:
        return None
    if target.startswith("//") or _URL_SCHEME_RE.match(target):
        return None
    return os.path.abspath(os.path.join(os.path.dirname(file_path), target))


def find_dead_links(directory):
    """Find local links in ``.md`` / ``.rst`` files whose target path does not exist.

    Walks ``directory`` recursively, skipping every directory whose path
    contains ``"third_party"``. Both the Markdown and the reStructuredText link
    patterns are applied to every scanned file. Link targets are resolved
    relative to the directory of the file that contains them. External links
    (any URI scheme) and links containing an anchor are ignored.

    Args:
        directory: Root directory to scan.

    Returns:
        A list of ``(file_path, link_text, description)`` tuples, where
        ``description`` is ``"Markdown Link: <abs_path>"`` or
        ``"reStructuredText Link: <abs_path>"``.

    Files that cannot be read or decoded as UTF-8 are reported on stdout and
    skipped; they do not abort the scan.
    """
    dead_links = []

    for root, dirs, files in os.walk(directory):
        if "third_party" in root:
            # Every descendant path also contains "third_party", so prune the
            # walk here instead of visiting and skipping each subdirectory.
            dirs[:] = []
            continue

        for file in files:
            if not file.endswith((".md", ".rst")):
                continue

            file_path = os.path.join(root, file)
            # Only the read itself is guarded, so an unrelated failure is not
            # mislabelled as a read error.
            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, UnicodeDecodeError) as e:
                print(f"Error reading {file_path}: {e}")
                continue

            # Markdown links.
            for link_text, target in _MARKDOWN_LINK_RE.findall(content):
                # Drop an optional quoted title so only the destination is checked.
                target = _MARKDOWN_TITLE_RE.sub("", target)
                abs_path = _resolve_local_target(file_path, target)
                if abs_path is not None and not os.path.exists(abs_path):
                    dead_links.append((file_path, link_text, "Markdown Link: " + abs_path))

            # reStructuredText links.
            for link_text, target in _RST_LINK_RE.findall(content):
                abs_path = _resolve_local_target(file_path, target)
                if abs_path is not None and not os.path.exists(abs_path):
                    dead_links.append((file_path, link_text, "reStructuredText Link: " + abs_path))

    return dead_links
72
73
74	def create_symlinks(root_dir, src_dir, tgt_dir, file_extension=".md"):

Callers 1

process_fileFunction · 0.85

Calls 3

readMethod · 0.45

existsMethod · 0.45

appendMethod · 0.45

Tested by

no test coverage detected