Reject docstrings whose characters cannot be encoded in cp1252. The check runs only on docstrings (module-, class-, and function-level), not on other string literals or comments -- those don't reach the Windows `salt-run -d` / `salt -d` output paths that surface as user-visible test failures.
(
ctx: Context,
files: list[pathlib.Path],
)
| 1138 | }, |
| 1139 | ) |
| 1140 | def check_cp1252_docstrings( |
| 1141 | ctx: Context, |
| 1142 | files: list[pathlib.Path], |
| 1143 | ) -> None: |
| 1144 | """ |
| 1145 | Reject docstrings whose characters cannot be encoded in cp1252. |
| 1146 | |
| 1147 | Runs only on docstrings (module-, class-, and function-level), not |
| 1148 | on string literals or comments -- those don't reach the Windows |
| 1149 | salt-run/salt -d output paths that surface as user-visible test |
| 1150 | failures. |
| 1151 | """ |
| 1152 | if not files: |
| 1153 | _files = list(SALT_CODE_DIR.rglob("*.py")) |
| 1154 | else: |
| 1155 | _files = [fpath.resolve() for fpath in files if fpath.suffix == ".py"] |
| 1156 | |
| 1157 | errors = 0 |
| 1158 | for path in _files: |
| 1159 | if str(path).startswith(str(tools.utils.REPO_ROOT / "salt" / "ext")): |
| 1160 | continue |
| 1161 | try: |
| 1162 | tree = ast.parse(path.read_text(), filename=str(path)) |
| 1163 | except (SyntaxError, UnicodeDecodeError) as exc: |
| 1164 | ctx.warn(f"Could not parse {path}: {exc}") |
| 1165 | continue |
| 1166 | for node in ast.walk(tree): |
| 1167 | if not isinstance( |
| 1168 | node, (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef) |
| 1169 | ): |
| 1170 | continue |
| 1171 | docstring = ast.get_docstring(node, clean=False) |
| 1172 | if not docstring: |
| 1173 | continue |
| 1174 | try: |
| 1175 | docstring.encode("cp1252") |
| 1176 | except UnicodeEncodeError as exc: |
| 1177 | # ``exc.object[exc.start]`` is the offending character. |
| 1178 | bad_chars = sorted( |
| 1179 | { |
| 1180 | ch |
| 1181 | for ch in docstring |
| 1182 | if ord(ch) > 127 and not _cp1252_encodable(ch) |
| 1183 | } |
| 1184 | ) |
| 1185 | rendered = ", ".join(f"U+{ord(c):04X} {c!r}" for c in bad_chars) |
| 1186 | try: |
| 1187 | relpath = path.relative_to(tools.utils.REPO_ROOT) |
| 1188 | except ValueError: |
| 1189 | # File outside the repo root (unusual; defensive |
| 1190 | # for direct invocation against an arbitrary path). |
| 1191 | relpath = path |
| 1192 | name = getattr(node, "name", "<module>") |
| 1193 | lineno = getattr(node, "lineno", 1) |
| 1194 | ctx.error( |
| 1195 | f"{relpath}:{lineno} {name}: docstring contains " |
| 1196 | f"cp1252-unencodable characters ({rendered}). " |
| 1197 | f"These break salt-run -d / salt -d on Windows where " |