Strip characters (or whole substrings) from `text`. Parameters ---------- text : str Text to process and strip. strip : str or sequence of str, optional (default: '') If a ``str``: every occurrence of *any character* in `strip` is removed from `text` (the lon
(text, strip="")
| 1106 | |
| 1107 | |
def text_strip(text, strip=""):
    """Strip characters (or whole substrings) from `text`.

    Parameters
    ----------
    text : str
        Text to process and strip.
    strip : str or sequence of str, optional (default: '')
        If a ``str``: every occurrence of *any character* in `strip` is
        removed from `text` (the long-standing behaviour).
        If a list/tuple of ``str``: every occurrence of *each substring*
        is removed from `text`. This is the request from #484 — the
        whole-substring mode is opt-in by passing a sequence, so
        existing callers passing a single string keep their per-character
        semantics. Empty entries are ignored. When one entry is a prefix
        of another (e.g. ``["foo", "foobar"]``) the longer entry is
        matched first, so the result does not depend on the order in
        which the entries are listed.

    Returns
    -------
    stripped : str
        `text` with the requested characters/substrings removed in a
        single left-to-right pass. `text` is returned unchanged when
        `strip` is empty (or contains only empty strings).
    """
    if not strip:
        return text

    if isinstance(strip, (list, tuple)):
        # Substring-strip mode: build an alternation of escaped pieces.
        # Drop empties so users passing ["", "foo"] don't get "match the
        # empty string everywhere" behaviour.
        #
        # Regex alternation is ordered, not longest-match: the first
        # branch that matches wins. Sorting longest-first guarantees that
        # a short piece can never pre-empt a longer piece it is a prefix
        # of (which would leave the tail of the longer piece behind).
        pieces = sorted((s for s in strip if s), key=len, reverse=True)
        if not pieces:
            return text
        pattern = "|".join(map(re.escape, pieces))
        return re.sub(pattern, "", text, flags=re.UNICODE)

    # Backward-compatible character-class strip. Each character is escaped
    # individually so that '-', '^', ']' and '\\' are taken literally
    # inside the class.
    char_class = "".join(map(re.escape, strip))
    return re.sub(f"[{char_class}]", "", text, flags=re.UNICODE)
| 1146 | |
| 1147 | |
| 1148 | # TODO: combine the following functions into a TextProcessor class which |
no outgoing calls