(string, skip_unit=False)
| 210 | |
| 211 | |
| 212 | def strip_string(string, skip_unit=False): |
| 213 | string = str(string).strip() |
| 214 | # linebreaks |
| 215 | string = string.replace("\n", "") |
| 216 | |
| 217 | # right "." |
| 218 | string = string.rstrip(".") |
| 219 | |
| 220 | # remove LaTeX negative thin spaces (\!) |
| 221 | # (the "\ " removal and the \\ -> \ replacement below are currently disabled) |
| 222 | string = string.replace("\\!", "") |
| 223 | # string = string.replace("\\ ", "") |
| 224 | # string = string.replace("\\\\", "\\") |
| 225 | |
| 226 | # matrix |
| 227 | string = re.sub(r"\\begin\{array\}\{.*?\}", r"\\begin{pmatrix}", string) |
| 228 | string = re.sub(r"\\end\{array\}", r"\\end{pmatrix}", string) |
| 229 | string = string.replace("bmatrix", "pmatrix") |
| 230 | |
| 231 | # replace tfrac and dfrac with frac |
| 232 | string = string.replace("tfrac", "frac") |
| 233 | string = string.replace("dfrac", "frac") |
| 234 | string = ( |
| 235 | string.replace("\\neq", "\\ne") |
| 236 | .replace("\\leq", "\\le") |
| 237 | .replace("\\geq", "\\ge") |
| 238 | ) |
| 239 | |
| 240 | # remove \left and \right |
| 241 | string = string.replace("\\left", "") |
| 242 | string = string.replace("\\right", "") |
| 243 | string = string.replace("\\{", "{") |
| 244 | string = string.replace("\\}", "}") |
| 245 | |
| 246 | # Remove a trailing \text{...} unit (e.g. miles, dollars), but only if something is left afterwards |
| 247 | _string = re.sub(r"\\text{.*?}$", "", string).strip() |
| 248 | if _string != "" and _string != string: |
| 249 | # print("Warning: unit not removed: '{}' -> '{}'".format(string, _string)) |
| 250 | string = _string |
| 251 | |
| 252 | if not skip_unit: |
| 253 | # Remove unit: texts |
| 254 | for _ in range(2): |
| 255 | for unit_text in unit_texts: |
| 256 | # use regex, the prefix should be either the start of the string or a non-alphanumeric character |
| 257 | # the suffix should be either the end of the string or a non-alphanumeric character |
| 258 | _string = re.sub(r"(^|\W)" + unit_text + r"($|\W)", r"\1\2", string) |
| 259 | if _string != "": |
| 260 | string = _string |
| 261 | |
| 262 | # Remove circ (degrees) |
| 263 | string = string.replace("^{\\circ}", "") |
| 264 | string = string.replace("^\\circ", "") |
| 265 | |
| 266 | # remove dollar signs |
| 267 | string = string.replace("\\$", "") |
| 268 | string = string.replace("$", "") |
| 269 | string = string.replace("\\(", "").replace("\\)", "") |
no test coverage detected