Collapses strings and chars on a line to simple "" or '' blocks. We nix strings first so we're not fooled by text like '"http://"'. Args: elided: The line being processed. Returns: The line with collapsed strings.
(elided)
| 2171 | |
| 2172 | @staticmethod |
| 2173 | def _CollapseStrings(elided): |
| 2174 | """Collapses strings and chars on a line to simple "" or '' blocks. |
| 2175 | |
| 2176 | We nix strings first so we're not fooled by text like '"http://"' |
| 2177 | |
| 2178 | Args: |
| 2179 | elided: The line being processed. |
| 2180 | |
| 2181 | Returns: |
| 2182 | The line with collapsed strings. |
| 2183 | """ |
| 2184 | if _RE_PATTERN_INCLUDE.match(elided): |
| 2185 | return elided |
| 2186 | |
| 2187 | # Remove escaped characters first to make quote/single quote collapsing |
| 2188 | # basic. Things that look like escaped characters shouldn't occur |
| 2189 | # outside of strings and chars. |
| 2190 | elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub("", elided) |
| 2191 | |
| 2192 | # Replace quoted strings and digit separators. Both single quotes |
| 2193 | # and double quotes are processed in the same loop, otherwise |
| 2194 | # nested quotes wouldn't work. |
| 2195 | collapsed = "" |
| 2196 | while True: |
| 2197 | # Find the first quote character |
| 2198 | match = re.match(r'^([^\'"]*)([\'"])(.*)$', elided) |
| 2199 | if not match: |
| 2200 | collapsed += elided |
| 2201 | break |
| 2202 | head, quote, tail = match.groups() |
| 2203 | |
| 2204 | if quote == '"': |
| 2205 | # Collapse double quoted strings |
| 2206 | second_quote = tail.find('"') |
| 2207 | if second_quote >= 0: |
| 2208 | collapsed += head + '""' |
| 2209 | elided = tail[second_quote + 1 :] |
| 2210 | else: |
| 2211 | # Unmatched double quote, don't bother processing the rest |
| 2212 | # of the line since this is probably a multiline string. |
| 2213 | collapsed += elided |
| 2214 | break |
| 2215 | else: |
| 2216 | # Found single quote, check nearby text to eliminate digit separators. |
| 2217 | # |
| 2218 | # There is no special handling for floating point here, because |
| 2219 | # the integer/fractional/exponent parts would all be parsed |
| 2220 | # correctly as long as there are digits on both sides of the |
| 2221 | # separator. So we are fine as long as we don't see something |
| 2222 | # like "0.'3" (gcc 4.9.0 will not allow this literal). |
| 2223 | if re.search(r"\b(?:0[bBxX]?|[1-9])[0-9a-fA-F]*$", head): |
| 2224 | match_literal = re.match(r"^((?:\'?[0-9a-zA-Z_])*)(.*)$", "'" + tail) |
| 2225 | collapsed += head + match_literal.group(1).replace("'", "") |
| 2226 | elided = match_literal.group(2) |
| 2227 | else: |
| 2228 | second_quote = tail.find("'") |
| 2229 | if second_quote >= 0: |
| 2230 | collapsed += head + "''" |