Parser for stpl templates.
| 3424 | |
| 3425 | |
class StplParser(object):
    ''' Parser for stpl templates.

        The class body assembles three regular-expression *templates*
        (``_re_split``, ``_re_tok``, ``_re_inl``). They still contain
        ``%(name)s`` placeholders for the five syntax tokens and are only
        compiled in :meth:`set_syntax`, once per distinct syntax string.
    '''
    #: Cache for compiled re patterns, keyed by the raw syntax string.
    #: Class-level on purpose: it is shared by all parser instances.
    _re_cache = {}

    # This huge pile of voodoo magic splits python code into 9 different
    # tokens (capture groups 1-9); anything left unmatched is the 10th kind,
    # 'everything else'.
    # 1: All kinds of python strings (trust me, it works)
    _re_tok = ('([urbURB]?(?:\'\'(?!\')|""(?!")|\'{6}|"{6}'
               '|\'(?:[^\\\\\']|\\\\.)+?\'|"(?:[^\\\\"]|\\\\.)+?"'
               '|\'{3}(?:[^\\\\]|\\\\.|\\n)+?\'{3}'
               '|"{3}(?:[^\\\\]|\\\\.|\\n)+?"{3}))')
    # Keep a copy of the string pattern with the explicit '|\n' alternative
    # of the triple-quoted branches removed. It is re-used for inline
    # statements below. This must happen before the other tokens are appended.
    _re_inl = _re_tok.replace('|\\n', '')
    # 2: Comments (until end of line, but not the newline itself)
    _re_tok += '|(#.*)'
    # 3,4: Open and close grouping tokens
    _re_tok += '|([\\[\\{\\(])'
    _re_tok += '|([\\]\\}\\)])'
    # 5,6: Keywords that start or continue a python block (only start of line)
    _re_tok += ('|^([ \\t]*(?:if|for|while|with|try|def|class)\\b)'
                '|^([ \\t]*(?:elif|else|except|finally)\\b)')
    # 7: Our special 'end' keyword (but only if it stands alone)
    _re_tok += '|((?:^|;)[ \\t]*end[ \\t]*(?=(?:%(block_close)s[ \\t]*)?\\r?$|;|#))'
    # 8: A customizable end-of-code-block template token (only end of line)
    _re_tok += '|(%(block_close)s[ \\t]*(?=\\r?$))'
    # 9: And finally, a single newline. The 10th token is 'everything else'
    _re_tok += '|(\\r?\\n)'

    # Match the start tokens of code areas in a template
    _re_split = '(?m)^[ \t]*(\\\\?)((%(line_start)s)|(%(block_start)s))(%%?)'
    # Match inline statements (may contain python strings). The doubled '%%'
    # survives this first formatting step as '%(inline_start)s' /
    # '%(inline_end)s', which set_syntax() fills in later.
    _re_inl = '(?m)%%(inline_start)s((?:%s|[^\'"\n]*?)+)%%(inline_end)s' % _re_inl
    # The multiline flag has to be the very first thing in the pattern.
    _re_tok = '(?m)' + _re_tok

    #: block_start block_close line_start inline_start inline_end
    default_syntax = '<% %> % {{ }}'

    def __init__(self, source, syntax=None, encoding='utf8'):
        ''' Store the template source and reset all parser state.

            :param source: Template text; passed through ``touni`` together
                with ``encoding`` (``touni`` is defined elsewhere in this
                file -- presumably it decodes bytes to unicode).
            :param syntax: Space separated token string. Falls back to
                :attr:`default_syntax` if empty or ``None``.
            :param encoding: Encoding handed to ``touni`` and kept on the
                instance.
        '''
        self.source, self.encoding = touni(source, encoding), encoding
        self.set_syntax(syntax or self.default_syntax)
        self.code_buffer, self.text_buffer = [], []
        self.lineno, self.offset = 1, 0
        self.indent, self.indent_mod = 0, 0
        self.paren_depth = 0

    def get_syntax(self):
        ''' Tokens as a space separated string (default: <% %> % {{ }}) '''
        return self._syntax

    def set_syntax(self, syntax):
        ''' Switch to a new syntax and (re-)use its compiled patterns.

            :param syntax: Five whitespace separated tokens, in the order
                ``block_start block_close line_start inline_start
                inline_end``. Surplus tokens are ignored.
            :raises KeyError: If a pattern needs a token that ``syntax``
                does not provide (too few tokens).

            The instance is only modified after all three patterns were
            obtained, so a failing call leaves the previous syntax and its
            compiled patterns in place.
        '''
        tokens = syntax.split()
        patterns = self._re_cache.get(syntax)
        if patterns is None:
            names = 'block_start block_close line_start inline_start inline_end'
            # Tokens are user-chosen literals; escape them before they are
            # substituted into the regular expression templates.
            escaped = [re.escape(token) for token in tokens]
            pattern_vars = dict(zip(names.split(), escaped))
            templates = (self._re_split, self._re_tok, self._re_inl)
            patterns = [re.compile(p % pattern_vars) for p in templates]
            self._re_cache[syntax] = patterns
        # Commit only now that nothing can fail any more.
        self._syntax = syntax
        self._tokens = tokens
        self.re_split, self.re_tok, self.re_inl = patterns

    # Reading ``parser.syntax`` returns the token string; assigning to it
    # goes through set_syntax() and therefore swaps the compiled patterns.
    syntax = property(get_syntax, set_syntax)