This class defines methods to parse the input HTML page in order to fingerprint the back-end database management system.
| 17 | from lib.core.threads import getCurrentThreadData |
| 18 | |
class HTMLHandler(ContentHandler):
    """
    This class defines methods to parse the input HTML page to
    fingerprint the back-end database management system

    The handler is fed XML elements through startElement(): a "dbms"
    element names the DBMS currently being described (attribute "value")
    and every following "error" element carries a regular expression
    (attribute "regexp", optional attribute "fork") that is matched
    against the page given to the constructor. After the first match the
    public attribute `dbms` holds the corresponding DBMS name and all
    further elements are ignored.
    """

    def __init__(self, page):
        """
        Store the page in the forms needed for matching

        page -- content of the HTML page to inspect (None is treated as
                an empty string)
        """

        ContentHandler.__init__(self)

        # DBMS named by the most recent "dbms" element (candidate only)
        self._dbms = None
        self._page = (page or "")
        try:
            self._lower_page = self._page.lower()
        except SystemError:  # https://bugs.python.org/issue18183
            # No lowercase copy available; the keyword pre-filter in
            # startElement() is bypassed in that case
            self._lower_page = None
        self._urldecoded_page = urldecode(self._page)

        # Result: DBMS whose error regexp matched the page (None if none did)
        self.dbms = None

    def _markAsErrorPage(self):
        """
        Record the page, together with the last request UID, as the last
        error page in the current thread's data
        """

        threadData = getCurrentThreadData()
        threadData.lastErrorPage = (threadData.lastRequestUID, self._page)

    def startElement(self, name, attrs):
        """
        Process one XML element

        name  -- element name ("dbms" and "error" are handled, anything
                 else is ignored)
        attrs -- element attributes (only .get() is used on it)
        """

        # A DBMS has already been recognized, nothing more to do
        if self.dbms:
            return

        if name == "dbms":
            self._dbms = attrs.get("value")

        elif name == "error":
            regexp = attrs.get("regexp")

            # An "error" element without a pattern can not match anything;
            # skip it instead of failing inside re.sub()/keywords[-1]
            if not regexp:
                return

            if regexp not in kb.cache.regex:
                # Longest literal word of the pattern (escape sequences
                # blanked out first) serves as a cheap pre-filter keyword
                keywords = re.findall(r"\w+", re.sub(r"\\.", " ", regexp))
                keywords = sorted(keywords, key=len)

                # Pattern without any literal word -> empty keyword, which
                # is contained in every page (i.e. pre-filter is disabled)
                kb.cache.regex[regexp] = keywords[-1].lower() if keywords else ""

            keyword = kb.cache.regex[regexp]

            # Run the (expensive) regular expression only when it has
            # alternatives, when there is no usable lowercase page or when
            # the keyword appears in the lowercase page
            worthSearching = '|' in regexp or not self._lower_page or keyword in self._lower_page

            if worthSearching and re.search(regexp, self._urldecoded_page, re.I):
                self.dbms = self._dbms
                self._markAsErrorPage()
                kb.forkNote = kb.forkNote or attrs.get("fork")
| 60 | |
| 61 | def htmlParser(page): |
| 62 | """ |
no outgoing calls
no test coverage detected
searching dependent graphs…