MCPcopy
hub / github.com/ampproject/amphtml / parse

Method parse

validator/js/engine/htmlparser.js:514–652  ·  view source on GitHub ↗

Given a SAX-like `HtmlSaxHandler`, parses `htmlText` and lets the `handler` know the structure while visiting the nodes. If the provided handler is an implementation of `htmlparser.HtmlSaxHandlerWithLocation`, then its `setDocLocator` method will get called prior to `startDoc`.

(handler, htmlText)

Source from the content-addressed store, hash-verified

512 * @param {string} htmlText The html text.
513 */
514 parse(handler, htmlText) {
515 let htmlUpper = null;
516 let inTag = false; // True iff we're currently processing a tag.
517 const attribs = []; // Accumulates attribute names and values.
518 let tagName; // The name of the tag currently being processed.
519 let eflags; // The element flags for the current tag.
520 let openTag; // True if the current tag is an open tag.
521 const tagStack = new TagNameStack(handler);
522
523 // Only provide location information if the handler implements the
524 // setDocLocator method.
525 let locator = null;
526 if (handler instanceof parserInterface.HtmlSaxHandlerWithLocation) {
527 locator = new DocLocatorImpl(htmlText);
528 handler.setDocLocator(locator);
529 }
530
531 // Lets the handler know that we are starting to parse the document.
532 handler.startDoc();
533
534 // Consumes tokens from the htmlText and stops once all tokens are
535 // processed.
536 while (htmlText) {
537 const regex = inTag ? INSIDE_TAG_TOKEN_ : OUTSIDE_TAG_TOKEN_;
538 // Gets the next token
539 const m = htmlText.match(regex);
540 if (locator) {
541 locator.advancePos(m[0]);
542 }
543 // And removes it from the string
544 htmlText = htmlText.substring(m[0].length);
545
546 // TODO(goto): cleanup this code breaking it into separate methods.
547 if (inTag) {
548 if (m[1]) { // Attribute.
549 // SetAttribute with uppercase names doesn't work on IE6.
550 const attribName = parserInterface.toLowerCase(m[1]);
551 // Use empty string as value for valueless attribs, so
552 // <input type=checkbox checked>
553 // gets attributes ['type', 'checkbox', 'checked', '']
554 let decodedValue = '';
555 if (m[2]) {
556 let encodedValue = m[3];
557 switch (encodedValue.charCodeAt(0)) { // Strip quotes.
558 case 34: // double quote "
559 case 39: // single quote '
560 encodedValue =
561 encodedValue.substring(1, encodedValue.length - 1);
562 break;
563 }
564 decodedValue =
565 this.unescapeEntities_(this.stripNULs_(encodedValue));
566 }
567 attribs.push(attribName, decodedValue);
568 } else if (m[4]) {
569 if (eflags !== void 0) { // False if not in allowlist.
570 if (openTag) {
571 tagStack.startTag(new parserInterface.ParsedHtmlTag(

Callers 3

htmlparser_test.jsFile · 0.45
mainFunction · 0.45
index_test.jsFile · 0.45

Calls 15

startTagMethod · 0.95
endTagMethod · 0.95
pcdataMethod · 0.95
exitRemainingTagsMethod · 0.95
effectiveBodyAttribsMethod · 0.95
advancePosMethod · 0.80
unescapeEntities_Method · 0.80
stripNULs_Method · 0.80
normalizeRCData_Method · 0.80
snapshotPosMethod · 0.80
setDocLocatorMethod · 0.45
startDocMethod · 0.45

Tested by

no test coverage detected