MCPcopy
hub / github.com/DO-SAY-GO/dn / processDoc

Function processDoc

src/archivist.js:878–920  ·  view source on GitHub ↗
({documents, strings})

Source from the content-addressed store, hash-verified

876 }
877
/**
 * Collect the visible text of a captured page into a single string.
 *
 * The input follows the NodeTreeSnapshot layout of the Chrome DevTools
 * Protocol DOMSnapshot domain:
 * https://chromedevtools.github.io/devtools-protocol/tot/DOMSnapshot/#type-NodeTreeSnapshot
 *
 * Every per-node field (nodeType, parentIndex, nodeName, nodeValue) is a
 * parallel array addressed by node index; nodeName and nodeValue entries are
 * themselves indices into the shared `strings` table.
 *
 * Known limitation: only text nodes are read, so the contents of textarea
 * and input elements are NOT part of the result and are therefore not
 * indexed. The snapshot's textValue and inputValue arrays could be used to
 * add them if that is ever wanted.
 *
 * @param {Object} snapshot
 * @param {Array<Object>} snapshot.documents - Documents whose `nodes` hold the parallel arrays.
 * @param {Array<string>} snapshot.strings - String table referenced by the node arrays.
 * @returns {string} All non-blank text node values, separated by single spaces.
 */
function processDoc({documents, strings}) {
  const collected = [];

  for (const doc of documents) {
    const nodes = doc.nodes;

    nodes.nodeType.forEach((type, nodeIndex) => {
      if (type !== TEXT_NODE) {
        return;
      }

      // Drop text whose direct parent element is on the forbidden list.
      const parent = nodes.parentIndex[nodeIndex];
      if (parent >= 0 && FORBIDDEN_TEXT_PARENT.has(strings[nodes.nodeName[parent]])) {
        return;
      }

      // Only a non-negative entry points at a real slot in the string table.
      const valueIndex = nodes.nodeValue[nodeIndex];
      if (valueIndex >= 0) {
        collected.push(strings[valueIndex]);
      }
    });
  }

  // Whitespace-only fragments are removed before joining.
  const nonBlank = collected.filter(fragment => fragment.trim().length > 0);
  const pageText = nonBlank.join(' ');

  if (DEBUG.verboseSlow) {
    console.log('Page text>>>', pageText);
  }

  return pageText;
}
921
922 async function isReady() {
923 return await untilTrue(() => Status.loaded);

Callers 2

indexURLFunction · 0.85
archiveAndIndexURLFunction · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected