Function processDoc

src/archivist.js:878–920 · view source on GitHub ↗

({documents, strings})

Source from the content-addressed store, hash-verified

876	}
877
/**
 * Collects the text of every text node in a DOMSnapshot capture and joins it
 * into a single space-separated string.
 *
 * Text nodes whose direct parent's node name is in FORBIDDEN_TEXT_PARENT are
 * skipped, as are strings that are empty or whitespace-only.
 *
 * @param {Object} snapshot
 * @param {Array<Object>} snapshot.documents - Document snapshots; each one is
 *   read through its `nodes` table (parallel arrays indexed by node).
 * @param {Array<string>} snapshot.strings - Shared string table that the
 *   `nodeName` / `nodeValue` entries index into.
 * @returns {string} The concatenated page text ('' when there is none).
 */
function processDoc({documents, strings}) {
  /*
    Implementation Notes

    1. Code uses spec at:
       https://chromedevtools.github.io/devtools-protocol/tot/DOMSnapshot/#type-NodeTreeSnapshot

    2. So far this does NOT produce text for textarea or input elements, and
       therefore we do NOT index them. We can access those via the textValue
       and inputValue properties of the doc, if we want to implement that.
  */

  const texts = [];

  for (const doc of documents) {
    const nodes = doc.nodes || {};
    const {nodeType, parentIndex, nodeName, nodeValue} = nodes;

    // Every NodeTreeSnapshot array is optional in the spec. Without node
    // types or node values there is no text to extract from this document.
    if (!Array.isArray(nodeType) || !Array.isArray(nodeValue)) {
      continue;
    }

    nodeType.forEach((type, index) => {
      if (type !== TEXT_NODE) {
        return;
      }

      // A missing parentIndex table is treated the same as "no parent".
      const parent = Array.isArray(parentIndex) ? parentIndex[index] : -1;
      const hasForbiddenParent = parent >= 0 &&
        Array.isArray(nodeName) &&
        FORBIDDEN_TEXT_PARENT.has(strings[nodeName[parent]]);
      if (hasForbiddenParent) {
        return;
      }

      // A negative index means the node carries no value.
      const stringsIndex = nodeValue[index];
      if (!(stringsIndex >= 0)) {
        return;
      }

      // Guard against indices that point outside the string table, so one
      // bad entry cannot abort extraction for the whole page.
      const text = strings[stringsIndex];
      if (typeof text === 'string' && text.trim()) {
        texts.push(text);
      }
    });
  }

  const pageText = texts.join(' ');
  DEBUG.verboseSlow && console.log('Page text>>>', pageText);
  return pageText;
}
921
922	async function isReady() {
923	return await untilTrue(() => Status.loaded);

indexURLFunction · 0.85

archiveAndIndexURLFunction · 0.85

no outgoing calls

no test coverage detected