MCPcopy Index your code
hub / github.com/hoothin/UserScripts / getPageContent

Function getPageContent

DownloadAllContent/DownloadAllContent.user.js:1537–1690  ·  view source on GitHub ↗
(doc, cb, url)

Source from the content-addressed store, hash-verified

1535 }
1536
1537 function getPageContent(doc, cb, url){
1538 if(!doc)return i18n.error;
1539 if(processFunc){
1540 return processFunc(doc, cb, url);
1541 }
1542 if(doc.body && !doc.body.children.length)return doc.body.innerText;
1543 [].forEach.call(doc.querySelectorAll("span,div,ul"),function(item){
1544 var thisStyle=doc.defaultView?doc.defaultView.getComputedStyle(item):item.style;
1545 if(thisStyle && (thisStyle.display=="none" || (item.nodeName=="SPAN" && thisStyle.fontSize=="0px"))){
1546 item.innerHTML="";
1547 }
1548 });
1549 var i,j,k,rStr="",pageData=(doc.body?doc.body:doc).cloneNode(true);
1550 pageData.innerHTML=pageData.innerHTML.replace(/\<\!\-\-((.|[\n|\r|\r\n])*?)\-\-\>/g,"");
1551 [].forEach.call(pageData.querySelectorAll("font.jammer"),function(item){
1552 item.innerHTML="";
1553 });
1554 var selectors=GM_getValue("selectors");
1555 if(selectors){
1556 [].forEach.call(pageData.querySelectorAll(selectors),function(item){
1557 item.innerHTML="";
1558 });
1559 }
1560 [].forEach.call(pageData.querySelectorAll("script,style,link,noscript,iframe"),function(item){
1561 if (item && item.parentNode) {
1562 item.parentNode.removeChild(item);
1563 }
1564 });
1565 var endEle = ele => {
1566 return /^(I|STRONG|B|FONT|P|DL|DD|H\d)$/.test(ele.nodeName) && ele.children.length <= 1;
1567 };
1568 var largestContent,contents=pageData.querySelectorAll("span,div,article,p,td,pre"),largestNum=0;
1569 for(i=0;i<contents.length;i++){
1570 let content=contents[i],hasText=false,allSingle=true,item,curNum=0;
1571 if(/footer/.test(content.className))continue;
1572 for(j=content.childNodes.length-1;j>=0;j--){
1573 item=content.childNodes[j];
1574 if(item.nodeType==3){
1575 if(/^\s*$/.test(item.data)){
1576 item.innerHTML="";
1577 }else hasText=true;
1578 }else if(/^(I|A|STRONG|B|FONT|P|DL|DD|H\d)$/.test(item.nodeName)){
1579 hasText=true;
1580 }else if(item.nodeType==1&&item.children.length==1&&/^(I|A|STRONG|B|FONT|P|DL|DD|H\d)$/.test(item.children[0].nodeName)){
1581 hasText=true;
1582 }
1583 }
1584 for(j=content.childNodes.length-1;j>=0;j--){
1585 item=content.childNodes[j];
1586 if(item.nodeType==1 && !/^(I|A|STRONG|B|FONT|BR)$/.test(item.nodeName) && /^[\s\-\_\?\>\|]*$/.test(item.innerHTML)){
1587 item.innerHTML="";
1588 }
1589 }
1590 if(content.childNodes.length>1){
1591 let indexItem=0;
1592 for(j=0;j<content.childNodes.length;j++){
1593 item=content.childNodes[j];
1594 if(item.nodeType==1){

Callers 2

processDoc Function · 0.85
fetch Function · 0.85

Calls 2

endEle Function · 0.85
getContentByLargest Function · 0.85

Tested by

no test coverage detected