MCPcopy
hub / github.com/garrytan/gstack / getCleanTextWithStripping

Function getCleanTextWithStripping

browse/src/content-security.ts:170–186  ·  view source on GitHub ↗
(page: Page | Frame)

Source from the content-addressed store, hash-verified

168 * Uses clone + remove approach: clones body, removes marked elements, returns innerText.
169 */
export async function getCleanTextWithStripping(page: Page | Frame): Promise<string> {
  // The callback is handed to page.evaluate; its string result is awaited here.
  const extracted = await page.evaluate(() => {
    const root = document.body;
    if (!root) {
      return '';
    }

    // Work on a deep copy so the removals below never touch the live DOM.
    const copy = root.cloneNode(true) as HTMLElement;

    // First pass drops standard noise elements, second pass drops anything
    // carrying the data-gstack-hidden marker.
    const selectors = ['script, style, noscript, svg', '[data-gstack-hidden]'];
    for (const selector of selectors) {
      for (const node of Array.from(copy.querySelectorAll(selector))) {
        node.remove();
      }
    }

    // NOTE(review): the copy is never attached to the document, so innerText
    // presumably falls back to textContent-like output here — confirm if
    // layout-aware line breaks are expected.
    const keptLines: string[] = [];
    for (const rawLine of copy.innerText.split('\n')) {
      const trimmed = rawLine.trim();
      // Blank and whitespace-only lines are discarded.
      if (trimmed.length > 0) {
        keptLines.push(trimmed);
      }
    }
    return keptLines.join('\n');
  });

  return stripLoneSurrogates(extracted);
}
187
188/**
189 * Clean up data-gstack-hidden attributes from the page.

Calls 1

stripLoneSurrogates · Function · 0.90

Tested by

no test coverage detected