Function extractTextByContentType

sdk/src/tools/read-url.ts:268–307 · view source on GitHub ↗

(
  contentType: string,
  body: string,
)

Source from the content-addressed store, hash-verified

266	}
267
/**
 * Chooses how to turn `body` into text based on `contentType`.
 *
 * The content type is matched case-insensitively, in this order:
 * 1. HTML / XHTML: delegated to `extractHtml`.
 * 2. JSON (per `isJsonContentType`): re-serialized with 2-space indentation;
 *    if the body does not parse, it is passed through `normalizeText` instead.
 * 3. Markdown (per `isMarkdownContentType`): delegated to
 *    `extractMarkdownFrontmatter`.
 * 4. Anything else: passed through `normalizeText`.
 *
 * @param contentType Content type string to dispatch on.
 * @param body Raw body text to extract from.
 * @returns An object that always carries `text`; `title` and `description`
 *   are presumably filled in by the HTML and Markdown helpers (confirm
 *   against their definitions).
 */
function extractTextByContentType(
  contentType: string,
  body: string,
): {
  title?: string
  description?: string
  text: string
} {
  const mime = contentType.toLowerCase()

  const looksLikeHtml =
    mime.includes('text/html') || mime.includes('application/xhtml')
  if (looksLikeHtml) {
    return extractHtml(body)
  }

  if (isJsonContentType(mime)) {
    let prettyJson: string
    try {
      prettyJson = JSON.stringify(JSON.parse(body), null, 2)
    } catch {
      // Body was labelled JSON but is not parseable: fall back to plain text.
      return { text: normalizeText(body) }
    }
    return { text: prettyJson }
  }

  if (isMarkdownContentType(mime)) {
    return extractMarkdownFrontmatter(body)
  }

  // Known textual types (`text/*`, `application/xml`, `application/rss+xml`,
  // `application/atom+xml`) and every unrecognized type are handled the same
  // way: the body is normalized and returned as-is.
  return { text: normalizeText(body) }
}
308
309	function truncateText(
310	text: string,

readUrl (Function) · 0.85

extractHtml (Function) · 0.85

isJsonContentType (Function) · 0.85

normalizeText (Function) · 0.85

isMarkdownContentType (Function) · 0.85

extractMarkdownFrontmatter (Function) · 0.85

parse (Method) · 0.80

no test coverage detected