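* Recursively walk the child nodes of `element` and append their text to `contentParts`, preserving document structure (headings, paragraphs, line breaks, horizontal rules, and list items)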
(
$: cheerio.CheerioAPI,
element: cheerio.Cheerio<any>,
contentParts: string[],
depth: number
)
| 94 | * Recursively process elements to extract text with structure |
| 95 | */ |
| 96 | private processElement( |
| 97 | $: cheerio.CheerioAPI, |
| 98 | element: cheerio.Cheerio<any>, |
| 99 | contentParts: string[], |
| 100 | depth: number |
| 101 | ): void { |
| 102 | element.contents().each((_, node) => { |
| 103 | if (node.type === 'text') { |
| 104 | const text = $(node).text().trim() |
| 105 | if (text) { |
| 106 | contentParts.push(text) |
| 107 | } |
| 108 | } else if (node.type === 'tag') { |
| 109 | const $node = $(node) |
| 110 | const tagName = node.tagName?.toLowerCase() |
| 111 | |
| 112 | switch (tagName) { |
| 113 | case 'h1': |
| 114 | case 'h2': |
| 115 | case 'h3': |
| 116 | case 'h4': |
| 117 | case 'h5': |
| 118 | case 'h6': { |
| 119 | const headingText = $node.text().trim() |
| 120 | if (headingText) { |
| 121 | contentParts.push(`\n${headingText}\n`) |
| 122 | } |
| 123 | break |
| 124 | } |
| 125 | |
| 126 | case 'p': { |
| 127 | const paragraphText = $node.text().trim() |
| 128 | if (paragraphText) { |
| 129 | contentParts.push(`${paragraphText}\n`) |
| 130 | } |
| 131 | break |
| 132 | } |
| 133 | |
| 134 | case 'br': |
| 135 | contentParts.push('\n') |
| 136 | break |
| 137 | |
| 138 | case 'hr': |
| 139 | contentParts.push('\n---\n') |
| 140 | break |
| 141 | |
| 142 | case 'li': { |
| 143 | const listItemText = $node.text().trim() |
| 144 | if (listItemText) { |
| 145 | const indent = ' '.repeat(Math.min(depth, 3)) |
| 146 | contentParts.push(`${indent}• ${listItemText}`) |
| 147 | } |
| 148 | break |
| 149 | } |
| 150 | |
| 151 | case 'ul': |
| 152 | case 'ol': |
| 153 | contentParts.push('\n') |
no test coverage detected