MCPcopy Index your code
hub / github.com/simstudioai/sim / processElement

Method processElement

apps/sim/lib/file-parsers/html-parser.ts:96–216  ·  view source on GitHub ↗

Recursively process elements to extract text with structure.

(
    $: cheerio.CheerioAPI,
    element: cheerio.Cheerio<any>,
    contentParts: string[],
    depth: number
  )

Source from the content-addressed store, hash-verified

94 * Recursively process elements to extract text with structure
95 */
96 private processElement(
97 $: cheerio.CheerioAPI,
98 element: cheerio.Cheerio<any>,
99 contentParts: string[],
100 depth: number
101 ): void {
102 element.contents().each((_, node) => {
103 if (node.type === 'text') {
104 const text = $(node).text().trim()
105 if (text) {
106 contentParts.push(text)
107 }
108 } else if (node.type === 'tag') {
109 const $node = $(node)
110 const tagName = node.tagName?.toLowerCase()
111
112 switch (tagName) {
113 case 'h1':
114 case 'h2':
115 case 'h3':
116 case 'h4':
117 case 'h5':
118 case 'h6': {
119 const headingText = $node.text().trim()
120 if (headingText) {
121 contentParts.push(`\n${headingText}\n`)
122 }
123 break
124 }
125
126 case 'p': {
127 const paragraphText = $node.text().trim()
128 if (paragraphText) {
129 contentParts.push(`${paragraphText}\n`)
130 }
131 break
132 }
133
134 case 'br':
135 contentParts.push('\n')
136 break
137
138 case 'hr':
139 contentParts.push('\n---\n')
140 break
141
142 case 'li': {
143 const listItemText = $node.text().trim()
144 if (listItemText) {
145 const indent = ' '.repeat(Math.min(depth, 3))
146 contentParts.push(`${indent}• ${listItemText}`)
147 }
148 break
149 }
150
151 case 'ul':
152 case 'ol':
153 contentParts.push('\n')

Callers 1

extractStructuredText (Method) · 0.95

Calls 5

processTable (Method) · 0.95
text (Method) · 0.80
attr (Method) · 0.80
$ (Function) · 0.50
push (Method) · 0.45

Tested by

no test coverage detected