( markdown: string, version?: string, rendering?: boolean )
| 37 | } |
| 38 | |
| 39 | export function parseMarkdown( |
| 40 | markdown: string, |
| 41 | version?: string, |
| 42 | rendering?: boolean |
| 43 | ): ParsedMarkdown { |
| 44 | // TODO: this is getting called a lot |
| 45 | // TODO: this is already a little tricky, refactor me |
| 46 | if (version?.includes('.')) { |
| 47 | // Revisions don't need any parsing, just call fix html and return |
| 48 | return { html: fixHTML(markdown) } |
| 49 | } |
| 50 | const result: ParsedMarkdown = {} |
| 51 | // TODO: 1000 is arbitrary, but often LLMs return some description |
| 52 | const header = markdown.slice(0, 1000) |
| 53 | const name = header.split('\n').find(l => l.trim().startsWith('name: ')) |
| 54 | const emoji = header.split('\n').find(l => l.trim().startsWith('emoji: ')) |
| 55 | // Mixtral sometimes started itself with ```yaml |
| 56 | // We remove double newlines to ensure html is captured as a single block |
| 57 | let cleanMarkdown = markdown.replace('```yaml\n', '').replaceAll('\n\n', '\n') |
| 58 | // TODO: this seems brittle, but seems to work; we need to do this to remove |
| 59 | // any comments added after the closing HTML tag |
| 60 | cleanMarkdown = cleanMarkdown.replace(/^(<\/div>|<\/script>)\n/m, '$1\n\n') |
| 61 | if (name) { |
| 62 | result.name = name.replace(/\s*name: /, '') |
| 63 | } |
| 64 | if (emoji) { |
| 65 | result.emoji = emoji.replace(/\s*emoji: /, '') |
| 66 | const split = markdown.indexOf('---', 10) |
| 67 | if (split > 0) { |
| 68 | cleanMarkdown = markdown.slice(Math.max(0, split + 3)) |
| 69 | } |
| 70 | } |
| 71 | /* This is supposed to prevent us from writing frontmatter to the UI |
| 72 | TODO: this is brittle, and doesn't seem to work consistently :/ |
| 73 | if (rendering && cleanMarkdown.slice(0, 1000).includes('---')) { |
| 74 | const offset = cleanMarkdown.split('---').slice(0, -1).join('---').length |
| 75 | cleanMarkdown = cleanMarkdown.slice(offset) |
| 76 | if (cleanMarkdown.slice(0, 100).includes('---')) { |
| 77 | cleanMarkdown = '' |
| 78 | } |
| 79 | } |
| 80 | */ |
| 81 | |
| 82 | const parsed = unified().use(remarkParse).parse(cleanMarkdown) |
| 83 | |
| 84 | let htmlBlocks = parsed.children.filter( |
| 85 | c => |
| 86 | (c.type === 'code' && ['html', ''].includes(c.lang ?? '')) || |
| 87 | c.type === 'html' |
| 88 | ) as Code[] |
| 89 | // TODO: maybe do this first and only if the first paragraph is chill |
| 90 | for (const c of parsed.children) { |
| 91 | if (c.type === 'paragraph') { |
| 92 | let html = '' |
| 93 | if (c.children[0].type === 'html') { |
| 94 | for (const c2 of c.children) { |
| 95 | html = html + (c2 as unknown as Code).value || '' |
| 96 | } |
no test coverage detected