(
pdfMeta: any,
opts: { urlSource?: string | null; statusCode?: number; htmlForFallback?: string | null },
)
| 55 | }; |
| 56 | |
| 57 | export function buildHtmlLikeMetadataFromPdf( |
| 58 | pdfMeta: any, |
| 59 | opts: { urlSource?: string | null; statusCode?: number; htmlForFallback?: string | null }, |
| 60 | ): HtmlLikeMetadata { |
| 61 | const { urlSource = null, statusCode = 200, htmlForFallback = null } = opts; |
| 62 | |
| 63 | // Try to get a title from the PDF metadata; fall back to <title> in the converted HTML |
| 64 | let htmlTitle: string | null = null; |
| 65 | if (htmlForFallback) { |
| 66 | const $ = loadHtml(htmlForFallback); |
| 67 | const t = $("title").first().text()?.trim(); |
| 68 | htmlTitle = t || null; |
| 69 | } |
| 70 | |
| 71 | const title = pdfMeta?.title || htmlTitle || null; |
| 72 | const author = pdfMeta?.author || null; |
| 73 | const description = pdfMeta?.subject || null; |
| 74 | |
| 75 | // Keywords might be array or string depending on library |
| 76 | let keywords: string | null = null; |
| 77 | if (Array.isArray(pdfMeta?.keywords)) { |
| 78 | keywords = pdfMeta.keywords.join(", "); |
| 79 | } else if (typeof pdfMeta?.keywords === "string") { |
| 80 | keywords = pdfMeta.keywords; |
| 81 | } |
| 82 | |
| 83 | // XMP/DC language if exposed; often not present |
| 84 | const language = pdfMeta?.language || pdfMeta?.["dc:language"] || null; |
| 85 | |
| 86 | const publishedTime = |
| 87 | parsePdfDate(pdfMeta?.creationDate || pdfMeta?.CreationDate || pdfMeta?.["xmp:CreateDate"]) || |
| 88 | null; |
| 89 | const modifiedTime = |
| 90 | parsePdfDate(pdfMeta?.modDate || pdfMeta?.ModDate || pdfMeta?.["xmp:ModifyDate"]) || null; |
| 91 | |
| 92 | let origin: string | null = null; |
| 93 | let host: string | null = null; |
| 94 | if (urlSource) { |
| 95 | try { |
| 96 | const u = new URL(urlSource); |
| 97 | origin = u.origin; |
| 98 | host = u.hostname; |
| 99 | } catch {} |
| 100 | } |
| 101 | |
| 102 | return { |
| 103 | title, |
| 104 | language, |
| 105 | urlSource, |
| 106 | timestamp: new Date().toISOString(), |
| 107 | |
| 108 | description, |
| 109 | keywords, |
| 110 | author, |
| 111 | |
| 112 | ogTitle: title, |
| 113 | ogDescription: description, |
| 114 | ogImage: null, |
no test coverage detected