| 250 | } |
| 251 | |
| 252 | async function extractPdfStyle(buffer: Buffer): Promise<DocumentStyleSummary | null> { |
| 253 | try { |
| 254 | const { PDFDocument, PDFName, PDFDict } = await import('pdf-lib') |
| 255 | |
| 256 | let doc: Awaited<ReturnType<typeof PDFDocument.load>> |
| 257 | try { |
| 258 | doc = await PDFDocument.load(buffer, { updateMetadata: false }) |
| 259 | } catch { |
| 260 | // Encrypted or corrupt |
| 261 | return null |
| 262 | } |
| 263 | |
| 264 | const pages = doc.getPages() |
| 265 | if (pages.length === 0) return null |
| 266 | |
| 267 | // Page dimensions (first page is canonical for preset detection) |
| 268 | const { width: widthPt, height: heightPt } = pages[0].getSize() |
| 269 | let preset: 'A4' | 'letter' | 'custom' = 'custom' |
| 270 | if (Math.abs(widthPt - 595.28) < 5 && Math.abs(heightPt - 841.89) < 5) preset = 'A4' |
| 271 | else if (Math.abs(widthPt - 612) < 5 && Math.abs(heightPt - 792) < 5) preset = 'letter' |
| 272 | |
| 273 | // Font names from page resource dictionaries (first 10 pages to bound cost) |
| 274 | const rawFontNames = new Set<string>() |
| 275 | const pagesToScan = Math.min(pages.length, 10) |
| 276 | for (let i = 0; i < pagesToScan; i++) { |
| 277 | try { |
| 278 | const resourcesRef = pages[i].node.get(PDFName.of('Resources')) |
| 279 | if (!resourcesRef) continue |
| 280 | const resources = doc.context.lookup(resourcesRef, PDFDict) |
| 281 | if (!resources) continue |
| 282 | const fontDictRef = resources.get(PDFName.of('Font')) |
| 283 | if (!fontDictRef) continue |
| 284 | const fontDict = doc.context.lookup(fontDictRef, PDFDict) |
| 285 | if (!fontDict) continue |
| 286 | for (const key of fontDict.keys()) { |
| 287 | try { |
| 288 | const fontRef = fontDict.get(key) |
| 289 | if (!fontRef) continue |
| 290 | const fontObj = doc.context.lookup(fontRef, PDFDict) |
| 291 | if (!fontObj) continue |
| 292 | const baseFontRef = fontObj.get(PDFName.of('BaseFont')) |
| 293 | if (!baseFontRef) continue |
| 294 | // Format: "/ABCDEF+FontName" (subset) or "/FontName" (full embed) |
| 295 | const raw = baseFontRef |
| 296 | .toString() |
| 297 | .replace(/^\//, '') |
| 298 | .replace(/^[A-Z]{6}\+/, '') |
| 299 | if (raw) rawFontNames.add(raw) |
| 300 | } catch {} |
| 301 | } |
| 302 | } catch {} |
| 303 | } |
| 304 | |
| 305 | // Normalize to unique font family names by stripping PostScript weight/style suffixes. |
| 306 | // Apply the strip in a loop to handle compound suffixes (e.g. SemiBoldItalic, LightOblique). |
| 307 | // BoldMT must precede Bold, Oblique must precede the simple form, etc. |
| 308 | const SUFFIX_RX = |
| 309 | /[-]?(BoldMT|BoldOblique|BoldItalic|SemiBoldItalic|ExtraBoldItalic|LightItalic|LightOblique|MediumItalic|Regular|ExtraBold|SemiBold|Medium|Black|Light|Bold|Italic|Oblique|Condensed|Expanded|MT)$/i |