| 56 | * It is used for indexing. |
| 57 | */ |
| 58 | export class _HashedDocument implements HashedDocumentInterface { |
| 59 | uid: string |
| 60 | |
| 61 | hash_?: string |
| 62 | |
| 63 | contentHash?: string |
| 64 | |
| 65 | metadataHash?: string |
| 66 | |
| 67 | pageContent: string |
| 68 | |
| 69 | metadata: Metadata |
| 70 | |
/**
 * Creates a hashed document from the supplied fields.
 *
 * Only `uid`, `pageContent` and `metadata` are copied from `fields`; the
 * hash properties (`hash_`, `contentHash`, `metadataHash`) are not set here.
 *
 * @param fields - Source values for `uid`, `pageContent` and `metadata`.
 */
constructor(fields: HashedDocumentArgs) {
  const { uid, pageContent, metadata } = fields

  this.uid = uid
  this.pageContent = pageContent
  this.metadata = metadata
}
| 76 | |
/**
 * Computes and stores the hashes for this document.
 *
 * On success `contentHash`, `metadataHash` and `hash_` are populated, where
 * `hash_` is derived from the concatenation of the content hash and the
 * metadata hash. If `uid` is falsy it is replaced with `hash_`.
 *
 * @throws Error if `metadata` contains one of the reserved keys
 * (`hash_`, `content_hash`, `metadata_hash`).
 * @throws Error if hashing the metadata fails; the original error is
 * interpolated into the message.
 */
calculateHashes(): void {
  const reservedKeys = ['hash_', 'content_hash', 'metadata_hash']

  // Report the first reserved key that appears in the metadata, if any.
  const offendingKey = reservedKeys.find((name) => name in this.metadata)
  if (offendingKey !== undefined) {
    throw new Error(
      `Metadata cannot contain key ${offendingKey} as it is reserved for internal use. Restricted keys: [${reservedKeys.join(', ')}]`
    )
  }

  // The content is hashed outside the try block, so a failure here
  // propagates unwrapped.
  const pageContentDigest = this._hashStringToUUID(this.pageContent)

  try {
    this.metadataHash = this._hashNestedDictToUUID(this.metadata)
  } catch (err) {
    throw new Error(`Failed to hash metadata: ${err}. Please use a dict that can be serialized using json.`)
  }

  // Stored only once the metadata hash has succeeded, so a failure above
  // leaves neither hash field written.
  this.contentHash = pageContentDigest

  const combinedDigest = this._hashStringToUUID(pageContentDigest + this.metadataHash)
  this.hash_ = combinedDigest

  // Fall back to the combined hash when no uid was provided.
  if (!this.uid) {
    this.uid = combinedDigest
  }
}
| 104 | |
/**
 * Converts this hashed document into a plain `Document`.
 *
 * Only `pageContent` and `metadata` are passed on; the `uid` and the hash
 * properties are not included. The `metadata` object is handed over as-is
 * (same reference, no copy is made here).
 *
 * @returns A new `Document` built from this instance's content and metadata.
 */
toDocument(): DocumentInterface {
  const { pageContent, metadata } = this

  return new Document({ pageContent, metadata })
}
| 111 | |
| 112 | static fromDocument(document: DocumentInterface, uid?: string): _HashedDocument { |
| 113 | const doc = new this({ |
| 114 | pageContent: document.pageContent, |
| 115 | metadata: document.metadata, |
nothing calls this directly
no outgoing calls
no test coverage detected