Serialize the DocBin's annotations to a bytestring. RETURNS (bytes): The serialized DocBin. DOCS: https://spacy.io/api/docbin#to_bytes
(self)
| 189 | self.user_data.extend(other.user_data) |
| 190 | |
def to_bytes(self) -> bytes:
    """Pack the annotations held by this DocBin into a single bytestring.

    The per-doc token and space arrays are stacked, dumped as raw C-order
    bytes next to the remaining fields, encoded with msgpack and finally
    compressed with zlib.

    RETURNS (bytes): The serialized DocBin.

    DOCS: https://spacy.io/api/docbin#to_bytes
    """
    doc_lengths = []
    for doc_array in self.tokens:
        # Internal invariant: each stored array has exactly two dimensions.
        assert len(doc_array.shape) == 2, doc_array.shape  # this should never happen
        doc_lengths.append(len(doc_array))

    # With nothing stored, fall back to an empty array so that .tobytes()
    # still works below.
    if self.tokens:
        token_matrix = numpy.vstack(self.tokens)
    else:
        token_matrix = numpy.asarray([])
    if self.spaces:
        space_matrix = numpy.vstack(self.spaces)
    else:
        space_matrix = numpy.asarray([])

    length_array = numpy.asarray(doc_lengths, dtype="int32")

    # Key order is kept stable on purpose: it determines the msgpack output.
    payload = {
        "version": self.version,
        "attrs": self.attrs,
        "tokens": token_matrix.tobytes("C"),
        "spaces": space_matrix.tobytes("C"),
        "lengths": length_array.tobytes("C"),
        "strings": sorted(self.strings),
        "cats": self.cats,
        "flags": self.flags,
        "span_groups": self.span_groups,
    }
    # User data is only included when the DocBin was configured to store it.
    if self.store_user_data:
        payload["user_data"] = self.user_data

    packed = srsly.msgpack_dumps(payload)
    return zlib.compress(packed)
| 217 | |
| 218 | def from_bytes(self, bytes_data: bytes) -> "DocBin": |
| 219 | """Deserialize the DocBin's annotations from a bytestring. |
no outgoing calls