MCPcopy
hub / github.com/explosion/spaCy / to_bytes

Method to_bytes

spacy/tokens/_serialize.py:191–216  ·  view source on GitHub ↗

Serialize the DocBin's annotations to a bytestring. RETURNS (bytes): The serialized DocBin. DOCS: https://spacy.io/api/docbin#to_bytes

(self)

Source from the content-addressed store, hash-verified

189 self.user_data.extend(other.user_data)
190
def to_bytes(self) -> bytes:
    """Pack the DocBin's annotations into a compressed bytestring.

    The payload is a msgpack-encoded dict that is then zlib-compressed.
    User data is only included when `store_user_data` is set.

    RETURNS (bytes): The serialized DocBin.

    DOCS: https://spacy.io/api/docbin#to_bytes
    """
    lengths = []
    for token_array in self.tokens:
        # Each stored array is expected to be 2-dimensional; this should
        # never happen otherwise.
        assert len(token_array.shape) == 2, token_array.shape
        lengths.append(len(token_array))

    # Stack the stored arrays into one block each; fall back to an empty
    # array when nothing has been added.
    if self.tokens:
        stacked_tokens = numpy.vstack(self.tokens)
    else:
        stacked_tokens = numpy.asarray([])
    if self.spaces:
        stacked_spaces = numpy.vstack(self.spaces)
    else:
        stacked_spaces = numpy.asarray([])

    length_array = numpy.asarray(lengths, dtype="int32")

    # Key order is kept stable because it determines the serialized output.
    payload = dict(
        version=self.version,
        attrs=self.attrs,
        tokens=stacked_tokens.tobytes("C"),
        spaces=stacked_spaces.tobytes("C"),
        lengths=length_array.tobytes("C"),
        strings=sorted(self.strings),
        cats=self.cats,
        flags=self.flags,
        span_groups=self.span_groups,
    )
    if self.store_user_data:
        payload["user_data"] = self.user_data

    packed = srsly.msgpack_dumps(payload)
    return zlib.compress(packed)
217
218 def from_bytes(self, bytes_data: bytes) -> "DocBin":
219 """Deserialize the DocBin's annotations from a bytestring.

Callers 13

convert · Function · 0.95
to_disk · Method · 0.95
test_issue4528 · Function · 0.95
test_issue5141 · Function · 0.95
test_serialize_doc_bin · Function · 0.95
parse_deps · Function · 0.45
_save_model · Function · 0.45
copy_from_base_model · Function · 0.45
init_vocab · Function · 0.45
add · Method · 0.45
merge_bins · Function · 0.45

Calls

no outgoing calls

Tested by 4

test_issue4528 · Function · 0.76
test_issue5141 · Function · 0.76
test_serialize_doc_bin · Function · 0.76