Method add

spacy/tokens/_serialize.py:91–127 · view source on GitHub ↗

Add a Doc's annotations to the DocBin for serialization. doc (Doc): The Doc object to add. DOCS: https://spacy.io/api/docbin#add

(self, doc: Doc)

Source from the content-addressed store, hash-verified

89	return len(self.tokens)
90
def add(self, doc: Doc) -> None:
    """Record one Doc's annotations in this DocBin so they can be serialized.

    Appends the doc's attribute array (for self.attrs), its SPACY array as
    booleans, its "has_unknown_spaces" flag, its cats and its serialized
    span groups; the user data is appended only when self.store_user_data
    is set. Strings behind the token and span annotations are added to
    self.strings.

    doc (Doc): The Doc object to add.

    DOCS: https://spacy.io/api/docbin#add
    """
    attr_values = doc.to_array(self.attrs)
    n_rows = attr_values.shape[0]
    if len(attr_values.shape) == 1:
        # A 1-D result is stored as a single-column 2-D array.
        attr_values = attr_values.reshape((n_rows, 1))
    self.tokens.append(attr_values)

    whitespace = doc.to_array(SPACY)
    # Sanity check: a row-count mismatch should never happen.
    assert n_rows == whitespace.shape[0]
    whitespace_column = whitespace.reshape((whitespace.shape[0], 1))
    self.spaces.append(numpy.asarray(whitespace_column, dtype=bool))

    self.flags.append({"has_unknown_spaces": doc.has_unknown_spaces})

    remember = self.strings.add
    for token in doc:
        token_strings = (
            token.text,
            token.tag_,
            token.lemma_,
            token.norm_,
            str(token.morph),
            token.dep_,
            token.ent_type_,
            token.ent_kb_id_,
            token.ent_id_,
        )
        for value in token_strings:
            remember(value)

    self.cats.append(doc.cats)
    if self.store_user_data:
        self.user_data.append(srsly.msgpack_dumps(doc.user_data))

    self.span_groups.append(doc.spans.to_bytes())
    for _group_name, span_group in doc.spans.items():
        for span in span_group:
            remember(span.label_)
            # kb_id / id strings are only added when the span's own vocab
            # string store contains them.
            vocab_strings = span.doc.vocab.strings
            if span.kb_id in vocab_strings:
                remember(span.kb_id_)
            if span.id in vocab_strings:
                remember(span.id_)
128
129	def get_docs(self, vocab: Vocab) -> Iterator[Doc]:
130	"""Recover Doc objects from the annotations, using the given vocab.

apply · Function · 0.95

__init__ · Method · 0.95

test_applycli_docbin · Function · 0.95

test_applycli_mixed · Function · 0.95

test_applycli_user_data · Function · 0.95

test_issue4528 · Function · 0.95

test_serialize_doc_bin · Function · 0.95

test_serialize_custom_extension · Function · 0.95

write_sample_training · Function · 0.95

create_pipe_from_source · Method · 0.45

disable_pipe · Method · 0.45

__setitem__ · Method · 0.45

append · Method · 0.80

to_bytes · Method · 0.45

test_applycli_docbin · Function · 0.76

test_applycli_mixed · Function · 0.76

test_applycli_user_data · Function · 0.76

test_issue4528 · Function · 0.76

test_serialize_doc_bin · Function · 0.76

test_serialize_custom_extension · Function · 0.76

write_sample_training · Function · 0.76

test_language_source_and_vectors · Function · 0.36

matcher · Function · 0.36

test_matcher_from_api_docs · Function · 0.36

test_matcher_from_usage_docs · Function · 0.36

test_matcher_len_contains · Function · 0.36