MCPcopy
hub / github.com/explosion/spaCy / add

Method add

spacy/tokens/_serialize.py:91–127  ·  view source on GitHub ↗

Add a Doc's annotations to the DocBin for serialization. Parameter: doc (Doc) – the Doc object to add. Docs: https://spacy.io/api/docbin#add

(self, doc: Doc)

Source from the content-addressed store, hash-verified

89 return len(self.tokens)
90
def add(self, doc: Doc) -> None:
    """Store one Doc's annotations in this DocBin so they can be serialized.

    doc (Doc): The Doc object whose annotations are recorded.

    DOCS: https://spacy.io/api/docbin#add
    """
    # Per-token attribute values; a 1D result is promoted to a single-column
    # 2D array so every stored entry has the same rank.
    attr_values = doc.to_array(self.attrs)
    if attr_values.ndim == 1:
        attr_values = attr_values.reshape((attr_values.shape[0], 1))
    self.tokens.append(attr_values)

    # The SPACY attribute is exported separately and kept as one boolean
    # column with a row per token.
    space_values = doc.to_array(SPACY)
    assert attr_values.shape[0] == space_values.shape[0]  # this should never happen
    space_column = space_values.reshape((space_values.shape[0], 1))
    self.spaces.append(numpy.asarray(space_column, dtype=bool))
    self.flags.append({"has_unknown_spaces": doc.has_unknown_spaces})

    # Record every string-valued token annotation in the DocBin's own
    # string collection.
    for token in doc:
        for value in (
            token.text,
            token.tag_,
            token.lemma_,
            token.norm_,
            str(token.morph),
            token.dep_,
            token.ent_type_,
            token.ent_kb_id_,
            token.ent_id_,
        ):
            self.strings.add(value)

    self.cats.append(doc.cats)
    # User data is only kept (msgpack-encoded) when the DocBin opted in.
    if self.store_user_data:
        self.user_data.append(srsly.msgpack_dumps(doc.user_data))

    self.span_groups.append(doc.spans.to_bytes())
    for _group_name, group in doc.spans.items():
        for span in group:
            self.strings.add(span.label_)
            # KB id / span id strings are added only when their hash is
            # present in the source doc's vocab string store.
            vocab_strings = span.doc.vocab.strings
            if span.kb_id in vocab_strings:
                self.strings.add(span.kb_id_)
            if span.id in vocab_strings:
                self.strings.add(span.id_)
128
129 def get_docs(self, vocab: Vocab) -> Iterator[Doc]:
130 """Recover Doc objects from the annotations, using the given vocab.

Callers 15

apply · Function · 0.95
__init__ · Method · 0.95
test_applycli_docbin · Function · 0.95
test_applycli_mixed · Function · 0.95
test_applycli_user_data · Function · 0.95
test_issue4528 · Function · 0.95
test_serialize_doc_bin · Function · 0.95
write_sample_training · Function · 0.95
disable_pipe · Method · 0.45
__setitem__ · Method · 0.45

Calls 2

append · Method · 0.80
to_bytes · Method · 0.45

Tested by 15

test_applycli_docbin · Function · 0.76
test_applycli_mixed · Function · 0.76
test_applycli_user_data · Function · 0.76
test_issue4528 · Function · 0.76
test_serialize_doc_bin · Function · 0.76
write_sample_training · Function · 0.76
matcher · Function · 0.36