MCPcopy
hub / github.com/PaddlePaddle/PaddleNLP / forward

Method forward

paddlenlp/experimental/faster_tokenizer.py:96–142  ·  view source on GitHub ↗
(self, text, text_pair=None, max_seq_len=0, pad_to_max_seq_len=False)

Source from the content-addressed store, hash-verified

94 self.is_split_into_words = is_split_into_words
95
def forward(self, text, text_pair=None, max_seq_len=0, pad_to_max_seq_len=False):
    """Run the ``faster_tokenizer`` op on ``text`` and, optionally, ``text_pair``.

    In dynamic mode the op is invoked directly through ``self.mod``;
    otherwise a ``faster_tokenizer`` op is appended via ``LayerHelper``.

    Args:
        text: Input text. In dynamic mode a ``list``/``tuple`` is converted
            with ``to_tensor``; any other value is passed through unchanged.
        text_pair: Optional second input, treated like ``text``. When it is
            ``None`` the static-graph op is built without a ``"TextPair"``
            input.
        max_seq_len: Forwarded to the op as the ``max_seq_len`` attribute.
        pad_to_max_seq_len: Forwarded to the op as the
            ``pad_to_max_seq_len`` attribute.

    Returns:
        A ``(input_ids, seg_ids)`` pair. On the static-graph path both are
        ``int64`` variables created for type inference.
    """
    if not paddle.in_dynamic_mode():
        # Static graph: describe the op and let the framework infer outputs.
        op_attrs = {
            "do_lower_case": self.do_lower_case,
            "max_seq_len": max_seq_len,
            "pad_to_max_seq_len": pad_to_max_seq_len,
            "is_split_into_words": self.is_split_into_words,
        }
        layer_helper = LayerHelper("faster_tokenizer")
        ids_var = layer_helper.create_variable_for_type_inference(dtype="int64")
        segments_var = layer_helper.create_variable_for_type_inference(dtype="int64")

        # "TextPair" is only wired in when a second input was supplied.
        op_inputs = {"Vocab": self.vocab, "Text": text}
        if text_pair is not None:
            op_inputs["TextPair"] = text_pair

        layer_helper.append_op(
            type="faster_tokenizer",
            inputs=op_inputs,
            outputs={"InputIds": ids_var, "SegmentIds": segments_var},
            attrs=op_attrs,
        )
        return ids_var, segments_var

    # Dynamic mode: plain Python sequences are converted to tensors first.
    sequence_types = (list, tuple)
    if isinstance(text, sequence_types):
        text = to_tensor(list(text))
    if text_pair is not None and isinstance(text_pair, sequence_types):
        text_pair = to_tensor(list(text_pair))

    # Attributes are passed as a flat run of name/value positional pairs.
    token_ids, segment_ids = self.mod.faster_tokenizer(
        self.vocab,
        text,
        text_pair,
        "do_lower_case",
        self.do_lower_case,
        "max_seq_len",
        max_seq_len,
        "pad_to_max_seq_len",
        pad_to_max_seq_len,
        "is_split_into_words",
        self.is_split_into_words,
    )
    return token_ids, segment_ids
143
144 @classmethod
145 def from_pretrained(cls, name):

Callers

nothing calls this directly

Calls 1

to_tensor · Function · 0.85

Tested by

no test coverage detected