Class TelechatIterTextStreamer

models/12B_4bit/generation_utils.py:67–162 · view source on GitHub ↗

This class is a rewrite modeled on the TextIteratorStreamer class in transformers.

Source from the content-addressed store, hash-verified

65
66
67	class TelechatIterTextStreamer:
68	"""
69	With reference to the TextIterStreamers in transformers, we have rewritten this class
70	"""
71
def __init__(
    self,
    tokenizer,
    history: History = None,
    skip_prompt: bool = False,
    timeout: Optional[float] = None,
    **decode_kwargs
):
    """Store the configuration, reset the streaming state and open a bot turn.

    Args:
        tokenizer: Kept on the instance; ``put`` uses its ``decode`` method
            and its ``eos_token_id`` attribute.
        history: Conversation object. An empty bot message is appended to
            it here, so it must provide ``append``.
        skip_prompt: When true, ``put`` discards the first value it receives.
        timeout: Kept on the instance; not used in this method
            (presumably applied to queue operations elsewhere -- confirm).
        **decode_kwargs: Kept on the instance and forwarded to
            ``tokenizer.decode`` by ``put``.
    """
    # Caller-supplied configuration.
    self.tokenizer = tokenizer
    self.history = history
    self.skip_prompt = skip_prompt
    self.timeout = timeout
    self.decode_kwargs = decode_kwargs

    # Streaming state: queue of emitted text, number of times output was
    # held back, the text and token ids accumulated so far, the end-of-stream
    # marker, and whether the next value handed to ``put`` is the prompt.
    self.text_queue = Queue()
    self.cache_time = 0
    self.text_until = ""
    self.token_until = []
    self.stop_signal = None
    self.next_tokens_are_prompt = True

    # NOTE(review): ``history`` defaults to None, yet ``append`` is called
    # unconditionally, so building a streamer without a history raises
    # AttributeError. Confirm whether the default should exist at all.
    empty_bot_turn = {"role": "bot", "content": self.text_until}
    self.history.append(empty_bot_turn)
91
def put(self, value):
    """Accept newly generated token ids and emit the text that became ready.

    Args:
        value: Array-like of token ids exposing ``shape``, indexing and
            ``tolist()``. It may carry a leading batch dimension, which
            must have size 1.

    Raises:
        ValueError: If ``value`` has more than one dimension and its first
            dimension is larger than 1.
    """
    # Only a single sequence is supported; strip a batch dimension of one.
    if len(value.shape) > 1:
        if value.shape[0] > 1:
            raise ValueError("TextStreamer only supports batch size 1")
        value = value[0]

    # The first value received is treated as the prompt and can be dropped.
    if self.skip_prompt and self.next_tokens_are_prompt:
        self.next_tokens_are_prompt = False
        return

    # A chunk that ends with the end-of-sequence id is ignored entirely.
    if value[-1] == self.tokenizer.eos_token_id:
        return

    # Re-decode everything received so far (there may be a smarter way).
    self.token_until.extend(value.tolist())
    decoded = self.tokenizer.decode(self.token_until, **self.decode_kwargs)

    # Hold the text back while it is not printable. Once six deferrals have
    # accumulated the text is emitted regardless; the counter is not reset
    # in this method.
    if not (self._is_printable(decoded) or self.cache_time >= 6):
        self.cache_time += 1
        return

    # Emit only the part that has not been handed out yet.
    already_emitted = len(self.text_until)
    self.text_until = decoded
    self.on_finalized_text(decoded[already_emitted:])
122
123	def end(self):
124	"""Flushes any remaining cache and prints a newline to stdout."""

chatMethod · 0.70

no outgoing calls

no test coverage detected