hub / github.com/fauxpilot/fauxpilot / generate

Method generate

copilot_proxy/utils/codegen.py:75–227 · view source on GitHub ↗

(self, data)

Source from the content-addressed store, hash-verified

73	return np.array([flat_ids, offsets], dtype="int32").transpose((1, 0, 2))
74
75	def generate(self, data):
76	prompt = data['prompt']
77	n = data.get('n', 1)
78	model_name = data["model"]
79	# ugly hack to set the data type correctly. Huggingface models want int32, but fastertransformer needs uint32
80	# i could've done the conversion from uint32 to int32 in the model but that'd be inefficient.
81	np_type = np.int32 if model_name.startswith("py-") else np.uint32
82
83	input_start_ids = np.expand_dims(self.tokenizer.encode(prompt).ids, 0)
84	input_start_ids = np.repeat(input_start_ids, n, axis=0).astype(np_type)
85	prompt_len = input_start_ids.shape[1]
86	input_len = prompt_len * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
87	max_tokens = data.get('max_tokens', 16)
88	prompt_tokens: int = input_len[0][0]
89	requested_tokens = max_tokens + prompt_tokens
90	if requested_tokens > self.MAX_MODEL_LEN:
91	print(1)
92	raise self.TokensExceedsMaximum(
93	f"This model's maximum context length is {self.MAX_MODEL_LEN}, however you requested "
94	f"{requested_tokens} tokens ({prompt_tokens} in your prompt; {max_tokens} for the completion). "
95	f"Please reduce your prompt; or completion length."
96	)
97	output_len = np.ones_like(input_len).astype(np_type) * max_tokens
98	num_logprobs = data.get('logprobs', -1)
99	if num_logprobs is None:
100	num_logprobs = -1
101	want_logprobs = num_logprobs > 0
102
103	temperature = data.get('temperature', 0.2)
104	if temperature == 0.0:
105	temperature = 1.0
106	top_k = 1
107	else:
108	top_k = data.get('top_k', 0)
109
110	top_p = data.get('top_p', 1.0)
111	frequency_penalty = data.get('frequency_penalty', 1.0)
112	runtime_top_k = top_k * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
113	runtime_top_p = top_p * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
114	beam_search_diversity_rate = 0.0 * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
115	random_seed = np.random.randint(0, 2 ** 31 - 1, (input_start_ids.shape[0], 1), dtype=np.int32)
116	temperature = temperature * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
117	len_penalty = 1.0 * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
118	repetition_penalty = frequency_penalty * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
119	is_return_log_probs = want_logprobs * np.ones([input_start_ids.shape[0], 1]).astype(np.bool_)
120	beam_width = (1 * np.ones([input_start_ids.shape[0], 1])).astype(np_type)
121	start_ids = self.PAD_CHAR * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
122	end_ids = self.PAD_CHAR * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
123
124	stop_words = data.get('stop', [])
125	if stop_words is None:
126	stop_words = []
127	if stop_words:
128	stop_word_list = np.repeat(self.to_word_list_format([stop_words], self.tokenizer), input_start_ids.shape[0],
129	axis=0)
130	else:
131	stop_word_list = np.concatenate([np.zeros([input_start_ids.shape[0], 1, 1]).astype(
132	np.int32), (-1 * np.ones([input_start_ids.shape[0], 1, 1])).astype(np.int32)], axis=1)

Callers 2

__call__ (Method) · 0.95

execute (Method) · 0.80

Calls 3

to_word_list_format (Method) · 0.95

prepare_tensor (Method) · 0.95

trim_with_stopwords (Method) · 0.95

Tested by

no test coverage detected