MCPcopy
hub / github.com/fauxpilot/fauxpilot / generate

Method generate

copilot_proxy/utils/codegen.py:75–227  ·  view source on GitHub ↗
(self, data)

Source from the content-addressed store, hash-verified

73 return np.array([flat_ids, offsets], dtype="int32").transpose((1, 0, 2))
74
75 def generate(self, data):
76 prompt = data['prompt']
77 n = data.get('n', 1)
78 model_name = data["model"]
79 # ugly hack to set the data type correctly. Huggingface models want int32, but fastertransformer needs uint32
80 # i could've done the conversion from uint32 to int32 in the model but that'd be inefficient.
81 np_type = np.int32 if model_name.startswith("py-") else np.uint32
82
83 input_start_ids = np.expand_dims(self.tokenizer.encode(prompt).ids, 0)
84 input_start_ids = np.repeat(input_start_ids, n, axis=0).astype(np_type)
85 prompt_len = input_start_ids.shape[1]
86 input_len = prompt_len * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
87 max_tokens = data.get('max_tokens', 16)
88 prompt_tokens: int = input_len[0][0]
89 requested_tokens = max_tokens + prompt_tokens
90 if requested_tokens > self.MAX_MODEL_LEN:
91 print(1)
92 raise self.TokensExceedsMaximum(
93 f"This model's maximum context length is {self.MAX_MODEL_LEN}, however you requested "
94 f"{requested_tokens} tokens ({prompt_tokens} in your prompt; {max_tokens} for the completion). "
95 f"Please reduce your prompt; or completion length."
96 )
97 output_len = np.ones_like(input_len).astype(np_type) * max_tokens
98 num_logprobs = data.get('logprobs', -1)
99 if num_logprobs is None:
100 num_logprobs = -1
101 want_logprobs = num_logprobs > 0
102
103 temperature = data.get('temperature', 0.2)
104 if temperature == 0.0:
105 temperature = 1.0
106 top_k = 1
107 else:
108 top_k = data.get('top_k', 0)
109
110 top_p = data.get('top_p', 1.0)
111 frequency_penalty = data.get('frequency_penalty', 1.0)
112 runtime_top_k = top_k * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
113 runtime_top_p = top_p * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
114 beam_search_diversity_rate = 0.0 * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
115 random_seed = np.random.randint(0, 2 ** 31 - 1, (input_start_ids.shape[0], 1), dtype=np.int32)
116 temperature = temperature * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
117 len_penalty = 1.0 * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
118 repetition_penalty = frequency_penalty * np.ones([input_start_ids.shape[0], 1]).astype(np.float32)
119 is_return_log_probs = want_logprobs * np.ones([input_start_ids.shape[0], 1]).astype(np.bool_)
120 beam_width = (1 * np.ones([input_start_ids.shape[0], 1])).astype(np_type)
121 start_ids = self.PAD_CHAR * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
122 end_ids = self.PAD_CHAR * np.ones([input_start_ids.shape[0], 1]).astype(np_type)
123
124 stop_words = data.get('stop', [])
125 if stop_words is None:
126 stop_words = []
127 if stop_words:
128 stop_word_list = np.repeat(self.to_word_list_format([stop_words], self.tokenizer), input_start_ids.shape[0],
129 axis=0)
130 else:
131 stop_word_list = np.concatenate([np.zeros([input_start_ids.shape[0], 1, 1]).astype(
132 np.int32), (-1 * np.ones([input_start_ids.shape[0], 1, 1])).astype(np.int32)], axis=1)

Callers 2

__call__Method · 0.95
executeMethod · 0.80

Calls 3

to_word_list_formatMethod · 0.95
prepare_tensorMethod · 0.95
trim_with_stopwordsMethod · 0.95

Tested by

no test coverage detected