(self, needle, context, depth_percent, context_length)
| 164 | return results |
| 165 | |
def insert_needle(self, needle, context, depth_percent, context_length):
    """Return ``context`` with ``needle`` spliced in at about ``depth_percent``.

    Both strings are tokenized with ``self.enc_tiktoken``. The context is
    truncated so that context plus needle fit inside ``context_length`` minus
    ``self.final_context_length_buffer``, the needle is inserted, and the
    token sequence is decoded back to a string.

    Args:
        needle: Text to hide inside the context.
        context: Haystack text that receives the needle.
        depth_percent: Where to place the needle, as a percentage of the
            (truncated) context's token count. ``100`` appends the needle at
            the very end; any other value is snapped backwards to just after
            the nearest preceding ``'.'`` token, or to the start of the
            context when there is none. Values outside 0-100 are clamped to
            the bounds of the context.
        context_length: Total token budget before the buffer is subtracted.

    Returns:
        The decoded context string containing the needle.
    """
    tokens_needle = self.enc_tiktoken.encode(needle)
    tokens_context = self.enc_tiktoken.encode(context)

    # Reserve the configured buffer; the original comment says it accounts
    # for the system message, the user question, and the response.
    context_length -= self.final_context_length_buffer

    # If context + needle exceed the budget, trim the context so the needle
    # still fits. Clamp at zero: a needle longer than the budget would
    # otherwise produce a negative slice bound, which trims only a few tokens
    # off the END of the context instead of emptying it.
    if len(tokens_context) + len(tokens_needle) > context_length:
        keep = max(0, context_length - len(tokens_needle))
        tokens_context = tokens_context[:keep]

    if depth_percent == 100:
        # Depth 100 means the needle is the last thing in the document.
        tokens_new_context = tokens_context + tokens_needle
    else:
        # Token position at which the needle would nominally be inserted,
        # kept within the bounds of the context so the scan below is safe.
        insertion_point = int(len(tokens_context) * (depth_percent / 100))
        insertion_point = max(0, min(insertion_point, len(tokens_context)))

        # Place the needle at a sentence break: look up what token(s) a '.'
        # encodes to, then walk backwards until the token just before the
        # insertion point is a period (or we reach the start).
        period_tokens = self.enc_tiktoken.encode('.')
        while (
            insertion_point > 0
            and tokens_context[insertion_point - 1] not in period_tokens
        ):
            insertion_point -= 1

        # Everything before the break, then the needle, then the rest.
        tokens_new_context = (
            tokens_context[:insertion_point]
            + tokens_needle
            + tokens_context[insertion_point:]
        )

    # Convert back to a string and return it.
    return self.enc_tiktoken.decode(tokens_new_context)
| 202 | |
| 203 | def generate_context(self, needle, trim_context, context_length, depth_percent): |
| 204 | context = self.insert_needle(needle, trim_context, depth_percent, context_length) |
no test coverage detected