Implements the validation between the output of the LLM and the context used to generate the response, which is used by the ``Prompt`` class. ``QualityCheck`` allows for the comparison of LLM generated responses with the context that was used to create the response. Concretely, it is a quality-verifying mechanism used by the ``Prompt`` class.
| 1262 | |
| 1263 | |
| 1264 | class QualityCheck: |
| 1265 | """Implements the validation between the output of the LLM and the context used to generate the response, |
| 1266 | which is used by the ``Prompt`` class. |
| 1267 | |
| 1268 | ``QualityCheck`` allows for the comparison of LLM generated responses with the context that was used to |
| 1269 | create the response. Concretely, it is a quality-verifying mechanism used by the ``Prompt`` class. |
| 1270 | One use case is to verify that reported numbers in the response appear in the context. |
| 1271 | |
| 1272 | Parameters |
| 1273 | ---------- |
| 1274 | prompt : object, default=None |
| 1275 | An object of type ``Prompt``. |
| 1276 | |
| 1277 | Examples |
| 1278 | ---------- |
| 1279 | >>> import os |
| 1280 | >>> from llmware.setup import Setup |
| 1281 | >>> from llmware.library import Library |
| 1282 | >>> from llmware.prompts import Prompt |
| 1283 | >>> library = Library().create_new_library('prompt_with_sources') |
| 1284 | >>> sample_files_path = Setup().load_sample_files(over_write=False) |
| 1285 | >>> parsing_output = library.add_files(os.path.join(sample_files_path, "Agreements")) |
| 1286 | >>> prompter = Prompt().load_model('llmware/bling-1b-0.1') |
| 1287 | >>> prompter.add_source_document(os.path.join(sample_files_path, "Agreements"), 'Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf') |
| 1288 | >>> result = prompter.prompt_with_source(prompt='What is the base salery amount?', prompt_name='default_with_context') |
| 1289 | >>> result[0]['llm_response'] |
| 1290 | ' $1,000,000.00' |
| 1291 | >>> ev_numbers = prompter.evidence_check_numbers(result) |
| 1292 | >>> ev_numbers[0].keys() |
| 1293 | dict_keys(['llm_response', 'prompt', 'evidence', 'instruction', 'model', |
| 1294 | 'usage', 'time_stamp', 'calling_app_ID', 'rating', 'account_name', |
| 1295 | 'prompt_id', 'batch_id', 'evidence_metadata', 'biblio', 'event_type', |
| 1296 | 'human_feedback', 'human_assessed_accuracy', |
| 1297 | 'fact_check']) |
| 1298 | >>> ev_numbers[0]['fact_check'] |
| 1299 | [{'fact': 'detail.', 'status': 'Not Confirmed', 'text': '', 'page_num': '', 'source': ''}] |
| 1300 | >>> ev_sources = prompter.evidence_check_sources(result) |
| 1301 | >>> ev_sources[0].keys() |
| 1302 | dict_keys(['llm_response', 'prompt', 'evidence', 'instruction', 'model', |
| 1303 | 'usage', 'time_stamp', 'calling_app_ID', 'rating', 'account_name', |
| 1304 | 'prompt_id', 'batch_id', 'evidence_metadata', 'biblio', 'event_type', |
| 1305 | 'human_feedback', 'human_assessed_accuracy', |
| 1306 | 'fact_check', 'source_review']) |
| 1307 | >>> ev_sources[0]['source_review'] |
| 1308 | [] |
| 1309 | >>> ev_stats = prompter.evidence_comparison_stats(result) |
| 1310 | >>> ev_stats[0].keys() |
| 1311 | dict_keys(['llm_response', 'prompt', 'evidence', 'instruction', 'model', |
| 1312 | 'usage', 'time_stamp', 'calling_app_ID', 'rating', 'account_name', |
| 1313 | 'prompt_id', 'batch_id', 'evidence_metadata', 'biblio', 'event_type', |
| 1314 | 'human_feedback', 'human_assessed_accuracy', 'fact_check', 'source_review', 'comparison_stats']) |
| 1315 | >>> ev_stats[0]['comparison_stats'] |
| 1316 | {'percent_display': '0.0%', 'confirmed_words': [], |
| 1317 | 'unconfirmed_words': ['1000000.00'], 'verified_token_match_ratio': 0.0, |
| 1318 | 'key_point_list': [{'key_point': ' $1,000,000.00', 'entry': 0, 'verified_match': 0.0}]} |
| 1319 | """ |
| 1320 | def __init__(self, prompt=None): |
| 1321 |
no outgoing calls
no test coverage detected