MCPcopy
hub / github.com/llmware-ai/llmware / QualityCheck

Class QualityCheck

llmware/prompts.py:1264–1913  ·  view source on GitHub ↗

Implements the validation between the output of the LLM and the context used to generate the response, which is used by the ``Prompt`` class. ``QualityCheck`` allows for the comparison of LLM generated responses with the context that was used to create the response. Concretely, it is a quality-verifying mechanism used by the ``Prompt`` class.

Source from the content-addressed store, hash-verified

1262
1263
1264class QualityCheck:
1265 """Implements the validation between the output of the LLM and the context used to generate the response,
1266 which is used by the ``Prompt`` class.
1267
1268 ``QualityCheck`` allows for the comparison of LLM generated responses with the context that was used to
1269 create the response. Concretely, it is a quality-verifying mechanism used by the ``Prompt`` class.
1270 One use case is to verify that reported numbers in the response appear in the context.
1271
1272 Parameters
1273 ----------
1274 prompt : object, default=None
1275 An object of type ``Prompt``.
1276
1277 Examples
1278 ----------
1279 >>> import os
1280 >>> from llmware.setup import Setup
1281 >>> from llmware.library import Library
1282 >>> from llmware.prompts import Prompt
1283 >>> library = Library().create_new_library('prompt_with_sources')
1284 >>> sample_files_path = Setup().load_sample_files(over_write=False)
1285 >>> parsing_output = library.add_files(os.path.join(sample_files_path, "Agreements"))
1286 >>> prompter = Prompt().load_model('llmware/bling-1b-0.1')
1287 >>> prompter.add_source_document(os.path.join(sample_files_path, "Agreements"), 'Apollo EXECUTIVE EMPLOYMENT AGREEMENT.pdf')
1288 >>> result = prompter.prompt_with_source(prompt='What is the base salery amount?', prompt_name='default_with_context')
1289 >>> result[0]['llm_response']
1290 ' $1,000,000.00'
1291 >>> ev_numbers = prompter.evidence_check_numbers(result)
1292 >>> ev_numbers[0].keys()
1293 dict_keys(['llm_response', 'prompt', 'evidence', 'instruction', 'model',
1294 'usage', 'time_stamp', 'calling_app_ID', 'rating', 'account_name',
1295 'prompt_id', 'batch_id', 'evidence_metadata', 'biblio', 'event_type',
1296 'human_feedback', 'human_assessed_accuracy',
1297 'fact_check'])
1298 >>> ev_numbers[0]['fact_check']
1299 [{'fact': 'detail.', 'status': 'Not Confirmed', 'text': '', 'page_num': '', 'source': ''}]
1300 >>> ev_sources = prompter.evidence_check_sources(result)
1301 >>> ev_sources[0].keys()
1302 dict_keys(['llm_response', 'prompt', 'evidence', 'instruction', 'model',
1303 'usage', 'time_stamp', 'calling_app_ID', 'rating', 'account_name',
1304 'prompt_id', 'batch_id', 'evidence_metadata', 'biblio', 'event_type',
1305 'human_feedback', 'human_assessed_accuracy',
1306 'fact_check', 'source_review'])
1307 >>> ev_sources[0]['source_review']
1308 []
1309 >>> ev_stats = prompter.evidence_comparison_stats(result)
1310 >>> ev_stats[0].keys()
1311 dict_keys(['llm_response', 'prompt', 'evidence', 'instruction', 'model',
1312 'usage', 'time_stamp', 'calling_app_ID', 'rating', 'account_name',
1313 'prompt_id', 'batch_id', 'evidence_metadata', 'biblio', 'event_type',
1314 'human_feedback', 'human_assessed_accuracy', 'fact_check', 'source_review', 'comparison_stats'])
1315 >>> ev_stats[0]['comparison_stats']
1316 {'percent_display': '0.0%', 'confirmed_words': [],
1317 'unconfirmed_words': ['1000000.00'], 'verified_token_match_ratio': 0.0,
1318 'key_point_list': [{'key_point': ' $1,000,000.00', 'entry': 0, 'verified_match': 0.0}]}
1319 """
1320 def __init__(self, prompt=None):
1321

Calls

no outgoing calls

Tested by

no test coverage detected