MCPcopy
hub / github.com/llmware-ai/llmware / test_and_remediate_schema

Method test_and_remediate_schema

llmware/resources.py:3416–3460  ·  view source on GitHub ↗

Applies a larger test of the schema against the data held in the CustomTable .rows attribute, testing the number of samples passed as an optional parameter. If auto_remediate == True (default), it automatically updates the data type used in the schema to the 'safest' among the types found in the sample set.

(self, samples=10, auto_remediate=True)

Source from the content-addressed store, hash-verified

3414 return dt
3415
def test_and_remediate_schema(self, samples=10, auto_remediate=True):

    """ Applies a larger test of the schema against the data held in the CustomTable .rows attribute.

    For every key in self.schema, the value stored under that key in each of the first `samples` rows
    is classified with self._get_best_guess_value_type, and the 'safest' data type for the column is
    selected from the sampled types and the currently declared type:

        -- "text" if any sample is text, or the schema already declares text
        -- "float" if any sample is float, or the schema already declares float
        -- "integer" ("bigint" when self.db == "postgres") if an integer/bigint sample was found and
           the schema declares integer or bigint
        -- "text" in every other case, as the safest fallback

    Parameters:
        samples (int): maximum number of rows to inspect, counted from the start of self.rows; capped
            at len(self.rows).
        auto_remediate (bool): if True (default), self.schema is replaced with the updated schema.

    Returns:
        dict: mapping of each schema key to its selected data type.

    Rows that do not contain a key are skipped for that key, and a warning is logged. """

    # the cap does not depend on the key, so compute it once rather than on every pass of the loop
    samples = min(samples, len(self.rows))

    updated_schema = {}

    for key, data_type in self.schema.items():

        samples_dt_list = []

        for x in range(samples):

            row = self.rows[x]

            if key not in row:
                logger.warning(f"warning: CustomTable - test_and_remediate_schema - unexpected error - "
                               f"key not found in row - {x} - {key} - {self.rows[x]}")
            else:
                # presumably returns one of "text", "float", "integer" or "bigint" - helper is
                # defined elsewhere in the class, confirm against its implementation
                samples_dt_list.append(self._get_best_guess_value_type(row[key]))

        integer_sampled = "integer" in samples_dt_list or "bigint" in samples_dt_list

        # simple decision tree - will add more options over time
        if "text" in samples_dt_list or data_type == "text":
            dt = "text"
        elif "float" in samples_dt_list or data_type == "float":
            dt = "float"
        elif integer_sampled and data_type in ["integer", "bigint"]:
            # explicit grouping: previously 'A or B and C' parsed as 'A or (B and C)', so an "integer"
            # sample skipped the schema-type check that a "bigint" sample was subject to
            dt = "integer"
            if self.db == "postgres":
                dt = "bigint"
        else:
            # when in doubt, assign 'text' as safest choice
            dt = "text"

        updated_schema[key] = dt

    if auto_remediate:
        self.schema = updated_schema

    return updated_schema
3461
3462 def load_json(self, fp, fn, selected_keys=None, data_type_map=None, schema=None):
3463

Callers 2

build_tableFunction · 0.95
build_tableFunction · 0.95

Calls 2

itemsMethod · 0.80

Tested by

no test coverage detected