MCPcopy
hub / github.com/ModelEngine-Group/nexent / main

Function main

deploy/sql/migrations/generate_backfill_sql.py:79–268  ·  view source on GitHub ↗
()

Source from the content-addressed store, hash-verified

77
78
79def main() -> None:
80 today = date.today().strftime("%Y-%m-%d")
81 lines: list[str] = []
82
83 lines.append(f"-- Generated by deploy/sql/migrations/generate_backfill_sql.py on {today}")
84 lines.append(f"-- Catalog revision: {CATALOG_REVISION}")
85 lines.append(f"-- Catalog entries: {len(CATALOG)}")
86 lines.append("--")
87 lines.append("-- Migration kind: RECOMMENDED_DATA_FIX")
88 lines.append("-- Idempotent: COALESCE + IS NULL guards protect existing values.")
89 lines.append("-- Safe: enforces max_output < context_window via GREATEST/LEAST.")
90 lines.append("--")
91 lines.append("-- Phases:")
92 lines.append("-- 1a Bare LLM/VLM rows that match a catalog entry by")
93 lines.append("-- (model_factory, model_repo, model_name) -> fill capacity")
94 lines.append("-- fields + tag capacity_source='profile' + profile_version.")
95 lines.append("-- 1b Already-filled rows that match a catalog entry AND whose")
96 lines.append("-- context_window_tokens and max_output_tokens exactly equal")
97 lines.append("-- the catalog values -> tag profile_version only. capacity_")
98 lines.append("-- source stays whatever it was (typically 'operator'); we")
99 lines.append("-- don't rewrite provenance, we just add the dispatch tag so")
100 lines.append("-- dispatch_profile_hit_total can fire.")
101 lines.append("-- 2 Remaining bare LLM/VLM rows -> safe defaults.")
102 lines.append("-- 3 Clamp default_output_reserve_tokens to <= max_output_tokens.")
103 lines.append("--")
104 lines.append("-- Pre-run self-check (rows whose capability_profile_version is NULL):")
105 lines.append("--")
106 lines.append("-- SELECT model_id, model_repo, model_name, model_factory,")
107 lines.append("-- context_window_tokens, max_output_tokens, capability_profile_version")
108 lines.append("-- FROM nexent.model_record_t")
109 lines.append("-- WHERE delete_flag = 'N'")
110 lines.append("-- AND COALESCE(model_type, 'llm') IN ('llm', 'vlm')")
111 lines.append("-- AND capability_profile_version IS NULL;")
112 lines.append("")
113
114 # Group catalog by provider so the generated SQL has tidy section headers
115 from collections import defaultdict
116 by_provider: dict[str, list] = defaultdict(list)
117 for (provider, full_id), profile in CATALOG.items():
118 by_provider[provider].append((full_id, profile))
119
120 # --------------------------------------------------------------
121 # Phase 1a: catalog match + bare -> fill capacity + tag
122 # --------------------------------------------------------------
123 lines.append("-- ============================================================")
124 lines.append("-- Phase 1a: Backfill bare rows that match approved catalog entries")
125 lines.append("-- ============================================================")
126 lines.append("")
127 lines.append("DO $$")
128 lines.append("DECLARE")
129 lines.append(" v_updated INTEGER := 0;")
130 lines.append(" v_total INTEGER := 0;")
131 lines.append(" c_active_flag CONSTANT TEXT := 'N';")
132 lines.append(" c_source_profile CONSTANT TEXT := 'profile';")
133 lines.append("BEGIN")
134
135 for provider in sorted(by_provider.keys()):
136 entries = by_provider[provider]

Callers 1

Calls 8

_sql_str · Function · 0.85
_sql_repo_match · Function · 0.85
_sql_int · Function · 0.85
strftime · Method · 0.80
append · Method · 0.80
items · Method · 0.80
_split_repo_name · Function · 0.70
join · Method · 0.45

Tested by

no test coverage detected