MCPcopy
hub / github.com/TheTom/turboquant_plus / run_completion_trajectory

Function run_completion_trajectory

refract/runner.py:548–667  ·  view source on GitHub ↗

v0.1.4: greedy-decode and capture model-token IDs at decode time. Drives the patched ``llama-completion`` binary with ``REFRACT_TRAJECTORY=<tmpfile>`` set; the binary writes one JSONL record per sampled token (``{"step":N,"token_id":ID}``). We read the file back, return the ID sequence, and delete the file.

(
    model: Path,
    prompt: str,
    kv: KVConfig,
    n_predict: int = 128,
    ctx: int = 512,
    n_gpu_layers: int = 99,
    seed: int = 42,
    temperature: float = 0.0,
    timeout: float = 300.0,
    apply_chat_template: bool = True,
    system: Optional[str] = None,
    reasoning: str = "off",
)

Source from the content-addressed store, hash-verified

546
547
548def run_completion_trajectory(
549 model: Path,
550 prompt: str,
551 kv: KVConfig,
552 n_predict: int = 128,
553 ctx: int = 512,
554 n_gpu_layers: int = 99,
555 seed: int = 42,
556 temperature: float = 0.0,
557 timeout: float = 300.0,
558 apply_chat_template: bool = True,
559 system: Optional[str] = None,
560 reasoning: str = "off",
561) -> tuple[list[int], dict]:
562 """v0.1.4: greedy-decode and capture model-token IDs at decode time.
563
564 Drives the patched ``llama-completion`` binary with
565 ``REFRACT_TRAJECTORY=<tmpfile>`` set; the binary writes one JSONL record
566 per sampled token (``{"step":N,"token_id":ID}``). We read the file back,
567 return the ID sequence, and delete the file.
568
569 Returns (token_ids, metadata). The token IDs are the model's own
570 sampled tokens — no detokenize→retokenize round-trip, no whitespace-vs-
571 model-token unit mismatch. This is the v0.1.4 fix for GTM's structural
572 weakness (LIMITATIONS.md §1, §5).
573
574 Requires the ``llama-completion`` binary to be built from the patched
575 ``tools/completion/completion.cpp`` (REFRACT v0.1.4 patch). If the
576 binary lacks the patch, the trajectory file will be empty and this
577 function returns ``([], meta)``.
578 """
579 # v0.3.1: dispatch to active backend if non-llamacpp is set.
580 if _ACTIVE_BACKEND is not None and getattr(_ACTIVE_BACKEND, "name", None) != "llamacpp":
581 res = _ACTIVE_BACKEND.run_completion_trajectory(
582 model=model, prompt=prompt, kv_config_str=kv.label(),
583 n_predict=n_predict, ctx=ctx, n_gpu_layers=n_gpu_layers,
584 seed=seed, temperature=temperature, timeout=timeout,
585 apply_chat_template=apply_chat_template, system=system,
586 )
587 return res.token_ids, res.metadata
588
589 bin_path = _bin("llama-completion")
590
591 fd, traj_path = tempfile.mkstemp(prefix="refract-traj-", suffix=".jsonl")
592 os.close(fd)
593 os.unlink(traj_path) # patched binary creates it itself
594
595 cmd: list[str] = [
596 str(bin_path),
597 "-m", str(model),
598 "-p", prompt,
599 "-n", str(n_predict),
600 "-c", str(ctx),
601 "-ngl", str(n_gpu_layers),
602 "--seed", str(seed),
603 "--temp", str(temperature),
604 "-no-cnv",
605 "--no-display-prompt",

Calls 8

_binFunction · 0.85
_llama_extra_flagsFunction · 0.85
labelMethod · 0.80
closeMethod · 0.80
cli_argsMethod · 0.80
envMethod · 0.80
runMethod · 0.80