MCPcopy
hub / github.com/z-lab/dflash / _print_decode_summary

Function _print_decode_summary

dflash/benchmark.py:120–132  ·  view source on GitHub ↗
(responses: list[dict[int, SimpleNamespace]], block_size: int)

Source from the content-addressed store, hash-verified

118
119
120def _print_decode_summary(responses: list[dict[int, SimpleNamespace]], block_size: int) -> None:
121 baseline_tpot = np.mean([r[1].time_per_output_token for r in responses])
122 dflash_tpot = np.mean([r[block_size].time_per_output_token for r in responses])
123 print(f"Baseline throughput: {1 / baseline_tpot:.2f} tok/s")
124 print(f"DFlash throughput: {1 / dflash_tpot:.2f} tok/s")
125 print(f"Decoding speedup: {baseline_tpot / dflash_tpot:.2f}")
126
127 mean_accept = np.mean([np.mean(r[block_size].acceptance_lengths) for r in responses])
128 print(f"Average Acceptance length: {mean_accept:.2f}")
129
130 acceptance_lengths = list(chain.from_iterable(r[block_size].acceptance_lengths for r in responses))
131 histogram = [acceptance_lengths.count(b) / len(acceptance_lengths) for b in range(block_size + 1)]
132 print(f"Acceptance length histogram: {[f'{x * 100:.1f}%' for x in histogram]}")
133
134
135def _env_int(name: str, default: int) -> int:

Callers 2

_run_transformers · Function · 0.85
_run_mlx · Function · 0.85

Calls

no outgoing calls

Tested by

no test coverage detected