hub / github.com/z-lab/dflash / _print_decode_summary

Function _print_decode_summary

dflash/benchmark.py:120–132 · view source on GitHub ↗

(responses: list[dict[int, SimpleNamespace]], block_size: int)

Source from the content-addressed store, hash-verified

118
119
def _print_decode_summary(responses: list[dict[int, SimpleNamespace]], block_size: int) -> None:
    """Print decode throughput, speedup and acceptance-length statistics.

    Args:
        responses: One mapping per benchmarked sample. Key ``1`` is read as the
            baseline run and key ``block_size`` as the DFlash run. Both entries
            must expose ``time_per_output_token``; the DFlash entry must also
            expose ``acceptance_lengths`` (an iterable of numbers).
        block_size: Key selecting the DFlash run in every response. The
            histogram has one bucket per integer in ``0..block_size``.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: If a response lacks key ``1`` or key ``block_size``.
    """
    if not responses:
        # Nothing to average: np.mean([]) would yield NaN for every metric and
        # the histogram below would divide by zero.
        print("No responses to summarize.")
        return

    baseline_tpot = np.mean([r[1].time_per_output_token for r in responses])
    dflash_tpot = np.mean([r[block_size].time_per_output_token for r in responses])
    # Throughput is the reciprocal of the mean time per output token.
    print(f"Baseline throughput: {1 / baseline_tpot:.2f} tok/s")
    print(f"DFlash throughput: {1 / dflash_tpot:.2f} tok/s")
    print(f"Decoding speedup: {baseline_tpot / dflash_tpot:.2f}")

    # Mean of per-response means: each response carries equal weight no matter
    # how many acceptance lengths it recorded.
    mean_accept = np.mean([np.mean(r[block_size].acceptance_lengths) for r in responses])
    print(f"Average Acceptance length: {mean_accept:.2f}")

    # The histogram pools every recorded length across all responses.
    acceptance_lengths = list(chain.from_iterable(r[block_size].acceptance_lengths for r in responses))
    total = len(acceptance_lengths)
    # With no recorded lengths, report empty buckets instead of raising
    # ZeroDivisionError. list.count compares with ==, so any numeric element
    # type equal to the bucket index is counted.
    histogram = [
        acceptance_lengths.count(b) / total if total else 0.0
        for b in range(block_size + 1)
    ]
    print(f"Acceptance length histogram: {[f'{x * 100:.1f}%' for x in histogram]}")
133
134
135	def _env_int(name: str, default: int) -> int:

_run_transformersFunction · 0.85

_run_mlxFunction · 0.85

no outgoing calls

no test coverage detected