MCPcopy
hub / github.com/SqueezeAILab/LLMCompiler / AsyncStatsCallbackHandler

Class AsyncStatsCallbackHandler

src/callbacks/callbacks.py:44–100  ·  view source on GitHub ↗

Collect useful stats about the run. Add more stats as needed.

Source from the content-addressed store, hash-verified

42
43
class AsyncStatsCallbackHandler(AsyncCallbackHandler):
    """Collect usage statistics about LLM calls made during a run.

    Tracks the number of calls, input/output token counts, and the elapsed
    time of each call. Add more stats as needed (arbitrary extra entries can
    be placed in ``additional_fields`` and are merged into ``get_stats()``).

    Args:
        stream: Whether the model is used in streaming mode. In streaming
            mode token counts are estimated locally; otherwise they are read
            from the usage data attached to the response in ``on_llm_end``.
    """

    def __init__(self, stream: bool = False) -> None:
        super().__init__()
        self.cnt = 0
        self.input_tokens = 0
        self.output_tokens = 0
        # same for gpt-3.5
        self.encoder = tiktoken.encoding_for_model("gpt-4")
        self.stream = stream
        self.all_times = []
        self.additional_fields = {}
        self.start_time = 0

    def _count_tokens(self, content) -> int:
        """Return the number of tokens in *content*, or 0 if it is not a string."""
        if not isinstance(content, str):
            # NOTE(review): non-string content (e.g. structured/multimodal
            # parts) is not counted -- confirm whether that needs support.
            return 0
        # disallowed_special=() makes the encoder treat special-token text
        # (e.g. "<|endoftext|>") as ordinary text instead of raising.
        return len(self.encoder.encode(content, disallowed_special=()))

    async def on_chat_model_start(self, serialized, prompts, **kwargs):
        """Record the call start time and, when streaming, count input tokens.

        Args:
            serialized: Unused here.
            prompts: Nested sequence of message objects; each message's
                ``content`` attribute is tokenized.
            **kwargs: Ignored.
        """
        # NOTE(review): a single shared start_time means overlapping calls on
        # the same handler would overwrite each other's start -- confirm the
        # handler is not shared across concurrent calls.
        self.start_time = time.time()
        if self.stream:
            # if streaming mode, on_llm_end response is not collected
            # therefore, we need to count input token based on the
            # prompt length at the beginning
            self.cnt += 1
            # Count every message of every prompt, not only the first one,
            # so that e.g. a human message following a system message is
            # included in the total.
            self.input_tokens += sum(
                self._count_tokens(message.content)
                for messages in prompts
                for message in messages
            )

    async def on_llm_new_token(self, token, *args, **kwargs):
        """Count one output token per streamed token when in streaming mode."""
        if self.stream:
            # if streaming mode, on_llm_end response is not collected
            # therefore, we need to manually count output token based on the
            # number of streamed out tokens
            self.output_tokens += 1

    async def on_llm_end(self, response, *args, **kwargs):
        """Record the elapsed time and, when not streaming, the token usage.

        Args:
            response: Result object; in non-streaming mode its ``llm_output``
                mapping is read for a ``"token_usage"`` entry.
        """
        self.all_times.append(round(time.time() - self.start_time, 2))
        if not self.stream:
            # if not streaming mode, on_llm_end response is collected
            # so we can use this stats directly
            # llm_output or its token_usage entry may be absent; in that case
            # the call is still counted but contributes no tokens.
            token_usage = (response.llm_output or {}).get("token_usage") or {}
            self.input_tokens += token_usage.get("prompt_tokens", 0)
            self.output_tokens += token_usage.get("completion_tokens", 0)
            self.cnt += 1

    def reset(self) -> None:
        """Clear all collected statistics back to their initial values."""
        self.cnt = 0
        self.input_tokens = 0
        self.output_tokens = 0
        self.all_times = []
        self.additional_fields = {}
        self.start_time = 0

    # NOTE(review): the annotation says dict[str, int], but "all_times" is a
    # list and additional_fields may hold arbitrary values -- consider
    # widening the annotation once typing imports are confirmed.
    def get_stats(self) -> dict[str, int]:
        """Return the collected stats merged with ``additional_fields``.

        Keys in ``additional_fields`` override the built-in keys on collision.
        """
        return {
            "calls": self.cnt,
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "all_times": self.all_times,
            **self.additional_fields,
        }

Callers 1

__init__ · Method · 0.90

Calls

no outgoing calls

Tested by

no test coverage detected