MCPcopy
hub / github.com/langroid/langroid / test_agent_token_usage

Function test_agent_token_usage

tests/main/test_token_usage.py:45–94  ·  view source on GitHub ↗
(stream)

Source from the content-addressed store, hash-verified

43
@pytest.mark.parametrize("stream", [True, False])
def test_agent_token_usage(stream):
    """Check the agent's token/cost accounting across cached and uncached calls.

    The same question is sent to the agent several times while the global
    `cache` setting is toggled, and the test asserts that:

    * an uncached call leaves non-zero accumulated tokens and cost;
    * a call made with `cache=True` leaves both totals unchanged;
    * a further uncached call grows the totals as expected;
    * the agent's totals agree with the LLM's own usage record for the
      configured chat model;
    * prompt tokens accumulate across successive `llm_response` rounds.

    Args:
        stream: forwarded to `Settings(stream=...)` for every round.
    """
    prompt = "What is the capital of Canada?"

    # Round 1: caching disabled, so usage and cost must be recorded.
    set_global(Settings(cache=False, stream=stream))
    agent = ChatAgent(_TestChatAgentConfig(llm=config))
    agent.llm.reset_usage_cost()
    prompt_token_count = agent.num_tokens(prompt)
    agent.llm_response_forget(prompt)

    baseline_tokens = agent.total_llm_token_usage
    baseline_cost = agent.total_llm_token_cost
    assert baseline_tokens != 0
    assert baseline_cost != 0

    # Round 2: with `cache` set to `True` the same question must not add
    # to either the cost or the token totals.
    set_global(Settings(cache=True, stream=stream))
    cached_reply = agent.llm_response_forget(prompt)
    assert agent.total_llm_token_cost == baseline_cost
    assert agent.total_llm_token_usage == baseline_tokens

    # Round 3: caching disabled again, so the totals must grow. If the reply
    # were identical to the earlier one the token total would exactly double;
    # the difference in reply length accounts for variation in the response.
    set_global(Settings(cache=False, stream=stream))
    fresh_reply = agent.llm_response(prompt)
    tokens_so_far = agent.total_llm_token_usage
    reply_length_delta = agent.num_tokens(fresh_reply.content) - agent.num_tokens(
        cached_reply.content
    )
    assert tokens_so_far == 2 * baseline_tokens + reply_length_delta
    assert agent.total_llm_token_cost > baseline_cost * 1.1

    # The agent-level accumulation must match what the LLM itself recorded
    # for the configured chat model.
    model_name = agent.config.llm.chat_model
    llm_usage = agent.llm.usage_cost_dict[model_name]
    llm_token_total = llm_usage.prompt_tokens + llm_usage.completion_tokens
    assert llm_token_total == agent.total_llm_token_usage
    assert llm_usage.cost == agent.total_llm_token_cost

    # Prompt tokens must accumulate across rounds: the next prompt has to
    # cover at least the previous prompt, the previous completion, and the
    # newly asked question.
    followup_reply = agent.llm_response(prompt)
    prior_usage = fresh_reply.metadata.usage
    min_prompt_tokens = (
        prior_usage.prompt_tokens + prior_usage.completion_tokens + prompt_token_count
    )
    assert followup_reply.metadata.usage.prompt_tokens >= min_prompt_tokens
95
96
97@pytest.mark.parametrize("fn", [True, False])

Callers

nothing calls this directly

Calls 8

llm_response_forget — Method · 0.95
llm_response — Method · 0.95
set_global — Function · 0.90
Settings — Class · 0.90
ChatAgent — Class · 0.90
reset_usage_cost — Method · 0.80
num_tokens — Method · 0.45

Tested by

no test coverage detected

Used in the wild real call sites across dependent graphs

searching dependent graphs…