(stream)
| 43 | |
@pytest.mark.parametrize("stream", [True, False])
def test_agent_token_usage(stream):
    """Exercise the agent's token-usage and cost accounting.

    The same question is sent several times while the global ``cache``
    setting is toggled, and the agent's running totals are compared against
    snapshots taken after the first round and against the LLM's own
    per-model usage record. Runs once for each value of ``stream``.
    """
    set_global(Settings(cache=False, stream=stream))
    agent = ChatAgent(_TestChatAgentConfig(llm=config))
    agent.llm.reset_usage_cost()

    question = "What is the capital of Canada?"
    q_tokens = agent.num_tokens(question)

    # Round 1 -- cache off: the totals must move away from zero.
    agent.llm_response_forget(question)
    assert agent.total_llm_token_usage != 0
    assert agent.total_llm_token_cost != 0

    cost_after_round1 = agent.total_llm_token_cost
    tokens_after_round1 = agent.total_llm_token_usage

    # Round 2 -- cache on: asking the same question again is expected to
    # leave both totals untouched.
    set_global(Settings(cache=True, stream=stream))
    cached_response = agent.llm_response_forget(question)
    assert cost_after_round1 == agent.total_llm_token_cost
    assert agent.total_llm_token_usage == tokens_after_round1

    # Round 3 -- cache off again: the totals should grow. Had the reply been
    # identical to the earlier one, the token total would exactly double;
    # the difference in reply length accounts for any variation.
    set_global(Settings(cache=False, stream=stream))
    fresh_response = agent.llm_response(question)
    usage_now = agent.total_llm_token_usage
    reply_length_delta = agent.num_tokens(
        fresh_response.content
    ) - agent.num_tokens(cached_response.content)
    assert usage_now == 2 * tokens_after_round1 + reply_length_delta
    assert agent.total_llm_token_cost > cost_after_round1 * 1.1

    # The agent-level totals must agree with what the LLM recorded for the
    # configured chat model.
    model_usage = agent.llm.usage_cost_dict[agent.config.llm.chat_model]
    llm_side_tokens = model_usage.prompt_tokens + model_usage.completion_tokens
    assert llm_side_tokens == agent.total_llm_token_usage
    assert model_usage.cost == agent.total_llm_token_cost

    # Prompt tokens should accumulate across rounds: the next prompt has to
    # cover at least the previous prompt, the previous completion, and the
    # question asked again.
    followup_response = agent.llm_response(question)
    previous_usage = fresh_response.metadata.usage
    minimum_prompt_tokens = (
        previous_usage.prompt_tokens
        + previous_usage.completion_tokens
        + q_tokens
    )
    assert followup_response.metadata.usage.prompt_tokens >= minimum_prompt_tokens
| 96 | |
| 97 | @pytest.mark.parametrize("fn", [True, False]) |
nothing calls this directly
no test coverage detected
searching dependent graphs…