(predictor, predictor_args, model_args)
| 1575 | |
| 1576 | |
def benchmark(predictor, predictor_args, model_args, *, warmup_time=10, test_time=100):
    """Time repeated ``predictor.predict`` calls on a synthetic batch and print throughput.

    The input is ``predictor_args.batch_size`` copies of a fixed sentence, each
    followed by ``"<pad>"`` repeated ``predictor_args.src_length`` times. The
    texts are grouped with ``batchfy_text`` and every resulting batch is passed
    to ``predictor.predict`` once per iteration.

    Args:
        predictor: Object exposing ``predict(batch)``; the return value must be
            iterable and each element must support ``len()``.
        predictor_args: Object providing ``src_length``, ``max_length`` and
            ``batch_size``.
        model_args: Not used by this function; kept for interface compatibility.
        warmup_time: Number of untimed warmup iterations (default 10).
        test_time: Number of timed iterations (default 100).

    Raises:
        ValueError: If ``test_time`` is not positive (the average would divide
            by zero).
    """
    if test_time <= 0:
        raise ValueError("test_time must be a positive integer")

    # Just construct a simple benchmark input: the sentence followed by
    # src_length "<pad>" markers, repeated batch_size times.
    test_texts = "hello world, how are you?"
    benchmark_texts = [test_texts + "<pad>" * predictor_args.src_length for _ in range(predictor_args.batch_size)]

    batch_benchmark_texts = batchfy_text(benchmark_texts, predictor_args.batch_size)
    print("***********Start Benchmark**********")

    print("***********Start Warmup**********")
    for _ in range(warmup_time):
        for batch_source_text in batch_benchmark_texts:
            # Results are discarded; warmup iterations are excluded from timing.
            predictor.predict(batch_source_text)

    print("***********Start Speed Test**********")
    start = time.perf_counter()
    output_tokens = 0
    for _ in range(test_time):
        for batch_source_text in batch_benchmark_texts:
            outputs = predictor.predict(batch_source_text)
            # NOTE(review): len(output) is reported as a token count; confirm
            # that predict() returns token sequences rather than decoded strings.
            output_tokens += sum(len(output) for output in outputs)
    end = time.perf_counter()
    elapsed = end - start

    print("Avg Elapse time is: ", elapsed / test_time)
    print("Output tokens is: ", output_tokens)
    # QPS counts batch_size requests per timed iteration.
    print(
        "Input length is: {}, Output length is: {}, bs is: {}, IPS: {:.3f} tokens/s, QPS: {:.3f} requests/s. ".format(
            predictor_args.src_length,
            predictor_args.max_length,
            predictor_args.batch_size,
            (output_tokens / elapsed),
            (predictor_args.batch_size * test_time / elapsed),
        )
    )
| 1612 | |
| 1613 | |
| 1614 | if __name__ == "__main__": |
no test coverage detected
searching dependent graphs…