()
| 696 | |
| 697 | @app.route('/v1/chat/completions', methods=['POST']) |
| 698 | def proxy(): |
| 699 | logger.info('Received request to /v1/chat/completions') |
| 700 | data = request.get_json() |
| 701 | auth_header = request.headers.get("Authorization") |
| 702 | bearer_token = "" |
| 703 | |
| 704 | if auth_header and auth_header.startswith("Bearer "): |
| 705 | bearer_token = auth_header.split("Bearer ")[1].strip() |
| 706 | logger.debug(f"Intercepted Bearer Token: {bearer_token}") |
| 707 | |
| 708 | logger.debug(f'Request data: {data}') |
| 709 | |
| 710 | stream = data.get('stream', False) |
| 711 | messages = data.get('messages', []) |
| 712 | model = data.get('model', server_config['model']) |
| 713 | n = data.get('n', server_config['n']) # Get n value from request or config |
| 714 | # Extract response_format if present |
| 715 | response_format = data.get("response_format", None) |
| 716 | |
| 717 | # Handle max_completion_tokens (preferred) and max_tokens (deprecated but supported) |
| 718 | # Priority: max_completion_tokens > max_tokens |
| 719 | max_completion_tokens = data.get('max_completion_tokens') |
| 720 | max_tokens = data.get('max_tokens') |
| 721 | |
| 722 | # Explicit keys that we are already handling |
| 723 | explicit_keys = {'stream', 'messages', 'model', 'n', 'response_format', 'max_completion_tokens', 'max_tokens'} |
| 724 | |
| 725 | # Copy the rest into request_config |
| 726 | request_config = {k: v for k, v in data.items() if k not in explicit_keys} |
| 727 | |
| 728 | # Add the explicitly handled ones |
| 729 | request_config.update({ |
| 730 | "stream": stream, |
| 731 | "n": n, |
| 732 | }) |
| 733 | |
| 734 | # Only add response_format if it's not None |
| 735 | if response_format is not None: |
| 736 | request_config['response_format'] = response_format |
| 737 | |
| 738 | # Add token limits to request_config with proper priority |
| 739 | if max_completion_tokens is not None: |
| 740 | request_config['max_completion_tokens'] = max_completion_tokens |
| 741 | request_config['max_tokens'] = max_completion_tokens # For backward compatibility with approaches that read max_tokens |
| 742 | elif max_tokens is not None: |
| 743 | request_config['max_tokens'] = max_tokens |
| 744 | |
| 745 | optillm_approach = data.get('optillm_approach', server_config['approach']) |
| 746 | logger.debug(data) |
| 747 | server_config['mcts_depth'] = data.get('mcts_depth', server_config['mcts_depth']) |
| 748 | server_config['mcts_exploration'] = data.get('mcts_exploration', server_config['mcts_exploration']) |
| 749 | server_config['mcts_simulations'] = data.get('mcts_simulations', server_config['mcts_simulations']) |
| 750 | |
| 751 | system_prompt, initial_query, message_optillm_approach = parse_conversation(messages) |
| 752 | |
| 753 | if message_optillm_approach: |
| 754 | optillm_approach = message_optillm_approach |
| 755 |
nothing calls this directly
no test coverage detected