| 980 | return jsonify({"status": "ok"}), 200 |
| 981 | |
| 982 | def parse_args(): |
| 983 | parser = argparse.ArgumentParser(description="Run LLM inference with various approaches.") |
| 984 | |
| 985 | try: |
| 986 | from optillm import __version__ as package_version |
| 987 | except ImportError: |
| 988 | package_version = "unknown" |
| 989 | |
| 990 | parser.add_argument('--version', action='version', |
| 991 | version=f'%(prog)s {package_version}', |
| 992 | help="Show program's version number and exit") |
| 993 | |
| 994 | # Define arguments and their corresponding environment variables |
| 995 | args_env = [ |
| 996 | ("--optillm-api-key", "OPTILLM_API_KEY", str, "", "Optional API key for client authentication to optillm"), |
| 997 | ("--approach", "OPTILLM_APPROACH", str, "auto", "Inference approach to use", known_approaches + list(plugin_approaches.keys())), |
| 998 | ("--mcts-simulations", "OPTILLM_SIMULATIONS", int, 2, "Number of MCTS simulations"), |
| 999 | ("--mcts-exploration", "OPTILLM_EXPLORATION", float, 0.2, "Exploration weight for MCTS"), |
| 1000 | ("--mcts-depth", "OPTILLM_DEPTH", int, 1, "Simulation depth for MCTS"), |
| 1001 | ("--model", "OPTILLM_MODEL", str, "gpt-4o-mini", "OpenAI model to use"), |
| 1002 | ("--rstar-max-depth", "OPTILLM_RSTAR_MAX_DEPTH", int, 3, "Maximum depth for rStar algorithm"), |
| 1003 | ("--rstar-num-rollouts", "OPTILLM_RSTAR_NUM_ROLLOUTS", int, 5, "Number of rollouts for rStar algorithm"), |
| 1004 | ("--rstar-c", "OPTILLM_RSTAR_C", float, 1.4, "Exploration constant for rStar algorithm"), |
| 1005 | ("--n", "OPTILLM_N", int, 1, "Number of final responses to be returned"), |
| 1006 | ("--return-full-response", "OPTILLM_RETURN_FULL_RESPONSE", bool, False, "Return the full response including the CoT with <thinking> tags"), |
| 1007 | ("--host", "OPTILLM_HOST", str, "127.0.0.1", "Host address to bind the server to (use 0.0.0.0 to allow external connections)"), |
| 1008 | ("--port", "OPTILLM_PORT", int, 8000, "Specify the port to run the proxy"), |
| 1009 | ("--log", "OPTILLM_LOG", str, "info", "Specify the logging level", list(logging_levels.keys())), |
| 1010 | ("--launch-gui", "OPTILLM_LAUNCH_GUI", bool, False, "Launch a Gradio chat interface"), |
| 1011 | ("--plugins-dir", "OPTILLM_PLUGINS_DIR", str, "", "Path to the plugins directory"), |
| 1012 | ("--log-conversations", "OPTILLM_LOG_CONVERSATIONS", bool, False, "Enable conversation logging with full metadata"), |
| 1013 | ("--conversation-log-dir", "OPTILLM_CONVERSATION_LOG_DIR", str, str(Path.home() / ".optillm" / "conversations"), "Directory to save conversation logs"), |
| 1014 | ] |
| 1015 | |
| 1016 | for arg, env, type_, default, help_text, *extra in args_env: |
| 1017 | env_value = os.environ.get(env) |
| 1018 | if env_value is not None: |
| 1019 | if type_ == bool: |
| 1020 | default = env_value.lower() in ('true', '1', 'yes') |
| 1021 | else: |
| 1022 | default = type_(env_value) |
| 1023 | if extra and extra[0]: # Check if there are choices for this argument |
| 1024 | parser.add_argument(arg, type=type_, default=default, help=help_text, choices=extra[0]) |
| 1025 | else: |
| 1026 | if type_ == bool: |
| 1027 | # For boolean flags, use store_true action |
| 1028 | parser.add_argument(arg, action='store_true', default=default, help=help_text) |
| 1029 | else: |
| 1030 | parser.add_argument(arg, type=type_, default=default, help=help_text) |
| 1031 | |
| 1032 | # Special handling for best_of_n to support both formats |
| 1033 | best_of_n_default = int(os.environ.get("OPTILLM_BEST_OF_N", 3)) |
| 1034 | parser.add_argument("--best-of-n", "--best_of_n", dest="best_of_n", type=int, default=best_of_n_default, |
| 1035 | help="Number of samples for best_of_n approach") |
| 1036 | |
| 1037 | # Special handling for base_url to support both formats |
| 1038 | base_url_default = os.environ.get("OPTILLM_BASE_URL", "") |
| 1039 | parser.add_argument("--base-url", "--base_url", dest="base_url", type=str, default=base_url_default, |