Stream a chat completion response directly using Google Generative AI
(request: ChatCompletionRequest)
| 75 | |
| 76 | @app.post("/chat/completions/stream") |
| 77 | async def chat_completions_stream(request: ChatCompletionRequest): |
| 78 | """Stream a chat completion response directly using Google Generative AI""" |
| 79 | try: |
| 80 | # Check if request contains very large input |
| 81 | input_too_large = False |
| 82 | if request.messages and len(request.messages) > 0: |
| 83 | last_message = request.messages[-1] |
| 84 | if hasattr(last_message, 'content') and last_message.content: |
| 85 | tokens = count_tokens(last_message.content, request.provider == "ollama") |
| 86 | logger.info(f"Request size: {tokens} tokens") |
| 87 | if tokens > 8000: |
| 88 | logger.warning(f"Request exceeds recommended token limit ({tokens} > 7500)") |
| 89 | input_too_large = True |
| 90 | |
| 91 | # Create a new RAG instance for this request |
| 92 | try: |
| 93 | request_rag = RAG(provider=request.provider, model=request.model) |
| 94 | |
| 95 | # Extract custom file filter parameters if provided |
| 96 | excluded_dirs = None |
| 97 | excluded_files = None |
| 98 | included_dirs = None |
| 99 | included_files = None |
| 100 | |
| 101 | if request.excluded_dirs: |
| 102 | excluded_dirs = [unquote(dir_path) for dir_path in request.excluded_dirs.split('\n') if dir_path.strip()] |
| 103 | logger.info(f"Using custom excluded directories: {excluded_dirs}") |
| 104 | if request.excluded_files: |
| 105 | excluded_files = [unquote(file_pattern) for file_pattern in request.excluded_files.split('\n') if file_pattern.strip()] |
| 106 | logger.info(f"Using custom excluded files: {excluded_files}") |
| 107 | if request.included_dirs: |
| 108 | included_dirs = [unquote(dir_path) for dir_path in request.included_dirs.split('\n') if dir_path.strip()] |
| 109 | logger.info(f"Using custom included directories: {included_dirs}") |
| 110 | if request.included_files: |
| 111 | included_files = [unquote(file_pattern) for file_pattern in request.included_files.split('\n') if file_pattern.strip()] |
| 112 | logger.info(f"Using custom included files: {included_files}") |
| 113 | |
| 114 | request_rag.prepare_retriever(request.repo_url, request.type, request.token, excluded_dirs, excluded_files, included_dirs, included_files) |
| 115 | logger.info(f"Retriever prepared for {request.repo_url}") |
| 116 | except ValueError as e: |
| 117 | if "No valid documents with embeddings found" in str(e): |
| 118 | logger.error(f"No valid embeddings found: {str(e)}") |
| 119 | raise HTTPException(status_code=500, detail="No valid document embeddings found. This may be due to embedding size inconsistencies or API errors during document processing. Please try again or check your repository content.") |
| 120 | else: |
| 121 | logger.error(f"ValueError preparing retriever: {str(e)}") |
| 122 | raise HTTPException(status_code=500, detail=f"Error preparing retriever: {str(e)}") |
| 123 | except Exception as e: |
| 124 | logger.error(f"Error preparing retriever: {str(e)}") |
| 125 | # Check for specific embedding-related errors |
| 126 | if "All embeddings should be of the same size" in str(e): |
| 127 | raise HTTPException(status_code=500, detail="Inconsistent embedding sizes detected. Some documents may have failed to embed properly. Please try again.") |
| 128 | else: |
| 129 | raise HTTPException(status_code=500, detail=f"Error preparing retriever: {str(e)}") |
| 130 | |
| 131 | # Validate request |
| 132 | if not request.messages or len(request.messages) == 0: |
| 133 | raise HTTPException(status_code=400, detail="No messages provided") |
| 134 |
Nothing calls this function directly; it is registered as the handler for the `/chat/completions/stream` POST route.
No test coverage was detected for this function.