调整流式响应默认设置为非流式,确保与 OpenAI 兼容
All checks were successful
Build and Push OCI GenAI Gateway Docker Image / docker-build-push (push) Successful in 34s
This commit is contained in:
@@ -207,12 +207,15 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
||||
# Extract parameters
|
||||
params = extract_chat_params(request)
|
||||
|
||||
# Check global streaming setting
|
||||
# If streaming is globally disabled, override client request
|
||||
enable_stream = request.stream and settings.enable_streaming
|
||||
# Determine streaming mode
|
||||
# Priority: request.stream (client) > settings.enable_streaming (global)
|
||||
# Only enable streaming if BOTH conditions are met:
|
||||
# 1. Client explicitly requests stream=true (default is false per OpenAI standard)
|
||||
# 2. Global streaming is enabled via ENABLE_STREAMING
|
||||
enable_stream = request.stream is True and settings.enable_streaming
|
||||
|
||||
if not settings.enable_streaming and request.stream:
|
||||
logger.info("Streaming requested but globally disabled via ENABLE_STREAMING=false")
|
||||
if request.stream is True and not settings.enable_streaming:
|
||||
logger.info("Streaming requested by client but globally disabled via ENABLE_STREAMING=false")
|
||||
|
||||
# Handle streaming
|
||||
if enable_stream:
|
||||
|
||||
@@ -23,7 +23,7 @@ class ChatCompletionRequest(BaseModel):
|
||||
temperature: Optional[float] = 0.7
|
||||
top_p: Optional[float] = 1.0
|
||||
n: Optional[int] = 1
|
||||
stream: Optional[bool] = True # Default to streaming
|
||||
stream: Optional[bool] = False # Default to non-streaming (OpenAI compatible)
|
||||
stop: Optional[Union[str, List[str]]] = None
|
||||
max_tokens: Optional[int] = None
|
||||
presence_penalty: Optional[float] = 0.0
|
||||
|
||||
Reference in New Issue
Block a user