Cherry Studio 客户端优化
All checks were successful
Build and Push OCI GenAI Gateway Docker Image / docker-build-push (push) Successful in 35s
All checks were successful
Build and Push OCI GenAI Gateway Docker Image / docker-build-push (push) Successful in 35s
This commit is contained in:
90
.env.example
90
.env.example
@@ -1,23 +1,37 @@
|
|||||||
# API Settings
|
# ============================================
|
||||||
|
# API 服务设置
|
||||||
|
# ============================================
|
||||||
|
# API 服务标题(显示在 OpenAPI 文档中)
|
||||||
API_TITLE=OCI GenAI to OpenAI API Gateway
|
API_TITLE=OCI GenAI to OpenAI API Gateway
|
||||||
|
# API 版本号
|
||||||
API_VERSION=0.0.1
|
API_VERSION=0.0.1
|
||||||
|
# API 路由前缀(符合 OpenAI API 规范,不建议修改)
|
||||||
API_PREFIX=/v1
|
API_PREFIX=/v1
|
||||||
|
# 服务监听端口
|
||||||
API_PORT=8000
|
API_PORT=8000
|
||||||
|
# 服务监听地址(0.0.0.0 表示监听所有网络接口)
|
||||||
API_HOST=0.0.0.0
|
API_HOST=0.0.0.0
|
||||||
|
# 调试模式(生产环境应设置为 false)
|
||||||
DEBUG=false
|
DEBUG=false
|
||||||
|
|
||||||
# Authentication
|
# ============================================
|
||||||
# Comma-separated list of API keys for authentication
|
# 认证设置
|
||||||
# These are the keys clients will use in Authorization: Bearer <key>
|
# ============================================
|
||||||
|
# API 密钥列表(JSON 数组格式)
|
||||||
|
# 客户端通过 Authorization: Bearer <key> 头进行认证
|
||||||
|
# 支持配置多个密钥,用于不同的客户端或应用
|
||||||
|
# 示例:
|
||||||
|
# 单个密钥:API_KEYS=["sk-your-secret-key"]
|
||||||
|
# 多个密钥:API_KEYS=["sk-admin-key","sk-user-key","sk-app-key"]
|
||||||
API_KEYS=["sk-oci-genai-default-key"]
|
API_KEYS=["sk-oci-genai-default-key"]
|
||||||
|
|
||||||
# ============================================
|
# ============================================
|
||||||
# OCI Configuration
|
# OCI 配置
|
||||||
# ============================================
|
# ============================================
|
||||||
# Path to OCI config file (usually ~/.oci/config)
|
# OCI 配置文件路径(通常为 ~/.oci/config)
|
||||||
OCI_CONFIG_FILE=~/.oci/config
|
OCI_CONFIG_FILE=~/.oci/config
|
||||||
|
|
||||||
# Profile names in the OCI config file
|
# OCI 配置文件中的 profile 名称
|
||||||
# 支持单个或多个 profile,多个 profile 用逗号分隔
|
# 支持单个或多个 profile,多个 profile 用逗号分隔
|
||||||
# 多个 profile 时会自动使用轮询(round-robin)负载均衡
|
# 多个 profile 时会自动使用轮询(round-robin)负载均衡
|
||||||
# 示例:
|
# 示例:
|
||||||
@@ -26,51 +40,61 @@ OCI_CONFIG_FILE=~/.oci/config
|
|||||||
# 注意:每个 profile 在 ~/.oci/config 中必须包含 region 和 tenancy (作为 compartment_id)
|
# 注意:每个 profile 在 ~/.oci/config 中必须包含 region 和 tenancy (作为 compartment_id)
|
||||||
OCI_CONFIG_PROFILE=DEFAULT
|
OCI_CONFIG_PROFILE=DEFAULT
|
||||||
|
|
||||||
# Authentication type: api_key or instance_principal
|
# 认证类型:api_key 或 instance_principal
|
||||||
OCI_AUTH_TYPE=api_key
|
OCI_AUTH_TYPE=api_key
|
||||||
|
|
||||||
# OCI Client Timeout Settings
|
# OCI 客户端超时设置
|
||||||
# Connect timeout: Maximum time (in seconds) to establish connection to OCI API
|
# 连接超时:与 OCI API 建立连接的最大时间(秒)
|
||||||
OCI_CONNECT_TIMEOUT=10
|
OCI_CONNECT_TIMEOUT=10
|
||||||
# Read timeout: Maximum time (in seconds) to wait for OCI API response
|
# 读取超时:等待 OCI API 响应的最大时间(秒)
|
||||||
# Increase this value for long-running requests (e.g., complex conversations)
|
# 处理长时间运行的请求时(例如复杂对话)可增加此值
|
||||||
OCI_READ_TIMEOUT=360
|
OCI_READ_TIMEOUT=360
|
||||||
|
|
||||||
# Optional: Direct endpoint for dedicated models
|
# 可选:专用模型的直接端点
|
||||||
# GENAI_ENDPOINT=https://your-dedicated-endpoint
|
# GENAI_ENDPOINT=https://your-dedicated-endpoint
|
||||||
|
|
||||||
# Model Settings
|
# ============================================
|
||||||
# Note: Available models are dynamically loaded from OCI at startup
|
# 模型设置
|
||||||
# Use GET /v1/models to see all available models
|
# ============================================
|
||||||
MAX_TOKENS=4096
|
# 注意:可用模型在启动时从 OCI 动态加载
|
||||||
|
# 使用 GET /v1/models 查看所有可用模型
|
||||||
|
MAX_TOKENS=8192
|
||||||
TEMPERATURE=0.7
|
TEMPERATURE=0.7
|
||||||
|
|
||||||
# Embedding Settings
|
# ============================================
|
||||||
# Truncate strategy for embeddings: END or START
|
# 嵌入向量设置
|
||||||
|
# ============================================
|
||||||
|
# 嵌入向量的截断策略:END(保留开头,截断末尾)或 START(保留末尾,截断开头)
|
||||||
EMBED_TRUNCATE=END
|
EMBED_TRUNCATE=END
|
||||||
|
|
||||||
# Streaming Settings
|
# ============================================
|
||||||
# Global streaming on/off switch
|
# 流式响应设置
|
||||||
# Set to false to disable streaming for all requests (overrides client stream=true)
|
# ============================================
|
||||||
|
# 全局流式响应开关
|
||||||
|
# 设置为 false 将禁用所有流式请求(覆盖客户端的 stream=true 设置)
|
||||||
ENABLE_STREAMING=true
|
ENABLE_STREAMING=true
|
||||||
# Chunk size for simulated streaming (fallback mode only)
|
# 模拟流式传输的分块大小(仅在回退模式下使用)
|
||||||
# Only used when OCI returns non-streaming response
|
# 仅当 OCI 返回非流式响应时使用
|
||||||
STREAM_CHUNK_SIZE=1024
|
STREAM_CHUNK_SIZE=1024
|
||||||
|
|
||||||
# Logging
|
# ============================================
|
||||||
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
|
# 日志设置
|
||||||
|
# ============================================
|
||||||
|
# 日志级别:DEBUG, INFO, WARNING, ERROR, CRITICAL
|
||||||
LOG_LEVEL=INFO
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
# Enable detailed request/response logging for debugging
|
# 启用详细的请求/响应日志记录以进行调试
|
||||||
# LOG_REQUESTS: Print incoming request details (method, URL, headers, body)
|
# LOG_REQUESTS:打印传入请求的详细信息(方法、URL、请求头、请求体)
|
||||||
# LOG_RESPONSES: Print outgoing response details (status, headers, body)
|
# LOG_RESPONSES:打印发出响应的详细信息(状态码、响应头、响应体)
|
||||||
# Note: Sensitive data (like API keys) are automatically filtered from logs
|
# LOG_STREAMING:打印流式响应内容(⚠️ 增加内存使用和日志大小)
|
||||||
|
# 注意:敏感数据(如 API 密钥)会自动从日志中过滤
|
||||||
LOG_REQUESTS=false
|
LOG_REQUESTS=false
|
||||||
LOG_RESPONSES=false
|
LOG_RESPONSES=false
|
||||||
|
LOG_STREAMING=true
|
||||||
|
|
||||||
# Log file path (optional, if not set logs only to console)
|
# 日志文件路径(可选,如果未设置则仅输出到控制台)
|
||||||
LOG_FILE=./logs/app.log
|
LOG_FILE=./logs/app.log
|
||||||
# Max log file size in MB (default: 10)
|
# 日志文件最大大小(MB,默认:10)
|
||||||
LOG_FILE_MAX_SIZE=10
|
LOG_FILE_MAX_SIZE=10
|
||||||
# Number of backup log files to keep (default: 5)
|
# 保留的备份日志文件数量(默认:5)
|
||||||
LOG_FILE_BACKUP_COUNT=5
|
LOG_FILE_BACKUP_COUNT=5
|
||||||
|
|||||||
80
README.md
80
README.md
@@ -19,6 +19,8 @@
|
|||||||
- ⚡ **真实流式传输**: 真正的边缘到边缘流式响应,TTFB < 200ms
|
- ⚡ **真实流式传输**: 真正的边缘到边缘流式响应,TTFB < 200ms
|
||||||
- 🔒 **安全性**: 自动过滤敏感信息(OCID、request-id、endpoint URLs)
|
- 🔒 **安全性**: 自动过滤敏感信息(OCID、request-id、endpoint URLs)
|
||||||
- 🎯 **性能优化**: 客户端连接池机制,显著提升性能
|
- 🎯 **性能优化**: 客户端连接池机制,显著提升性能
|
||||||
|
- 🎨 **高级参数支持**: reasoning_effort 等参数
|
||||||
|
- 🍒 **Cherry Studio 优化**: 自动映射 thinking_budget,客户端名称识别
|
||||||
|
|
||||||
## 🚀 快速开始
|
## 🚀 快速开始
|
||||||
|
|
||||||
@@ -153,6 +155,68 @@ response = client.chat.completions.create(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## 🚀 高级功能
|
||||||
|
|
||||||
|
### 高级参数支持
|
||||||
|
|
||||||
|
网关支持高级参数来增强模型响应能力:
|
||||||
|
|
||||||
|
#### reasoning_effort - 推理深度控制
|
||||||
|
|
||||||
|
控制模型的推理深度,影响响应质量:
|
||||||
|
|
||||||
|
```python
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model="google.gemini-2.5-pro",
|
||||||
|
messages=[{"role": "user", "content": "Solve this complex problem"}],
|
||||||
|
extra_body={"reasoning_effort": "high"} # low, medium, high
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
详细说明请参考 [高级参数支持文档](docs/ADVANCED_PARAMETERS.md)。
|
||||||
|
|
||||||
|
### Cherry Studio 客户端优化
|
||||||
|
|
||||||
|
网关为 Cherry Studio 客户端提供了专属优化功能:
|
||||||
|
|
||||||
|
#### 自动映射 thinking_budget
|
||||||
|
|
||||||
|
Cherry Studio 的 `thinking_budget` 参数会自动映射到 OCI 的 `reasoning_effort`:
|
||||||
|
|
||||||
|
- thinking_budget ≤ 1760(-1 除外,-1 表示使用模型默认值)→ `reasoning_effort: low`
|
||||||
|
- 1760 < thinking_budget ≤ 16448 → `reasoning_effort: medium`
|
||||||
|
- thinking_budget > 16448 → `reasoning_effort: high`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-d '{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [{"role": "user", "content": "Complex problem..."}],
|
||||||
|
"extra_body": {
|
||||||
|
"google": {
|
||||||
|
"thinking_config": {
|
||||||
|
"thinking_budget": 10000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
#### 客户端名称识别
|
||||||
|
|
||||||
|
通过 `x-title` 请求头识别客户端,便于日志追踪和调试:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
详细说明请参考 [Cherry Studio 客户端优化文档](docs/CHERRY_STUDIO_OPTIMIZATION.md)。
|
||||||
|
|
||||||
## 📋 支持的端点
|
## 📋 支持的端点
|
||||||
|
|
||||||
| 端点 | 方法 | 说明 |
|
| 端点 | 方法 | 说明 |
|
||||||
@@ -188,7 +252,9 @@ response = client.chat.completions.create(
|
|||||||
| `ENABLE_STREAMING` | 全局流式开关 | `true` |
|
| `ENABLE_STREAMING` | 全局流式开关 | `true` |
|
||||||
| `LOG_LEVEL` | 日志级别 | `INFO` |
|
| `LOG_LEVEL` | 日志级别 | `INFO` |
|
||||||
|
|
||||||
完整配置请参考 [.env.example](.env.example)
|
**📖 完整配置说明**:
|
||||||
|
- [环境变量配置文档](docs/ENVIRONMENT_VARIABLES.md) - 所有环境变量的详细说明、使用场景和配置示例
|
||||||
|
- [.env.example](.env.example) - 环境变量配置示例文件
|
||||||
|
|
||||||
## 🌐 多区域负载均衡
|
## 🌐 多区域负载均衡
|
||||||
|
|
||||||
@@ -214,8 +280,16 @@ docker run -p 8000:8000 --env-file .env oci-genai-gateway
|
|||||||
|
|
||||||
## 📚 文档
|
## 📚 文档
|
||||||
|
|
||||||
- [CLAUDE.md](CLAUDE.md) - 完整的开发文档,包含架构说明、开发指南和调试技巧
|
### 核心文档
|
||||||
- [.env.example](.env.example) - 环境变量配置示例
|
|
||||||
|
- [环境变量配置说明](docs/ENVIRONMENT_VARIABLES.md) - 所有环境变量的详细说明和配置示例
|
||||||
|
- [.env.example](.env.example) - 环境变量配置示例文件
|
||||||
|
|
||||||
|
### 功能优化文档
|
||||||
|
|
||||||
|
- [高级参数支持](docs/ADVANCED_PARAMETERS.md) - reasoning_effort 参数详解
|
||||||
|
- [Cherry Studio 客户端优化](docs/CHERRY_STUDIO_OPTIMIZATION.md) - thinking_budget 映射和客户端识别
|
||||||
|
- [OCI 访问权限配置](docs/OCI-SETUP-GUIDE.md) - 自动化配置 OCI GenAI 访问权限
|
||||||
|
|
||||||
## 🔧 故障排除
|
## 🔧 故障排除
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,11 @@
|
|||||||
services:
|
services:
|
||||||
oci-genai-gateway:
|
oci-genai-gateway:
|
||||||
|
# 使用本地 Dockerfile 构建镜像
|
||||||
build:
|
build:
|
||||||
context: .
|
context: .
|
||||||
dockerfile: Dockerfile
|
dockerfile: Dockerfile
|
||||||
|
# 使用预构建的镜像(如有需要可取消注释)
|
||||||
|
# image: gitea.bcde.io/wangdefa/oracle-openai:latest
|
||||||
container_name: oci-genai-gateway
|
container_name: oci-genai-gateway
|
||||||
ports:
|
ports:
|
||||||
- "8000:8000"
|
- "8000:8000"
|
||||||
|
|||||||
354
docs/CHERRY_STUDIO_OPTIMIZATION.md
Normal file
354
docs/CHERRY_STUDIO_OPTIMIZATION.md
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
# Cherry Studio 客户端优化
|
||||||
|
|
||||||
|
本文档说明针对 Cherry Studio 客户端的专属优化功能。
|
||||||
|
|
||||||
|
## 优化内容
|
||||||
|
|
||||||
|
### 1. 客户端名称日志显示
|
||||||
|
|
||||||
|
**功能描述**:
|
||||||
|
- 从请求头 `x-title` 中提取客户端名称
|
||||||
|
- 在日志中显示客户端信息,便于追踪和调试
|
||||||
|
- 支持任何设置 `x-title` 头的客户端,不限于 Cherry Studio
|
||||||
|
|
||||||
|
**日志格式**:
|
||||||
|
```
|
||||||
|
2025-12-10 15:09:17 - api.routers.chat - INFO - Chat completion request for model: google.gemini-2.5-pro, client: Cherry Studio
|
||||||
|
```
|
||||||
|
|
||||||
|
**实现位置**:
|
||||||
|
- [src/api/routers/chat.py](../src/api/routers/chat.py#L295-L296)
|
||||||
|
|
||||||
|
**使用示例**:
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-d '{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [{"role": "user", "content": "Hello"}]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. thinking_budget 到 reasoning_effort 的自动映射
|
||||||
|
|
||||||
|
**功能描述**:
|
||||||
|
- Cherry Studio 使用 Google Gemini 的 `thinking_budget` 参数控制推理深度
|
||||||
|
- 网关自动将 `thinking_budget` 映射到 OCI SDK 的 `reasoning_effort` 参数
|
||||||
|
- 支持 meta、xai、google、openai 提供商的模型(不支持 Cohere)
|
||||||
|
- 对其他客户端透明,不影响标准 OpenAI API 兼容性
|
||||||
|
|
||||||
|
**映射规则**:
|
||||||
|
|
||||||
|
| thinking_budget 值 | reasoning_effort | 说明 |
|
||||||
|
|-------------------|------------------|------|
|
||||||
|
| ≤ 1760(-1 除外) | `low` | 快速响应,较少推理 |
|
||||||
|
| 1760 < X ≤ 16448 | `medium` | 平衡速度和推理深度 |
|
||||||
|
| > 16448 | `high` | 深度推理,更完整的答案 |
|
||||||
|
| -1 | None | 使用模型默认值 |
|
||||||
|
|
||||||
|
**extra_body 结构**:
|
||||||
|
|
||||||
|
Cherry Studio 通过 `extra_body` 传递 Google Gemini 特定的配置:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [...],
|
||||||
|
"extra_body": {
|
||||||
|
"google": {
|
||||||
|
"thinking_config": {
|
||||||
|
"thinking_budget": 1760,
|
||||||
|
"include_thoughts": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**实现位置**:
|
||||||
|
- 映射函数: [src/api/routers/chat.py](../src/api/routers/chat.py#L37-L102)
|
||||||
|
- `map_thinking_budget_to_reasoning_effort()` - 将 thinking_budget 数值映射到 reasoning_effort 枚举值
|
||||||
|
- `extract_reasoning_effort_from_extra_body()` - 从 extra_body 中提取 thinking_budget 并执行映射
|
||||||
|
- OCI 客户端: [src/core/oci_client.py](../src/core/oci_client.py#L333-L336)
|
||||||
|
|
||||||
|
**日志输出**:
|
||||||
|
```
|
||||||
|
2025-12-10 15:09:17 - api.routers.chat - INFO - Chat completion request for model: google.gemini-2.5-pro, client: Cherry Studio
|
||||||
|
2025-12-10 15:09:17 - api.routers.chat - INFO - Cherry Studio thinking_budget 1760 mapped to reasoning_effort: low
|
||||||
|
2025-12-10 15:09:17 - core.oci_client - INFO - Setting reasoning_effort to LOW for google model
|
||||||
|
```
|
||||||
|
|
||||||
|
## Cherry Studio 使用示例
|
||||||
|
|
||||||
|
### 基本对话
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-d '{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Hello, how are you?"}
|
||||||
|
]
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 使用 thinking_budget (低推理深度)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-d '{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "What is 2+2?"}
|
||||||
|
],
|
||||||
|
"extra_body": {
|
||||||
|
"google": {
|
||||||
|
"thinking_config": {
|
||||||
|
"thinking_budget": 1000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 使用 thinking_budget (中等推理深度)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-d '{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Explain quantum entanglement"}
|
||||||
|
],
|
||||||
|
"extra_body": {
|
||||||
|
"google": {
|
||||||
|
"thinking_config": {
|
||||||
|
"thinking_budget": 5000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 使用 thinking_budget (高推理深度)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-d '{
|
||||||
|
"model": "google.gemini-2.5-pro",
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "Solve this complex math problem: ..."}
|
||||||
|
],
|
||||||
|
"extra_body": {
|
||||||
|
"google": {
|
||||||
|
"thinking_config": {
|
||||||
|
"thinking_budget": 20000
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## 验证日志
|
||||||
|
|
||||||
|
启动服务并查看日志以验证 Cherry Studio 优化功能:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 启动服务(开发模式)
|
||||||
|
cd src
|
||||||
|
python main.py
|
||||||
|
|
||||||
|
# 查看日志(另一个终端)
|
||||||
|
tail -f logs/app.log | grep -E "(client:|thinking_budget|reasoning_effort)"
|
||||||
|
```
|
||||||
|
|
||||||
|
期望看到的日志:
|
||||||
|
```
|
||||||
|
2025-12-10 15:09:17 - api.routers.chat - INFO - Chat completion request for model: google.gemini-2.5-pro, client: Cherry Studio
|
||||||
|
2025-12-10 15:09:17 - api.routers.chat - INFO - Cherry Studio thinking_budget 1760 mapped to reasoning_effort: low
|
||||||
|
2025-12-10 15:09:17 - core.oci_client - INFO - Setting reasoning_effort to LOW for google model
|
||||||
|
```
|
||||||
|
|
||||||
|
## 技术实现
|
||||||
|
|
||||||
|
### Schema 变更
|
||||||
|
|
||||||
|
在 [src/api/schemas.py](../src/api/schemas.py) 中添加了 `extra_body` 字段:
|
||||||
|
|
||||||
|
```python
|
||||||
|
class ChatCompletionRequest(BaseModel):
|
||||||
|
# ... 其他字段 ...
|
||||||
|
extra_body: Optional[Dict[str, Any]] = None # Cherry Studio and other client extensions
|
||||||
|
```
|
||||||
|
|
||||||
|
### 映射函数
|
||||||
|
|
||||||
|
实现了两个工具函数来处理 Cherry Studio 的 thinking_budget:
|
||||||
|
|
||||||
|
1. **map_thinking_budget_to_reasoning_effort**: 将 thinking_budget 数值映射到 reasoning_effort 枚举值
|
||||||
|
2. **extract_reasoning_effort_from_extra_body**: 从 extra_body 中提取 thinking_budget 并执行映射
|
||||||
|
|
||||||
|
```python
|
||||||
|
def map_thinking_budget_to_reasoning_effort(thinking_budget: int) -> Optional[str]:
|
||||||
|
"""Map Cherry Studio's thinking_budget to OCI's reasoning_effort parameter."""
|
||||||
|
if thinking_budget == -1:
|
||||||
|
return None
|
||||||
|
elif thinking_budget <= 1760:
|
||||||
|
return "low"
|
||||||
|
elif thinking_budget <= 16448:
|
||||||
|
return "medium"
|
||||||
|
else:
|
||||||
|
return "high"
|
||||||
|
|
||||||
|
def extract_reasoning_effort_from_extra_body(extra_body: Optional[dict]) -> Optional[str]:
|
||||||
|
"""Extract reasoning_effort from Cherry Studio's extra_body parameter."""
|
||||||
|
if not extra_body:
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
google_config = extra_body.get("google", {})
|
||||||
|
thinking_config = google_config.get("thinking_config", {})
|
||||||
|
thinking_budget = thinking_config.get("thinking_budget")
|
||||||
|
|
||||||
|
if thinking_budget is not None and isinstance(thinking_budget, (int, float)):
|
||||||
|
effort = map_thinking_budget_to_reasoning_effort(int(thinking_budget))
|
||||||
|
if effort:
|
||||||
|
logger.info(f"Cherry Studio thinking_budget {thinking_budget} mapped to reasoning_effort: {effort}")
|
||||||
|
return effort
|
||||||
|
except (AttributeError, TypeError, KeyError) as e:
|
||||||
|
logger.debug(f"Failed to extract thinking_budget from extra_body: {e}")
|
||||||
|
|
||||||
|
return None
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCI SDK 集成
|
||||||
|
|
||||||
|
更新了 `OCIGenAIClient.chat()` 方法和 `_build_generic_request()` 方法,支持传递 `reasoning_effort` 参数到 OCI SDK 的 `GenericChatRequest`。
|
||||||
|
|
||||||
|
## 兼容性
|
||||||
|
|
||||||
|
### 支持的模型
|
||||||
|
|
||||||
|
**reasoning_effort 参数支持**(通过 thinking_budget 映射):
|
||||||
|
|
||||||
|
- ✅ Google Gemini 模型 (google.gemini-2.5-pro, google.gemini-2.0-flash-exp)
|
||||||
|
- ✅ Meta Llama 模型 (meta.llama-3.1-405b-instruct, meta.llama-3.2-90b-vision-instruct)
|
||||||
|
- ✅ xAI 模型
|
||||||
|
- ✅ OpenAI 模型
|
||||||
|
- ❌ Cohere 模型(不支持 reasoning_effort 参数)
|
||||||
|
|
||||||
|
**注意**: reasoning_effort 是可选参数,如果模型不支持,会自动忽略并记录警告日志。
|
||||||
|
|
||||||
|
### 向后兼容性
|
||||||
|
|
||||||
|
- ✅ 不提供 `extra_body` 时,行为与之前完全一致
|
||||||
|
- ✅ 不提供 `x-title` 时,客户端名称显示为 "Unknown"
|
||||||
|
- ✅ 其他客户端不受影响,可以继续正常使用
|
||||||
|
- ✅ 标准 OpenAI API 兼容性完全保留
|
||||||
|
|
||||||
|
### 与其他客户端的兼容性
|
||||||
|
|
||||||
|
虽然此优化专为 Cherry Studio 设计,但实现方式确保了:
|
||||||
|
|
||||||
|
1. **其他客户端不受影响**:不使用 `extra_body.google.thinking_config` 的客户端完全不受影响
|
||||||
|
2. **标准 API 兼容**:所有标准 OpenAI API 功能仍然正常工作
|
||||||
|
|
||||||
|
## 故障排除
|
||||||
|
|
||||||
|
### 问题 1: thinking_budget 参数未生效
|
||||||
|
|
||||||
|
**症状**:日志中没有看到 "mapped to reasoning_effort" 消息
|
||||||
|
|
||||||
|
**解决方案**:
|
||||||
|
1. 确认 `extra_body` 结构正确,嵌套路径为 `extra_body.google.thinking_config.thinking_budget`
|
||||||
|
2. 确认使用的是支持的模型(meta、xai、google、openai,不支持 Cohere)
|
||||||
|
3. 检查 thinking_budget 值是否有效(非 null 的数字);注意值为 -1 时表示使用模型默认值,不会输出 "mapped to reasoning_effort" 日志
|
||||||
|
4. 查看日志中是否有错误或警告信息
|
||||||
|
|
||||||
|
**验证 extra_body 结构**:
|
||||||
|
```bash
|
||||||
|
# 正确的结构
|
||||||
|
{
|
||||||
|
"extra_body": {
|
||||||
|
"google": { # 必须是 "google" 键
|
||||||
|
"thinking_config": { # 必须是 "thinking_config" 键
|
||||||
|
"thinking_budget": 5000 # 必须是 "thinking_budget" 键,值为数字
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 问题 2: 客户端名称显示为 "Unknown"
|
||||||
|
|
||||||
|
**症状**:日志中客户端显示为 "Unknown" 而不是 "Cherry Studio"
|
||||||
|
|
||||||
|
**解决方案**:
|
||||||
|
1. 确认请求头中包含 `x-title` 字段
|
||||||
|
2. 检查 Cherry Studio 是否正确设置了自定义请求头
|
||||||
|
3. 尝试手动添加请求头进行测试
|
||||||
|
|
||||||
|
**测试命令**:
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/v1/chat/completions \
|
||||||
|
-H "x-title: Cherry Studio" \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-H "Authorization: Bearer sk-oci-genai-default-key" \
|
||||||
|
-d '{"model": "google.gemini-2.5-pro", "messages": [{"role": "user", "content": "test"}]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
### 问题 3: thinking_budget 映射到错误的 reasoning_effort
|
||||||
|
|
||||||
|
**症状**:期望的 reasoning_effort 与实际不符
|
||||||
|
|
||||||
|
**验证映射规则**:
|
||||||
|
- thinking_budget ≤ 1760(-1 除外)→ low
|
||||||
|
- 1760 < thinking_budget ≤ 16448 → medium
|
||||||
|
- thinking_budget > 16448 → high
|
||||||
|
- thinking_budget = -1 → None (使用模型默认)
|
||||||
|
|
||||||
|
**示例**:
|
||||||
|
```python
|
||||||
|
# thinking_budget = 1000 → low ✓
|
||||||
|
# thinking_budget = 5000 → medium ✓
|
||||||
|
# thinking_budget = 20000 → high ✓
|
||||||
|
# thinking_budget = -1 → None (默认) ✓
|
||||||
|
```
|
||||||
|
|
||||||
|
## 测试
|
||||||
|
|
||||||
|
### 自动化测试
|
||||||
|
|
||||||
|
运行 Cherry Studio 优化测试脚本:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./tests/test_cherry_studio_optimization.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
测试脚本会验证以下场景:
|
||||||
|
1. thinking_budget = 1000 → reasoning_effort = low
|
||||||
|
2. thinking_budget = 5000 → reasoning_effort = medium
|
||||||
|
3. thinking_budget = 20000 → reasoning_effort = high
|
||||||
|
4. thinking_budget = -1 → 使用模型默认值
|
||||||
|
5. 无 extra_body(正常请求)
|
||||||
|
6. 不同客户端名称(验证 x-title 识别)
|
||||||
|
|
||||||
|
## 参考资料
|
||||||
|
|
||||||
|
- [OCI GenAI Python SDK - GenericChatRequest](https://docs.oracle.com/en-us/iaas/tools/python/latest/api/generative_ai_inference/models/oci.generative_ai_inference.models.GenericChatRequest.html)
|
||||||
|
- [OpenAI API - Reasoning Models](https://platform.openai.com/docs/guides/reasoning)
|
||||||
|
- [Google Gemini - Thinking](https://ai.google.dev/gemini-api/docs/thinking)
|
||||||
750
docs/ENVIRONMENT_VARIABLES.md
Normal file
750
docs/ENVIRONMENT_VARIABLES.md
Normal file
@@ -0,0 +1,750 @@
|
|||||||
|
# 环境变量配置说明
|
||||||
|
|
||||||
|
本文档详细说明 OCI GenAI 网关支持的所有环境变量及其配置方法。
|
||||||
|
|
||||||
|
## 📋 目录
|
||||||
|
|
||||||
|
- [快速配置](#快速配置)
|
||||||
|
- [API 设置](#api-设置)
|
||||||
|
- [认证设置](#认证设置)
|
||||||
|
- [OCI 配置](#oci-配置)
|
||||||
|
- [模型设置](#模型设置)
|
||||||
|
- [嵌入设置](#嵌入设置)
|
||||||
|
- [流式响应设置](#流式响应设置)
|
||||||
|
- [日志设置](#日志设置)
|
||||||
|
- [配置示例](#配置示例)
|
||||||
|
- [常见配置场景](#常见配置场景)
|
||||||
|
|
||||||
|
## 快速配置
|
||||||
|
|
||||||
|
1. 复制示例配置文件:
|
||||||
|
```bash
|
||||||
|
cp .env.example .env
|
||||||
|
```
|
||||||
|
|
||||||
|
2. 编辑 `.env` 文件,至少配置以下必需项:
|
||||||
|
```bash
|
||||||
|
API_KEYS=["sk-your-secret-key"]
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT
|
||||||
|
```
|
||||||
|
|
||||||
|
3. 确保 OCI 配置文件存在:
|
||||||
|
```bash
|
||||||
|
cat ~/.oci/config
|
||||||
|
```
|
||||||
|
|
||||||
|
## API 设置
|
||||||
|
|
||||||
|
### API_TITLE
|
||||||
|
|
||||||
|
- **说明**:API 服务的标题,显示在 OpenAPI 文档中
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`OCI GenAI to OpenAI API Gateway`
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
API_TITLE=My AI Gateway
|
||||||
|
```
|
||||||
|
|
||||||
|
### API_VERSION
|
||||||
|
|
||||||
|
- **说明**:API 服务的版本号
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`0.0.1`
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
API_VERSION=1.0.0
|
||||||
|
```
|
||||||
|
|
||||||
|
### API_PREFIX
|
||||||
|
|
||||||
|
- **说明**:API 路由前缀,符合 OpenAI API 规范
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`/v1`
|
||||||
|
- **可选值**:任何有效的 URL 路径
|
||||||
|
- **注意**:不建议修改,以保持与 OpenAI SDK 的兼容性
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
API_PREFIX=/v1
|
||||||
|
```
|
||||||
|
|
||||||
|
### API_PORT
|
||||||
|
|
||||||
|
- **说明**:服务监听端口
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`8000`
|
||||||
|
- **范围**:1-65535
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
API_PORT=8080
|
||||||
|
```
|
||||||
|
|
||||||
|
### API_HOST
|
||||||
|
|
||||||
|
- **说明**:服务监听地址
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`0.0.0.0`(监听所有网络接口)
|
||||||
|
- **可选值**:
|
||||||
|
- `0.0.0.0` - 监听所有接口(生产环境)
|
||||||
|
- `127.0.0.1` - 仅本地访问(开发环境)
|
||||||
|
- 特定 IP 地址
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
API_HOST=127.0.0.1
|
||||||
|
```
|
||||||
|
|
||||||
|
### DEBUG
|
||||||
|
|
||||||
|
- **说明**:启用调试模式
|
||||||
|
- **类型**:布尔值
|
||||||
|
- **默认值**:`false`
|
||||||
|
- **可选值**:`true` / `false`
|
||||||
|
- **影响**:
|
||||||
|
- 启用时会显示详细的错误堆栈
|
||||||
|
- 自动重载代码变更
|
||||||
|
- 启用 FastAPI 的交互式文档
|
||||||
|
- **注意**:生产环境应设置为 `false`
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
DEBUG=true
|
||||||
|
```
|
||||||
|
|
||||||
|
## 认证设置
|
||||||
|
|
||||||
|
### API_KEYS
|
||||||
|
|
||||||
|
- **说明**:API 密钥列表,用于客户端认证
|
||||||
|
- **类型**:JSON 数组
|
||||||
|
- **默认值**:`["sk-oci-genai-default-key"]`
|
||||||
|
- **格式**:JSON 数组字符串
|
||||||
|
- **用途**:客户端通过 `Authorization: Bearer <key>` 头进行认证
|
||||||
|
- **安全建议**:
|
||||||
|
- 使用强密钥(至少 32 个字符)
|
||||||
|
- 定期轮换密钥
|
||||||
|
- 不同环境使用不同的密钥
|
||||||
|
- 不要将密钥提交到版本控制系统
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
# 单个密钥
|
||||||
|
API_KEYS=["sk-prod-a1b2c3d4e5f6g7h8"]
|
||||||
|
|
||||||
|
# 多个密钥(支持不同的客户端)
|
||||||
|
API_KEYS=["sk-admin-key123","sk-user-key456","sk-app-key789"]
|
||||||
|
```
|
||||||
|
|
||||||
|
## OCI 配置
|
||||||
|
|
||||||
|
### OCI_CONFIG_FILE
|
||||||
|
|
||||||
|
- **说明**:OCI 配置文件路径
|
||||||
|
- **类型**:字符串(文件路径)
|
||||||
|
- **默认值**:`~/.oci/config`
|
||||||
|
- **用途**:指定 OCI SDK 使用的配置文件位置
|
||||||
|
- **配置文件格式**:
|
||||||
|
```ini
|
||||||
|
[DEFAULT]
|
||||||
|
user=ocid1.user.oc1...
|
||||||
|
fingerprint=aa:bb:cc:dd...
|
||||||
|
key_file=~/.oci/oci_api_key.pem
|
||||||
|
tenancy=ocid1.tenancy.oc1...
|
||||||
|
region=us-chicago-1
|
||||||
|
```
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
OCI_CONFIG_FILE=~/.oci/config
|
||||||
|
OCI_CONFIG_FILE=/custom/path/to/oci_config
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCI_CONFIG_PROFILE
|
||||||
|
|
||||||
|
- **说明**:OCI 配置文件中的 profile 名称
|
||||||
|
- **类型**:字符串(支持逗号分隔的多个值)
|
||||||
|
- **默认值**:`DEFAULT`
|
||||||
|
- **用途**:
|
||||||
|
- 单个 profile:使用指定的 OCI 配置
|
||||||
|
- 多个 profiles:自动 round-robin 负载均衡
|
||||||
|
- **要求**:每个 profile 必须包含 `region` 和 `tenancy` 字段
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
# 单配置
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT
|
||||||
|
|
||||||
|
# 多配置(负载均衡)
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
|
||||||
|
|
||||||
|
# 跨区域配置
|
||||||
|
OCI_CONFIG_PROFILE=US_WEST,US_EAST,EU_FRANKFURT
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCI_AUTH_TYPE
|
||||||
|
|
||||||
|
- **说明**:OCI 认证类型
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`api_key`
|
||||||
|
- **可选值**:
|
||||||
|
- `api_key` - 使用 API 密钥认证(推荐用于本地开发)
|
||||||
|
- `instance_principal` - 使用实例主体认证(推荐用于 OCI 实例)
|
||||||
|
- **使用场景**:
|
||||||
|
- **api_key**:本地开发、Docker 容器、非 OCI 环境
|
||||||
|
- **instance_principal**:OCI Compute 实例、Container Engine、Functions
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
OCI_AUTH_TYPE=api_key
|
||||||
|
OCI_AUTH_TYPE=instance_principal
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCI_CONNECT_TIMEOUT
|
||||||
|
|
||||||
|
- **说明**:OCI API 连接超时时间(秒)
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`10`
|
||||||
|
- **范围**:1-300
|
||||||
|
- **用途**:限制建立与 OCI API 连接的最大时间
|
||||||
|
- **调优建议**:
|
||||||
|
- 网络稳定:保持默认值(10 秒)
|
||||||
|
- 网络不稳定:增加到 20-30 秒
|
||||||
|
- 快速失败:减少到 5 秒
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
OCI_CONNECT_TIMEOUT=10
|
||||||
|
OCI_CONNECT_TIMEOUT=30 # 网络较慢时
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCI_READ_TIMEOUT
|
||||||
|
|
||||||
|
- **说明**:OCI API 读取超时时间(秒)
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`360`(6 分钟)
|
||||||
|
- **范围**:30-600
|
||||||
|
- **用途**:限制等待 OCI API 响应的最大时间
|
||||||
|
- **调优建议**:
|
||||||
|
- 简单查询:120 秒
|
||||||
|
- 复杂对话:300-360 秒
|
||||||
|
- 长文档处理:600 秒
|
||||||
|
- **注意**:设置过小可能导致长时间运行的请求超时
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
OCI_READ_TIMEOUT=360
|
||||||
|
OCI_READ_TIMEOUT=600 # 处理长文档时
|
||||||
|
```
|
||||||
|
|
||||||
|
### GENAI_ENDPOINT
|
||||||
|
|
||||||
|
- **说明**:专用模型端点(可选)
|
||||||
|
- **类型**:字符串(URL)
|
||||||
|
- **默认值**:无(自动根据 region 构建)
|
||||||
|
- **用途**:指定自定义的 OCI GenAI 端点
|
||||||
|
- **使用场景**:
|
||||||
|
- 使用专用端点
|
||||||
|
- 测试环境
|
||||||
|
- 企业私有部署
|
||||||
|
- **注意**:通常不需要设置,系统会自动使用正确的端点
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
GENAI_ENDPOINT=https://your-dedicated-endpoint.oraclecloud.com
|
||||||
|
```
|
||||||
|
|
||||||
|
## 模型设置
|
||||||
|
|
||||||
|
### MAX_TOKENS
|
||||||
|
|
||||||
|
- **说明**:默认最大 token 数
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`4096`(注意:`.env.example` 中的示例值为 `8192`)
|
||||||
|
- **范围**:1-模型最大限制
|
||||||
|
- **用途**:当客户端未指定 `max_tokens` 时使用
|
||||||
|
- **不同模型的上下文窗口上限(参考值,单次输出 token 上限通常更小)**:
|
||||||
|
- Cohere Command R+:128k
|
||||||
|
- Meta Llama 3.1 405B:128k
|
||||||
|
- Google Gemini 2.5 Pro:2M
|
||||||
|
- **注意**:实际限制取决于具体模型
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
MAX_TOKENS=4096
|
||||||
|
MAX_TOKENS=8192 # 长对话场景
|
||||||
|
```
|
||||||
|
|
||||||
|
### TEMPERATURE
|
||||||
|
|
||||||
|
- **说明**:默认温度参数
|
||||||
|
- **类型**:浮点数
|
||||||
|
- **默认值**:`0.7`
|
||||||
|
- **范围**:0.0-2.0
|
||||||
|
- **用途**:控制生成文本的随机性
|
||||||
|
- **效果**:
|
||||||
|
- 0.0:确定性输出(适合事实查询)
|
||||||
|
- 0.7:平衡创造性和准确性(默认)
|
||||||
|
- 1.0-2.0:更有创造性(适合创意写作)
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
TEMPERATURE=0.7
|
||||||
|
TEMPERATURE=0.0 # 事实性问答
|
||||||
|
TEMPERATURE=1.2 # 创意写作
|
||||||
|
```
|
||||||
|
|
||||||
|
## 嵌入设置
|
||||||
|
|
||||||
|
### EMBED_TRUNCATE
|
||||||
|
|
||||||
|
- **说明**:嵌入文本截断策略
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`END`
|
||||||
|
- **可选值**:
|
||||||
|
- `END` - 保留文本开头,截断末尾
|
||||||
|
- `START` - 保留文本末尾,截断开头
|
||||||
|
- **用途**:当输入文本超过模型限制时的处理方式
|
||||||
|
- **使用场景**:
|
||||||
|
- **END**:搜索查询、文档摘要(重点在开头)
|
||||||
|
- **START**:对话历史、日志分析(重点在结尾)
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
EMBED_TRUNCATE=END
|
||||||
|
EMBED_TRUNCATE=START
|
||||||
|
```
|
||||||
|
|
||||||
|
## 流式响应设置
|
||||||
|
|
||||||
|
### ENABLE_STREAMING
|
||||||
|
|
||||||
|
- **说明**:全局流式响应开关
|
||||||
|
- **类型**:布尔值
|
||||||
|
- **默认值**:`true`
|
||||||
|
- **可选值**:`true` / `false`
|
||||||
|
- **用途**:控制是否允许流式响应
|
||||||
|
- **行为**:
|
||||||
|
- `true`:允许流式响应(客户端需设置 `stream=true`)
|
||||||
|
- `false`:强制禁用流式响应(即使客户端设置 `stream=true`)
|
||||||
|
- **使用场景**:
|
||||||
|
- 启用:交互式聊天、实时响应
|
||||||
|
- 禁用:批处理、API 集成测试
|
||||||
|
- **注意**:设置为 `false` 会覆盖客户端的流式请求
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
ENABLE_STREAMING=false # 调试或批处理时
|
||||||
|
```
|
||||||
|
|
||||||
|
### STREAM_CHUNK_SIZE
|
||||||
|
|
||||||
|
- **说明**:模拟流式响应的分块大小(字符数)
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`1024`
|
||||||
|
- **范围**:100-4096
|
||||||
|
- **用途**:仅在 OCI 返回非流式响应时使用(fallback 模式)
|
||||||
|
- **调优建议**:
|
||||||
|
- 快速网络:1024-2048
|
||||||
|
- 慢速网络:512-1024
|
||||||
|
- 视觉效果优先:256-512
|
||||||
|
- **注意**:不影响真实流式响应的性能
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
STREAM_CHUNK_SIZE=1024
|
||||||
|
STREAM_CHUNK_SIZE=512 # 更频繁的更新
|
||||||
|
```
|
||||||
|
|
||||||
|
## 日志设置
|
||||||
|
|
||||||
|
### LOG_LEVEL
|
||||||
|
|
||||||
|
- **说明**:日志级别
|
||||||
|
- **类型**:字符串
|
||||||
|
- **默认值**:`INFO`
|
||||||
|
- **可选值**:
|
||||||
|
- `DEBUG` - 详细调试信息(包含所有日志)
|
||||||
|
- `INFO` - 一般信息(推荐生产环境)
|
||||||
|
- `WARNING` - 警告信息
|
||||||
|
- `ERROR` - 错误信息
|
||||||
|
- `CRITICAL` - 严重错误
|
||||||
|
- **使用场景**:
|
||||||
|
- 开发环境:`DEBUG`
|
||||||
|
- 生产环境:`INFO` 或 `WARNING`
|
||||||
|
- 最小日志:`ERROR`
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
LOG_LEVEL=DEBUG # 开发调试
|
||||||
|
```
|
||||||
|
|
||||||
|
### LOG_REQUESTS
|
||||||
|
|
||||||
|
- **说明**:启用请求详细日志
|
||||||
|
- **类型**:布尔值
|
||||||
|
- **默认值**:`false`
|
||||||
|
- **可选值**:`true` / `false`
|
||||||
|
- **用途**:记录所有传入请求的详细信息
|
||||||
|
- **包含内容**:
|
||||||
|
- HTTP 方法和 URL
|
||||||
|
- 查询参数
|
||||||
|
- 请求头(敏感信息自动过滤)
|
||||||
|
- 请求体(JSON 格式化)
|
||||||
|
- **性能影响**:轻微(主要是日志写入)
|
||||||
|
- **安全性**:自动过滤 API 密钥等敏感信息
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
LOG_REQUESTS=false
|
||||||
|
LOG_REQUESTS=true # 调试 API 集成时
|
||||||
|
```
|
||||||
|
|
||||||
|
### LOG_RESPONSES
|
||||||
|
|
||||||
|
- **说明**:启用响应详细日志
|
||||||
|
- **类型**:布尔值
|
||||||
|
- **默认值**:`false`
|
||||||
|
- **可选值**:`true` / `false`
|
||||||
|
- **用途**:记录所有发出响应的详细信息
|
||||||
|
- **包含内容**:
|
||||||
|
- HTTP 状态码
|
||||||
|
- 响应处理时间
|
||||||
|
- 响应头
|
||||||
|
- 响应体(JSON 格式化)
|
||||||
|
- **注意**:流式响应不会记录完整响应体
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
LOG_RESPONSES=false
|
||||||
|
LOG_RESPONSES=true # 调试响应格式时
|
||||||
|
```
|
||||||
|
|
||||||
|
### LOG_FILE
|
||||||
|
|
||||||
|
- **说明**:日志文件路径
|
||||||
|
- **类型**:字符串(文件路径)
|
||||||
|
- **默认值**:`./logs/app.log`
|
||||||
|
- **用途**:指定日志文件保存位置
|
||||||
|
- **行为**:
|
||||||
|
- 如果未设置,仅输出到控制台
|
||||||
|
- 如果设置,同时输出到文件和控制台
|
||||||
|
- **注意**:目录必须存在或有创建权限
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
LOG_FILE=./logs/app.log
|
||||||
|
LOG_FILE=/var/log/oci-genai/app.log
|
||||||
|
```
|
||||||
|
|
||||||
|
### LOG_FILE_MAX_SIZE
|
||||||
|
|
||||||
|
- **说明**:单个日志文件最大大小(MB)
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`10`
|
||||||
|
- **范围**:1-1000
|
||||||
|
- **用途**:日志文件轮转的大小限制
|
||||||
|
- **行为**:超过限制时自动创建新文件
|
||||||
|
- **建议值**:
|
||||||
|
- 低流量:10 MB
|
||||||
|
- 中等流量:50 MB
|
||||||
|
- 高流量:100-200 MB
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
LOG_FILE_MAX_SIZE=10
|
||||||
|
LOG_FILE_MAX_SIZE=50 # 高流量场景
|
||||||
|
```
|
||||||
|
|
||||||
|
### LOG_FILE_BACKUP_COUNT
|
||||||
|
|
||||||
|
- **说明**:保留的备份日志文件数量
|
||||||
|
- **类型**:整数
|
||||||
|
- **默认值**:`5`
|
||||||
|
- **范围**:0-100
|
||||||
|
- **用途**:控制日志文件轮转时保留的历史文件数
|
||||||
|
- **存储计算**:总空间 = MAX_SIZE × (BACKUP_COUNT + 1)
|
||||||
|
- **示例**:
|
||||||
|
```bash
|
||||||
|
LOG_FILE_BACKUP_COUNT=5
|
||||||
|
LOG_FILE_BACKUP_COUNT=10 # 需要更长的历史记录
|
||||||
|
```
|
||||||
|
|
||||||
|
## 配置示例
|
||||||
|
|
||||||
|
### 开发环境配置
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 开发环境 - 本地调试
|
||||||
|
DEBUG=true
|
||||||
|
LOG_LEVEL=DEBUG
|
||||||
|
LOG_REQUESTS=true
|
||||||
|
LOG_RESPONSES=true
|
||||||
|
|
||||||
|
API_PORT=8000
|
||||||
|
API_HOST=127.0.0.1
|
||||||
|
|
||||||
|
API_KEYS=["sk-dev-key-123"]
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT
|
||||||
|
OCI_AUTH_TYPE=api_key
|
||||||
|
|
||||||
|
MAX_TOKENS=4096
|
||||||
|
TEMPERATURE=0.7
|
||||||
|
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
STREAM_CHUNK_SIZE=512
|
||||||
|
|
||||||
|
LOG_FILE=./logs/dev.log
|
||||||
|
LOG_FILE_MAX_SIZE=10
|
||||||
|
LOG_FILE_BACKUP_COUNT=3
|
||||||
|
```
|
||||||
|
|
||||||
|
### 生产环境配置
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 生产环境 - 多区域负载均衡
|
||||||
|
DEBUG=false
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
LOG_REQUESTS=false
|
||||||
|
LOG_RESPONSES=false
|
||||||
|
|
||||||
|
API_PORT=8000
|
||||||
|
API_HOST=0.0.0.0
|
||||||
|
|
||||||
|
# 使用强密钥
|
||||||
|
API_KEYS=["sk-prod-a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6"]
|
||||||
|
|
||||||
|
# 多区域配置
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
|
||||||
|
OCI_AUTH_TYPE=api_key
|
||||||
|
|
||||||
|
# 超时配置
|
||||||
|
OCI_CONNECT_TIMEOUT=15
|
||||||
|
OCI_READ_TIMEOUT=360
|
||||||
|
|
||||||
|
# 模型配置
|
||||||
|
MAX_TOKENS=4096
|
||||||
|
TEMPERATURE=0.7
|
||||||
|
|
||||||
|
# 流式配置
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
STREAM_CHUNK_SIZE=1024
|
||||||
|
|
||||||
|
# 日志配置
|
||||||
|
LOG_FILE=/var/log/oci-genai/app.log
|
||||||
|
LOG_FILE_MAX_SIZE=50
|
||||||
|
LOG_FILE_BACKUP_COUNT=10
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker 容器配置
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Docker 环境
|
||||||
|
DEBUG=false
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
API_PORT=8000
|
||||||
|
API_HOST=0.0.0.0
|
||||||
|
|
||||||
|
API_KEYS=["sk-docker-key-abc123"]
|
||||||
|
OCI_CONFIG_FILE=/app/.oci/config
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT
|
||||||
|
OCI_AUTH_TYPE=api_key
|
||||||
|
|
||||||
|
# 适当的超时设置
|
||||||
|
OCI_CONNECT_TIMEOUT=20
|
||||||
|
OCI_READ_TIMEOUT=360
|
||||||
|
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
|
||||||
|
# 容器内日志路径
|
||||||
|
LOG_FILE=/app/logs/app.log
|
||||||
|
LOG_FILE_MAX_SIZE=20
|
||||||
|
LOG_FILE_BACKUP_COUNT=5
|
||||||
|
```
|
||||||
|
|
||||||
|
### OCI 实例配置
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# OCI Compute 实例 - 使用实例主体认证
|
||||||
|
DEBUG=false
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
API_PORT=8000
|
||||||
|
API_HOST=0.0.0.0
|
||||||
|
|
||||||
|
API_KEYS=["sk-instance-key-xyz789"]
|
||||||
|
|
||||||
|
# 使用实例主体认证
|
||||||
|
OCI_AUTH_TYPE=instance_principal
|
||||||
|
# 注意:使用实例主体时不需要 OCI_CONFIG_FILE
|
||||||
|
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
|
||||||
|
LOG_FILE=/var/log/oci-genai/app.log
|
||||||
|
LOG_FILE_MAX_SIZE=50
|
||||||
|
LOG_FILE_BACKUP_COUNT=10
|
||||||
|
```
|
||||||
|
|
||||||
|
## 常见配置场景
|
||||||
|
|
||||||
|
### 场景 1: 单区域简单部署
|
||||||
|
|
||||||
|
```bash
|
||||||
|
API_KEYS=["sk-simple-key"]
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT
|
||||||
|
OCI_AUTH_TYPE=api_key
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
```
|
||||||
|
|
||||||
|
### 场景 2: 多区域高可用部署
|
||||||
|
|
||||||
|
```bash
|
||||||
|
API_KEYS=["sk-ha-key-primary","sk-ha-key-backup"]
|
||||||
|
OCI_CONFIG_PROFILE=US_EAST,US_WEST,EU_FRANKFURT
|
||||||
|
OCI_AUTH_TYPE=api_key
|
||||||
|
OCI_CONNECT_TIMEOUT=20
|
||||||
|
OCI_READ_TIMEOUT=360
|
||||||
|
LOG_LEVEL=WARNING
|
||||||
|
```
|
||||||
|
|
||||||
|
### 场景 3: 调试和开发
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DEBUG=true
|
||||||
|
LOG_LEVEL=DEBUG
|
||||||
|
LOG_REQUESTS=true
|
||||||
|
LOG_RESPONSES=true
|
||||||
|
API_HOST=127.0.0.1
|
||||||
|
STREAM_CHUNK_SIZE=256
|
||||||
|
```
|
||||||
|
|
||||||
|
### 场景 4: 高性能生产环境
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DEBUG=false
|
||||||
|
LOG_LEVEL=WARNING
|
||||||
|
LOG_REQUESTS=false
|
||||||
|
LOG_RESPONSES=false
|
||||||
|
OCI_CONFIG_PROFILE=DEFAULT,REGION2,REGION3
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
MAX_TOKENS=8192
|
||||||
|
OCI_READ_TIMEOUT=600
|
||||||
|
LOG_FILE_MAX_SIZE=100
|
||||||
|
LOG_FILE_BACKUP_COUNT=20
|
||||||
|
```
|
||||||
|
|
||||||
|
### 场景 5: 批处理/API 测试
|
||||||
|
|
||||||
|
```bash
|
||||||
|
ENABLE_STREAMING=false
|
||||||
|
MAX_TOKENS=2048
|
||||||
|
TEMPERATURE=0.0
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
LOG_REQUESTS=true
|
||||||
|
LOG_RESPONSES=true
|
||||||
|
```
|
||||||
|
|
||||||
|
## 环境变量优先级
|
||||||
|
|
||||||
|
配置加载顺序(后者覆盖前者):
|
||||||
|
|
||||||
|
1. 应用默认值(代码中定义)
|
||||||
|
2. `.env` 文件
|
||||||
|
3. 系统环境变量
|
||||||
|
4. OCI 配置文件(`~/.oci/config`)
|
||||||
|
|
||||||
|
**示例**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# .env 文件中
|
||||||
|
LOG_LEVEL=INFO
|
||||||
|
|
||||||
|
# 命令行覆盖
|
||||||
|
LOG_LEVEL=DEBUG python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
## 配置验证
|
||||||
|
|
||||||
|
### 检查配置是否生效
|
||||||
|
|
||||||
|
启动服务后查看日志:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd src
|
||||||
|
python main.py
|
||||||
|
```
|
||||||
|
|
||||||
|
查看启动日志确认配置:
|
||||||
|
|
||||||
|
```
|
||||||
|
2025-12-10 10:00:00 - INFO - Starting OCI GenAI Gateway
|
||||||
|
2025-12-10 10:00:00 - INFO - API Port: 8000
|
||||||
|
2025-12-10 10:00:00 - INFO - OCI Profiles: DEFAULT, CHICAGO
|
||||||
|
2025-12-10 10:00:00 - INFO - Streaming: Enabled
|
||||||
|
2025-12-10 10:00:00 - INFO - Log Level: INFO
|
||||||
|
```
|
||||||
|
|
||||||
|
### 常见配置错误
|
||||||
|
|
||||||
|
1. **API_KEYS 格式错误**
|
||||||
|
```bash
|
||||||
|
# 错误
|
||||||
|
API_KEYS=sk-key-123
|
||||||
|
|
||||||
|
# 正确
|
||||||
|
API_KEYS=["sk-key-123"]
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **布尔值格式错误**
|
||||||
|
```bash
|
||||||
|
# 错误
|
||||||
|
DEBUG=True
|
||||||
|
ENABLE_STREAMING=yes
|
||||||
|
|
||||||
|
# 正确
|
||||||
|
DEBUG=true
|
||||||
|
ENABLE_STREAMING=true
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **路径错误**
|
||||||
|
```bash
|
||||||
|
# 错误(相对路径不明确)
|
||||||
|
OCI_CONFIG_FILE=oci/config
|
||||||
|
|
||||||
|
# 正确
|
||||||
|
OCI_CONFIG_FILE=~/.oci/config
|
||||||
|
OCI_CONFIG_FILE=/absolute/path/to/config
|
||||||
|
```
|
||||||
|
|
||||||
|
## 安全建议
|
||||||
|
|
||||||
|
1. **保护 API 密钥**
|
||||||
|
- 使用强密钥(至少 32 个字符)
|
||||||
|
- 不要将 `.env` 文件提交到版本控制
|
||||||
|
- 定期轮换密钥
|
||||||
|
|
||||||
|
2. **生产环境设置**
|
||||||
|
- `DEBUG=false`
|
||||||
|
- `LOG_LEVEL=INFO` 或 `WARNING`
|
||||||
|
- `LOG_REQUESTS=false`
|
||||||
|
- `LOG_RESPONSES=false`
|
||||||
|
|
||||||
|
3. **日志管理**
|
||||||
|
- 定期清理旧日志
|
||||||
|
- 限制日志文件大小
|
||||||
|
- 确保日志不包含敏感信息
|
||||||
|
|
||||||
|
## 故障排除
|
||||||
|
|
||||||
|
### 配置未生效
|
||||||
|
|
||||||
|
1. 检查 `.env` 文件是否在正确位置
|
||||||
|
2. 确认环境变量名称拼写正确
|
||||||
|
3. 检查值的格式(JSON、布尔值等)
|
||||||
|
4. 查看启动日志确认配置加载
|
||||||
|
|
||||||
|
### 连接超时
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 增加超时时间
|
||||||
|
OCI_CONNECT_TIMEOUT=30
|
||||||
|
OCI_READ_TIMEOUT=600
|
||||||
|
```
|
||||||
|
|
||||||
|
### 日志文件无法创建
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 检查目录是否存在
|
||||||
|
mkdir -p logs
|
||||||
|
|
||||||
|
# 检查权限
|
||||||
|
chmod 755 logs
|
||||||
|
```
|
||||||
|
|
||||||
|
## 参考资料
|
||||||
|
|
||||||
|
- [.env.example](../.env.example) - 完整的配置示例文件
|
||||||
|
- [OCI SDK 配置](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm) - OCI 配置文件格式
|
||||||
|
- [FastAPI Settings](https://fastapi.tiangolo.com/advanced/settings/) - FastAPI 设置管理
|
||||||
@@ -342,8 +342,7 @@ Service generativeai is not available in region us-sanjose-1
|
|||||||
- [OCI Generative AI 官方文档](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
|
- [OCI Generative AI 官方文档](https://docs.oracle.com/en-us/iaas/Content/generative-ai/home.htm)
|
||||||
- [OCI CLI 配置指南](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm)
|
- [OCI CLI 配置指南](https://docs.oracle.com/en-us/iaas/Content/API/Concepts/sdkconfig.htm)
|
||||||
- [OCI IAM 策略参考](https://docs.oracle.com/en-us/iaas/Content/Identity/Concepts/policygetstarted.htm)
|
- [OCI IAM 策略参考](https://docs.oracle.com/en-us/iaas/Content/Identity/Concepts/policygetstarted.htm)
|
||||||
- [项目 README](README.md)
|
- [项目 README](../README.md)
|
||||||
- [开发文档 CLAUDE.md](CLAUDE.md)
|
|
||||||
|
|
||||||
## 🆘 获取帮助
|
## 🆘 获取帮助
|
||||||
|
|
||||||
@@ -5,8 +5,8 @@ import asyncio
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import uuid
|
import uuid
|
||||||
from typing import AsyncIterator, Union
|
from typing import AsyncIterator, Union, Optional
|
||||||
from fastapi import APIRouter, Depends, HTTPException
|
from fastapi import APIRouter, Depends, HTTPException, Request
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
|
|
||||||
from oci.exceptions import ServiceError
|
from oci.exceptions import ServiceError
|
||||||
@@ -34,6 +34,74 @@ router = APIRouter(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def map_thinking_budget_to_reasoning_effort(thinking_budget: int) -> Optional[str]:
    """
    Translate a Cherry Studio ``thinking_budget`` into an OCI ``reasoning_effort`` level.

    The checks run in this order:
        1. ``thinking_budget == -1``     -> ``None`` (no explicit effort; model default)
        2. ``thinking_budget <= 1760``   -> ``"low"``
        3. ``thinking_budget <= 16448``  -> ``"medium"``
        4. anything larger               -> ``"high"``

    Because the ``-1`` sentinel is tested first, it never falls into the
    ``"low"`` bucket even though it is numerically below 1760.

    Args:
        thinking_budget: The thinking budget value from Cherry Studio.

    Returns:
        ``"low"``, ``"medium"`` or ``"high"``, or ``None`` for the ``-1`` sentinel.
    """
    # Sentinel value: the caller asked for the model's own default.
    if thinking_budget == -1:
        return None

    # Smallest bucket.
    if thinking_budget <= 1760:
        return "low"

    # Remaining values split on the upper threshold.
    return "medium" if thinking_budget <= 16448 else "high"
|
||||||
|
|
||||||
|
|
||||||
|
def extract_reasoning_effort_from_extra_body(extra_body: Optional[dict]) -> Optional[str]:
    """
    Extract reasoning_effort from Cherry Studio's extra_body parameter.

    Reads ``extra_body["google"]["thinking_config"]["thinking_budget"]`` and,
    when it is a real number, converts it to an integer and maps it through
    ``map_thinking_budget_to_reasoning_effort``.

    Example extra_body structure:
        {
            "google": {
                "thinking_config": {
                    "thinking_budget": 1760,
                    "include_thoughts": true
                }
            }
        }

    This helper is best-effort: a missing, malformed or non-numeric budget
    yields ``None`` instead of raising.

    Args:
        extra_body: The extra_body dict from the request (may be None or empty).

    Returns:
        The mapped reasoning_effort value, or None when no usable
        thinking_budget is present (or the mapper itself returns None).
    """
    if not extra_body:
        return None

    try:
        # Navigate through the nested structure; a non-dict at any level
        # raises AttributeError, which is handled below.
        google_config = extra_body.get("google", {})
        thinking_config = google_config.get("thinking_config", {})
        thinking_budget = thinking_config.get("thinking_budget")

        # bool is a subclass of int, so it must be excluded explicitly;
        # otherwise a JSON `true` would be treated as a budget of 1.
        is_number = isinstance(thinking_budget, (int, float)) and not isinstance(thinking_budget, bool)

        if is_number:
            # int() raises ValueError for NaN and OverflowError for +/-inf;
            # both are caught below so a bad value cannot fail the request.
            effort = map_thinking_budget_to_reasoning_effort(int(thinking_budget))
            if effort:
                logger.info(f"Cherry Studio thinking_budget {thinking_budget} mapped to reasoning_effort: {effort}")
            else:
                logger.info(f"Cherry Studio thinking_budget {thinking_budget} set to -1, using model default")
            return effort
    except (AttributeError, TypeError, KeyError, ValueError, OverflowError) as e:
        logger.debug(f"Failed to extract thinking_budget from extra_body: {e}")

    return None
|
||||||
|
|
||||||
|
|
||||||
def extract_delta_from_chunk(chunk) -> str:
|
def extract_delta_from_chunk(chunk) -> str:
|
||||||
"""
|
"""
|
||||||
Extract delta text content from OCI streaming chunk.
|
Extract delta text content from OCI streaming chunk.
|
||||||
@@ -166,29 +234,35 @@ def extract_content_from_response(chat_response) -> str:
|
|||||||
|
|
||||||
|
|
||||||
@router.post("/completions", response_model=ChatCompletionResponse)
|
@router.post("/completions", response_model=ChatCompletionResponse)
|
||||||
async def create_chat_completion(request: ChatCompletionRequest):
|
async def create_chat_completion(
|
||||||
|
chat_request: ChatCompletionRequest,
|
||||||
|
request: Request
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Create a chat completion using OCI Generative AI.
|
Create a chat completion using OCI Generative AI.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
request: Chat completion request
|
chat_request: Chat completion request
|
||||||
|
request: FastAPI Request object for accessing headers
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Chat completion response
|
Chat completion response
|
||||||
"""
|
"""
|
||||||
logger.info(f"Chat completion request for model: {request.model}")
|
# Extract client name from x-title header
|
||||||
|
client_name = request.headers.get("x-title", "Unknown")
|
||||||
|
logger.info(f"Chat completion request for model: {chat_request.model}, client: {client_name}")
|
||||||
|
|
||||||
settings = get_settings()
|
settings = get_settings()
|
||||||
|
|
||||||
# Validate model exists
|
# Validate model exists
|
||||||
model_config = get_model_config(request.model)
|
model_config = get_model_config(chat_request.model)
|
||||||
if not model_config:
|
if not model_config:
|
||||||
raise ModelNotFoundException(request.model)
|
raise ModelNotFoundException(chat_request.model)
|
||||||
|
|
||||||
# Validate model type is chat (ondemand or dedicated)
|
# Validate model type is chat (ondemand or dedicated)
|
||||||
if model_config.type not in ("ondemand", "dedicated"):
|
if model_config.type not in ("ondemand", "dedicated"):
|
||||||
raise InvalidModelTypeException(
|
raise InvalidModelTypeException(
|
||||||
model_id=request.model,
|
model_id=chat_request.model,
|
||||||
expected_type="chat",
|
expected_type="chat",
|
||||||
actual_type=model_config.type
|
actual_type=model_config.type
|
||||||
)
|
)
|
||||||
@@ -197,24 +271,27 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
# If a model doesn't support certain content types, it will raise an error
|
# If a model doesn't support certain content types, it will raise an error
|
||||||
# For example, Cohere models will raise ValueError for non-text content
|
# For example, Cohere models will raise ValueError for non-text content
|
||||||
|
|
||||||
|
# Extract reasoning_effort from Cherry Studio's extra_body
|
||||||
|
reasoning_effort = extract_reasoning_effort_from_extra_body(chat_request.extra_body)
|
||||||
|
|
||||||
# Get OCI client from manager (轮询负载均衡)
|
# Get OCI client from manager (轮询负载均衡)
|
||||||
client_manager = get_client_manager()
|
client_manager = get_client_manager()
|
||||||
oci_client = client_manager.get_client()
|
oci_client = client_manager.get_client()
|
||||||
|
|
||||||
# Adapt messages
|
# Adapt messages
|
||||||
messages = adapt_chat_messages([msg.dict() for msg in request.messages])
|
messages = adapt_chat_messages([msg.dict() for msg in chat_request.messages])
|
||||||
|
|
||||||
# Extract parameters
|
# Extract parameters
|
||||||
params = extract_chat_params(request)
|
params = extract_chat_params(chat_request)
|
||||||
|
|
||||||
# Determine streaming mode
|
# Determine streaming mode
|
||||||
# Priority: request.stream (client) > settings.enable_streaming (global)
|
# Priority: chat_request.stream (client) > settings.enable_streaming (global)
|
||||||
# Only enable streaming if BOTH conditions are met:
|
# Only enable streaming if BOTH conditions are met:
|
||||||
# 1. Client explicitly requests stream=true (default is false per OpenAI standard)
|
# 1. Client explicitly requests stream=true (default is false per OpenAI standard)
|
||||||
# 2. Global streaming is enabled via ENABLE_STREAMING
|
# 2. Global streaming is enabled via ENABLE_STREAMING
|
||||||
enable_stream = request.stream is True and settings.enable_streaming
|
enable_stream = chat_request.stream is True and settings.enable_streaming
|
||||||
|
|
||||||
if request.stream is True and not settings.enable_streaming:
|
if chat_request.stream is True and not settings.enable_streaming:
|
||||||
logger.info("Streaming requested by client but globally disabled via ENABLE_STREAMING=false")
|
logger.info("Streaming requested by client but globally disabled via ENABLE_STREAMING=false")
|
||||||
|
|
||||||
# Handle streaming
|
# Handle streaming
|
||||||
@@ -230,13 +307,14 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
response = await loop.run_in_executor(
|
response = await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: oci_client.chat(
|
lambda: oci_client.chat(
|
||||||
model_id=request.model,
|
model_id=chat_request.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=params["temperature"],
|
temperature=params["temperature"],
|
||||||
max_tokens=params["max_tokens"],
|
max_tokens=params["max_tokens"],
|
||||||
top_p=params["top_p"],
|
top_p=params["top_p"],
|
||||||
stream=True, # Enable real streaming
|
stream=True, # Enable real streaming
|
||||||
tools=params.get("tools"),
|
tools=params.get("tools"),
|
||||||
|
reasoning_effort=reasoning_effort,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -264,7 +342,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
iterator = stream_data
|
iterator = stream_data
|
||||||
|
|
||||||
# Send first chunk with role and empty content (OpenAI format)
|
# Send first chunk with role and empty content (OpenAI format)
|
||||||
yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)
|
yield adapt_streaming_chunk("", chat_request.model, request_id, 0, is_first=True)
|
||||||
|
|
||||||
# Use queue for thread-safe chunk forwarding
|
# Use queue for thread-safe chunk forwarding
|
||||||
import queue
|
import queue
|
||||||
@@ -307,7 +385,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
delta_text = extract_delta_from_chunk(chunk)
|
delta_text = extract_delta_from_chunk(chunk)
|
||||||
|
|
||||||
if delta_text:
|
if delta_text:
|
||||||
yield adapt_streaming_chunk(delta_text, request.model, request_id, 0, is_first=False)
|
yield adapt_streaming_chunk(delta_text, chat_request.model, request_id, 0, is_first=False)
|
||||||
|
|
||||||
# Try to extract usage from chunk (typically in final chunk)
|
# Try to extract usage from chunk (typically in final chunk)
|
||||||
# Handle both SSE Event format and object format
|
# Handle both SSE Event format and object format
|
||||||
@@ -334,7 +412,7 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Send done message with usage
|
# Send done message with usage
|
||||||
yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)
|
yield adapt_streaming_done(chat_request.model, request_id, usage=accumulated_usage)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Fallback: non-streaming response, simulate streaming
|
# Fallback: non-streaming response, simulate streaming
|
||||||
@@ -355,14 +433,14 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
|
|
||||||
# Simulate streaming by chunking
|
# Simulate streaming by chunking
|
||||||
# First send empty chunk with role (OpenAI format)
|
# First send empty chunk with role (OpenAI format)
|
||||||
yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)
|
yield adapt_streaming_chunk("", chat_request.model, request_id, 0, is_first=True)
|
||||||
|
|
||||||
chunk_size = settings.stream_chunk_size
|
chunk_size = settings.stream_chunk_size
|
||||||
for i in range(0, len(content), chunk_size):
|
for i in range(0, len(content), chunk_size):
|
||||||
chunk = content[i:i + chunk_size]
|
chunk = content[i:i + chunk_size]
|
||||||
yield adapt_streaming_chunk(chunk, request.model, request_id, 0, is_first=False)
|
yield adapt_streaming_chunk(chunk, chat_request.model, request_id, 0, is_first=False)
|
||||||
|
|
||||||
yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)
|
yield adapt_streaming_done(chat_request.model, request_id, usage=accumulated_usage)
|
||||||
|
|
||||||
except TypeError as te:
|
except TypeError as te:
|
||||||
# Handle case where response is not iterable at all
|
# Handle case where response is not iterable at all
|
||||||
@@ -397,17 +475,18 @@ async def create_chat_completion(request: ChatCompletionRequest):
|
|||||||
# Non-streaming response
|
# Non-streaming response
|
||||||
try:
|
try:
|
||||||
response = oci_client.chat(
|
response = oci_client.chat(
|
||||||
model_id=request.model,
|
model_id=chat_request.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
temperature=params["temperature"],
|
temperature=params["temperature"],
|
||||||
max_tokens=params["max_tokens"],
|
max_tokens=params["max_tokens"],
|
||||||
top_p=params["top_p"],
|
top_p=params["top_p"],
|
||||||
stream=False,
|
stream=False,
|
||||||
tools=params.get("tools"),
|
tools=params.get("tools"),
|
||||||
|
reasoning_effort=reasoning_effort,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Adapt response to OpenAI format
|
# Adapt response to OpenAI format
|
||||||
openai_response = adapt_chat_response(response, request.model)
|
openai_response = adapt_chat_response(response, chat_request.model)
|
||||||
|
|
||||||
if settings.log_responses:
|
if settings.log_responses:
|
||||||
logger.debug(f"Response: {openai_response}")
|
logger.debug(f"Response: {openai_response}")
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ class ChatCompletionRequest(BaseModel):
|
|||||||
user: Optional[str] = None
|
user: Optional[str] = None
|
||||||
tools: Optional[List[Dict[str, Any]]] = None
|
tools: Optional[List[Dict[str, Any]]] = None
|
||||||
tool_choice: Optional[Union[str, Dict[str, Any]]] = None
|
tool_choice: Optional[Union[str, Dict[str, Any]]] = None
|
||||||
|
extra_body: Optional[Dict[str, Any]] = None # Cherry Studio and other client extensions
|
||||||
|
|
||||||
|
|
||||||
class ChatCompletionChoice(BaseModel):
|
class ChatCompletionChoice(BaseModel):
|
||||||
|
|||||||
@@ -184,6 +184,7 @@ class OCIGenAIClient:
|
|||||||
top_p: float = 1.0,
|
top_p: float = 1.0,
|
||||||
stream: bool = False,
|
stream: bool = False,
|
||||||
tools: Optional[list] = None,
|
tools: Optional[list] = None,
|
||||||
|
reasoning_effort: Optional[str] = None,
|
||||||
):
|
):
|
||||||
"""Send a chat completion request to OCI GenAI."""
|
"""Send a chat completion request to OCI GenAI."""
|
||||||
model_config = get_model_config(model_id)
|
model_config = get_model_config(model_id)
|
||||||
@@ -208,7 +209,7 @@ class OCIGenAIClient:
|
|||||||
)
|
)
|
||||||
elif model_config.provider in ["meta", "xai", "google", "openai"]:
|
elif model_config.provider in ["meta", "xai", "google", "openai"]:
|
||||||
chat_request = self._build_generic_request(
|
chat_request = self._build_generic_request(
|
||||||
messages, temperature, max_tokens, top_p, tools, model_config.provider, stream
|
messages, temperature, max_tokens, top_p, tools, model_config.provider, stream, reasoning_effort
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported provider: {model_config.provider}")
|
raise ValueError(f"Unsupported provider: {model_config.provider}")
|
||||||
@@ -278,7 +279,7 @@ class OCIGenAIClient:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _build_generic_request(
|
def _build_generic_request(
|
||||||
self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], provider: str, stream: bool = False
|
self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], provider: str, stream: bool = False, reasoning_effort: Optional[str] = None
|
||||||
) -> GenericChatRequest:
|
) -> GenericChatRequest:
|
||||||
"""Build Generic chat request for Llama and other models."""
|
"""Build Generic chat request for Llama and other models."""
|
||||||
# Convert messages to Generic format
|
# Convert messages to Generic format
|
||||||
@@ -318,13 +319,21 @@ class OCIGenAIClient:
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
return GenericChatRequest(
|
# Build request parameters
|
||||||
messages=generic_messages,
|
request_params = {
|
||||||
temperature=temperature,
|
"messages": generic_messages,
|
||||||
max_tokens=max_tokens,
|
"temperature": temperature,
|
||||||
top_p=top_p,
|
"max_tokens": max_tokens,
|
||||||
is_stream=stream,
|
"top_p": top_p,
|
||||||
)
|
"is_stream": stream,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add reasoning_effort if provided (only for generic models)
|
||||||
|
if reasoning_effort:
|
||||||
|
request_params["reasoning_effort"] = reasoning_effort.upper()
|
||||||
|
logger.info(f"Setting reasoning_effort to {reasoning_effort.upper()} for {provider} model")
|
||||||
|
|
||||||
|
return GenericChatRequest(**request_params)
|
||||||
|
|
||||||
def embed(
|
def embed(
|
||||||
self,
|
self,
|
||||||
|
|||||||
153
tests/test_cherry_studio_optimization.sh
Executable file
153
tests/test_cherry_studio_optimization.sh
Executable file
@@ -0,0 +1,153 @@
|
|||||||
|
#!/bin/bash
#
# Manual smoke test for the Cherry Studio client optimizations:
#   1. client name reporting (x-title request header)
#   2. mapping of thinking_budget to reasoning_effort
#
# The script only sends requests and pretty-prints the responses; whether the
# client name and reasoning_effort were handled correctly has to be verified
# in the server log (see the summary printed at the end).
#
# Environment (optional overrides):
#   API_URL  chat-completions endpoint
#            (default: http://localhost:8000/v1/chat/completions)
#   API_KEY  bearer token sent in the Authorization header
#            (default: sk-oci-genai-default-key)
#
# Requires: curl, jq
# Exit status: 0 if every request completed, 1 if any request failed at the
#              transport or JSON-parsing level, 2 if a required tool is missing.

# No `set -e` on purpose: a failing case must not prevent the remaining cases
# from running. `pipefail` makes a curl failure visible through `curl | jq`.
set -uo pipefail

readonly API_URL="${API_URL:-http://localhost:8000/v1/chat/completions}"
readonly API_KEY="${API_KEY:-sk-oci-genai-default-key}"
readonly MODEL="google.gemini-2.5-pro"
readonly BANNER="=========================================="

for tool in curl jq; do
  if ! command -v "$tool" >/dev/null; then
    printf 'error: %s is required but not installed\n' "$tool" >&2
    exit 2
  fi
done

# Number of cases whose request pipeline (curl | jq) returned non-zero.
failures=0

#######################################
# Build the JSON request body for one test case.
# Globals:   MODEL (read)
# Arguments: $1 - user message content
#            $2 - thinking_budget (integer); omit or pass empty to send a
#                 request without extra_body
# Outputs:   JSON document on stdout
# Returns:   jq's exit status
#######################################
build_payload() {
  local content=$1
  local budget=${2:-}

  jq -n \
    --arg model "$MODEL" \
    --arg content "$content" \
    --argjson budget "${budget:-null}" \
    '{
      model: $model,
      messages: [{role: "user", content: $content}],
      temperature: 0.7,
      max_tokens: 100
    }
    + if $budget == null then {} else {
        extra_body: {google: {thinking_config: {
          thinking_budget: $budget,
          include_thoughts: true
        }}}
      } end'
}

#######################################
# Run one test case: print its banner, POST the request, pretty-print the
# response, and record a failure if the request pipeline fails.
# Globals:   API_URL, API_KEY, BANNER (read); failures (written)
# Arguments: $1 - case title (printed verbatim)
#            $2 - client name sent in the x-title header
#            $3 - user message content
#            $4 - optional thinking_budget (see build_payload)
# Outputs:   banner and response JSON on stdout, diagnostics on stderr
#######################################
run_case() {
  local title=$1
  local client=$2
  local content=$3
  local budget=${4:-}
  local payload

  echo "$BANNER"
  echo "$title"
  echo "$BANNER"

  if ! payload=$(build_payload "$content" "$budget"); then
    printf 'error: could not build request body: %s\n' "$title" >&2
    failures=$((failures + 1))
    echo ""
    return
  fi

  # -sS: no progress meter, but still report errors (plain -s would hide an
  # unreachable server entirely). HTTP error responses are not treated as
  # failures here; their JSON body is printed so it can be inspected.
  if ! curl -sS -X POST "$API_URL" \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer $API_KEY" \
    -H "x-title: $client" \
    -d "$payload" \
    | jq .; then
    printf 'error: request failed: %s\n' "$title" >&2
    failures=$((failures + 1))
  fi

  echo ""
}

run_case "测试 1: thinking_budget = 1000 (应映射到 low)" \
  "Cherry Studio" "Hello, how are you?" 1000
run_case "测试 2: thinking_budget = 5000 (应映射到 medium)" \
  "Cherry Studio" "What is 2+2?" 5000
run_case "测试 3: thinking_budget = 20000 (应映射到 high)" \
  "Cherry Studio" "Explain quantum computing" 20000
run_case "测试 4: thinking_budget = -1 (应使用模型默认值)" \
  "Cherry Studio" "Tell me a joke" -1
run_case "测试 5: 无 extra_body (正常请求)" \
  "Cherry Studio" "Hi there!"
run_case "测试 6: 不同客户端名称 (Postman)" \
  "Postman" "Test from Postman"

echo "$BANNER"
echo "所有测试完成!"
echo "请查看服务器日志,验证:"
echo "1. 客户端名称是否正确显示(Cherry Studio / Postman)"
echo "2. thinking_budget 是否正确映射到 reasoning_effort"
echo " - thinking_budget = 1000 → reasoning_effort = LOW"
echo " - thinking_budget = 5000 → reasoning_effort = MEDIUM"
echo " - thinking_budget = 20000 → reasoning_effort = HIGH"
echo " - thinking_budget = -1 → 使用模型默认值(无 reasoning_effort 日志)"
echo "$BANNER"

if ((failures > 0)); then
  printf 'error: %d request(s) failed\n' "$failures" >&2
  exit 1
fi
|
||||||
Reference in New Issue
Block a user