First commit
All checks were successful
Build and Push OCI GenAI Gateway Docker Image / docker-build-push (push) Successful in 32m3s
65
.env.example
Normal file
@@ -0,0 +1,65 @@
# API Settings
API_TITLE=OCI GenAI to OpenAI API Gateway
API_VERSION=0.0.1
API_PREFIX=/v1
API_PORT=8000
API_HOST=0.0.0.0
DEBUG=false

# Authentication
# List of API keys for authentication (JSON array)
# These are the keys clients will use in Authorization: Bearer <key>
API_KEYS=["sk-oci-genai-default-key"]

# ============================================
# OCI Configuration
# ============================================
# Path to OCI config file (usually ~/.oci/config)
OCI_CONFIG_FILE=~/.oci/config

# Profile names in the OCI config file
# Supports a single profile or multiple profiles separated by commas
# With multiple profiles, round-robin load balancing is applied automatically
# Examples:
#   Single profile:   OCI_CONFIG_PROFILE=DEFAULT
#   Multiple profiles: OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
# Note: each profile in ~/.oci/config must include region and tenancy (used as compartment_id)
OCI_CONFIG_PROFILE=DEFAULT

# Authentication type: api_key or instance_principal
OCI_AUTH_TYPE=api_key

# Optional: Direct endpoint for dedicated models
# GENAI_ENDPOINT=https://your-dedicated-endpoint

# Model Settings
# Note: Available models are dynamically loaded from OCI at startup
# Use GET /v1/models to see all available models
MAX_TOKENS=4096
TEMPERATURE=0.7

# Embedding Settings
# Truncate strategy for embeddings: END or START
EMBED_TRUNCATE=END

# Streaming Settings
# Global streaming on/off switch
# Set to false to disable streaming for all requests (overrides client stream=true)
ENABLE_STREAMING=true
# Chunk size for simulated streaming (fallback mode only)
# Only used when OCI returns non-streaming response
STREAM_CHUNK_SIZE=1024

# Logging
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL=INFO
# Log incoming requests (may contain sensitive data)
LOG_REQUESTS=false
# Log responses (may contain sensitive data)
LOG_RESPONSES=false
# Log file path (optional, if not set logs only to console)
LOG_FILE=./logs/app.log
# Max log file size in MB (default: 10)
LOG_FILE_MAX_SIZE=10
# Number of backup log files to keep (default: 5)
LOG_FILE_BACKUP_COUNT=5
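The gateway's real settings class lives in `src/core/config.py`, which is not part of this commit view. As a rough illustration of how the variables above could be loaded (the class and field names here are assumptions, not the project's actual code), a minimal pydantic-settings sketch:

```python
# Illustrative sketch only - mirrors .env.example, not the real src/core/config.py.
from typing import List

from pydantic_settings import BaseSettings, SettingsConfigDict


class GatewaySettings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    api_keys: List[str] = ["sk-oci-genai-default-key"]  # API_KEYS is parsed as a JSON array
    oci_config_file: str = "~/.oci/config"
    oci_config_profile: str = "DEFAULT"  # may be "DEFAULT,CHICAGO,ASHBURN"
    max_tokens: int = 4096
    temperature: float = 0.7
    enable_streaming: bool = True
    stream_chunk_size: int = 1024

    @property
    def profiles(self) -> List[str]:
        # Comma-separated profiles drive the round-robin load balancing.
        return [p.strip() for p in self.oci_config_profile.split(",") if p.strip()]


settings = GatewaySettings()
print(settings.profiles)  # ['DEFAULT'] unless OCI_CONFIG_PROFILE lists several profiles
```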
82
.gitea/workflows/ci.yaml
Normal file
@@ -0,0 +1,82 @@
# .gitea/workflows/ci.yaml
name: Build and Push OCI GenAI Gateway Docker Image

on:
  push:
    branches: [main, develop]
    tags: ['*']

env:
  DOCKER_BUILDKIT: "1"
  BUILDX_NO_DEFAULT_ATTESTATIONS: "1"

jobs:
  docker-build-push:
    runs-on: ubuntu-latest-amd64
    steps:
      - uses: actions/checkout@v4

      - name: Debug branch info
        run: |
          echo "📋 Branch Information:"
          echo "  github.ref: ${{ github.ref }}"
          echo "  github.ref_name: ${{ github.ref_name }}"
          echo "  github.event_name: ${{ github.event_name }}"

      - name: Setup Docker Buildx and Login
        run: |
          # Register QEMU for multi-architecture builds
          docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 2>/dev/null || true

          # Create the buildx builder
          docker buildx create --use --name oci_genai_builder \
            --driver docker-container \
            --driver-opt network=host \
            --driver-opt image=moby/buildkit:buildx-stable-1 \
            --driver-opt env.BUILDKIT_STEP_LOG_MAX_SIZE=50000000 \
            --driver-opt env.BUILDKIT_STEP_LOG_MAX_SPEED=10000000 \
            || docker buildx use oci_genai_builder
          docker buildx inspect --bootstrap

          # Log in to the Docker registry
          echo "${{ secrets.BUILD_TOKEN }}" | docker login ${{ gitea.server_url }} -u ${{ gitea.actor }} --password-stdin

      - name: Determine Docker tag
        id: tag
        run: |
          if [ "${{ github.ref_name }}" = "main" ]; then
            TAG="latest"
          elif [ "${{ github.ref_name }}" = "develop" ]; then
            TAG="develop"
          elif [[ "${{ github.ref }}" == refs/tags/* ]]; then
            TAG="${{ github.ref_name }}"
          else
            TAG="${{ github.ref_name }}"
          fi
          echo "tag=${TAG}" >> $GITHUB_OUTPUT
          echo "📦 Docker tag: ${TAG}"

      - name: Build and push multi-arch Docker image
        run: |
          # Strip the https:// prefix from the server URL
          REGISTRY=$(echo "${{ gitea.server_url }}" | sed 's|https\?://||')
          IMAGE_NAME="${REGISTRY}/${{ gitea.repository }}"
          TAG="${{ steps.tag.outputs.tag }}"
          FINAL_IMAGE_TAG="${IMAGE_NAME}:${TAG}"

          echo "🏗️ Building and pushing image: ${FINAL_IMAGE_TAG}"
          echo "  Platforms: linux/amd64, linux/arm64"

          # BuildKit output settings
          export BUILDKIT_PROGRESS=plain

          docker buildx build --pull --push \
            -t "${FINAL_IMAGE_TAG}" \
            --platform linux/amd64,linux/arm64 \
            --provenance=false \
            --sbom=false \
            -f Dockerfile .

          echo ""
          echo "✅ Build and push completed!"
          echo "🐳 Image: ${FINAL_IMAGE_TAG}"
78
.gitignore
vendored
Normal file
@@ -0,0 +1,78 @@
# Claude
.claude/
CLAUDE.md
.mcp.json

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual Environment
venv/
env/
ENV/
.venv

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Environment variables
.env
.env.local

# OCI Config (contains sensitive keys)
.oci/
*.pem

# Logs
*.log
logs/

# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/

# Distribution
*.tar.gz
*.whl

# Docker
*.dockerfile.swp

# Source repositories
.source/

# Temporary files
tmp/
temp/
*.tmp
example/

# OS
.DS_Store
Thumbs.db
47
Dockerfile
Normal file
@@ -0,0 +1,47 @@
# Multi-stage build for OCI GenAI to OpenAI API Gateway
FROM python:3.11-slim AS builder

# Set the working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency file
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --user -r requirements.txt

# Final image
FROM python:3.11-slim

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PATH=/root/.local/bin:$PATH

# Set the working directory
WORKDIR /app

# Copy Python dependencies
COPY --from=builder /root/.local /root/.local

# Copy application code
COPY src/ ./src/
COPY .env.example .env

# Create the log directory
RUN mkdir -p /app/logs

# Expose the port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health').read()"

# Start the application
CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 OCI GenAI Gateway

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
240
README.md
Normal file
@@ -0,0 +1,240 @@
# OCI GenAI to OpenAI API Gateway

> 🚀 An OpenAI-compatible REST API for Oracle Cloud Infrastructure's Generative AI Service

[](LICENSE)
[](https://www.python.org/downloads/)
[](https://fastapi.tiangolo.com/)

## 📖 Introduction

This is a FastAPI service that acts as a translation layer between OCI Generative AI and the OpenAI API, letting OpenAI SDK clients talk to OCI GenAI models without any code changes.

## ✨ Key Features

- 🔄 **OpenAI API compatible**: Works with the OpenAI SDK as-is; no changes to existing code
- 🤖 **Dynamic model discovery**: Automatically fetches all available models from OCI at startup
- 🌐 **Multi-region load balancing**: Round-robin load balancing across multiple OCI profiles
- 🖼️ **Multimodal support**: Text, images (vision models), Base64-encoded content, and more
- ⚡ **True streaming**: Genuine edge-to-edge streaming responses, TTFB < 200 ms
- 🔒 **Security**: Automatically filters sensitive information (OCIDs, request IDs, endpoint URLs)
- 🎯 **Performance**: Client connection pooling for a significant performance boost

## 🚀 Quick Start

### Prerequisites

- Python 3.8+
- An OCI account and API key
- Access to the OCI Generative AI service

### Installation

1. **Clone the repository**
   ```bash
   git clone <repository-url>
   cd oracle-openai
   ```

2. **Install dependencies**
   ```bash
   pip install -r requirements.txt
   ```

3. **Configure OCI**

   Create or edit `~/.oci/config`:
   ```ini
   [DEFAULT]
   user=ocid1.user.oc1...
   fingerprint=aa:bb:cc:dd...
   key_file=~/.oci/oci_api_key.pem
   tenancy=ocid1.tenancy.oc1...
   region=us-chicago-1
   ```

4. **Configure environment variables**

   Copy `.env.example` to `.env` and edit it:
   ```bash
   cp .env.example .env
   # Edit .env to set API_KEYS and other options
   ```

5. **Run the service**
   ```bash
   cd src
   python main.py
   ```

   The service starts at `http://localhost:8000`.

## 💻 Usage Examples

### cURL

```bash
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-oci-genai-default-key" \
  -d '{
    "model": "google.gemini-2.5-pro",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```

### Python OpenAI SDK

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-oci-genai-default-key",
    base_url="http://localhost:8000/v1"
)

response = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[{"role": "user", "content": "Hello!"}]
)

print(response.choices[0].message.content)
```

### Streaming

```python
stream = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[{"role": "user", "content": "Count from 1 to 10"}],
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```

### Vision models (multimodal)

```python
response = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/image.jpg"
                    }
                }
            ]
        }
    ]
)
```

## 📋 Supported Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/health` | GET | Health check |
| `/v1/models` | GET | List all available models |
| `/v1/chat/completions` | POST | Chat completions (streaming supported) |
| `/v1/embeddings` | POST | Text embeddings (see the example below) |
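The `/v1/embeddings` endpoint accepts the same request shape as OpenAI's embeddings API. A minimal sketch — the embedding model ID below is only illustrative; pick one returned by `GET /v1/models`:

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-oci-genai-default-key",
    base_url="http://localhost:8000/v1"
)

embeddings = client.embeddings.create(
    model="cohere.embed-multilingual-v3.0",  # example ID; use a model from /v1/models
    input=["OCI GenAI Gateway", "OpenAI-compatible embeddings"]
)

# Number of vectors and the dimensionality of the first one
print(len(embeddings.data), len(embeddings.data[0].embedding))
```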
## 🎨 Supported Models

Available models are discovered from OCI automatically at startup, including:

- **Cohere**: command-r-plus, command-r-16k, etc.
- **Meta**: llama-3.1-405b, llama-3.1-70b, llama-3.2-90b-vision, etc.
- **Google**: the gemini family
- **OpenAI**: the gpt family
- **xAI**: the grok family

Use `GET /v1/models` to see all available models.

## ⚙️ Configuration Options

### Key environment variables

| Variable | Description | Default |
|----------|-------------|---------|
| `API_KEYS` | List of API keys (JSON array) | - |
| `OCI_CONFIG_PROFILE` | OCI config profile(s); comma-separated for multiple | `DEFAULT` |
| `OCI_AUTH_TYPE` | Authentication type | `api_key` |
| `MAX_TOKENS` | Default maximum tokens | `4096` |
| `TEMPERATURE` | Default temperature | `0.7` |
| `ENABLE_STREAMING` | Global streaming switch | `true` |
| `LOG_LEVEL` | Log level | `INFO` |

See [.env.example](.env.example) for the full configuration.

## 🌐 Multi-Region Load Balancing

Multiple OCI profiles can be configured for automatic load balancing:

```bash
# .env
OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
```

Requests are distributed across regions using a round-robin strategy, as sketched below.

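A minimal sketch of the round-robin idea. The real implementation lives in `src/core/client_manager.py`, which is not included in this commit view; the class and method names below are illustrative only:

```python
import itertools
from typing import List


class RoundRobinSelector:
    """Cycle through the configured OCI profiles, one request at a time."""

    def __init__(self, profiles: List[str]):
        # e.g. ["DEFAULT", "CHICAGO", "ASHBURN"] parsed from OCI_CONFIG_PROFILE
        self._cycle = itertools.cycle(profiles)

    def next_profile(self) -> str:
        # Each call returns the next profile in order, wrapping around.
        return next(self._cycle)


selector = RoundRobinSelector(["DEFAULT", "CHICAGO", "ASHBURN"])
print([selector.next_profile() for _ in range(5)])
# ['DEFAULT', 'CHICAGO', 'ASHBURN', 'DEFAULT', 'CHICAGO']
```
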
## 🐳 Docker Deployment

```bash
# Using docker-compose
docker-compose up

# Or plain Docker
docker build -t oci-genai-gateway .
docker run -p 8000:8000 --env-file .env oci-genai-gateway
```

## 📚 Documentation

- [CLAUDE.md](CLAUDE.md) - Complete development documentation, including architecture notes, a development guide, and debugging tips
- [.env.example](.env.example) - Example environment configuration

## 🔧 Troubleshooting

### Common Issues

1. **Model not found**
   - Check the model ID spelling
   - Confirm the model is available in your OCI region
   - Check the startup logs to confirm the model was loaded

2. **Authentication failure**
   - Verify that `~/.oci/config` is correct
   - Check the API key file permissions: `chmod 600 ~/.oci/oci_api_key.pem`
   - Run `oci iam region list` to test your OCI configuration

3. **429 rate-limit errors**
   - Use multiple profiles for load balancing
   - Wait 1-2 minutes and retry

For more troubleshooting information, see [CLAUDE.md](CLAUDE.md#调试).

## 🤝 Contributing

Contributions are welcome! Feel free to open issues or pull requests.

## 📄 License

This project is open-sourced under the MIT License; see the [LICENSE](LICENSE) file.

## 🙏 Acknowledgements

- [FastAPI](https://fastapi.tiangolo.com/) - Modern, fast web framework
- [OCI Python SDK](https://github.com/oracle/oci-python-sdk) - Oracle Cloud Infrastructure SDK
- [OpenAI](https://openai.com/) - API design reference

---

**⭐ If this project helps you, please give it a star!**
35
docker-compose.yml
Normal file
@@ -0,0 +1,35 @@
version: '3.8'

services:
  oci-genai-gateway:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: oci-genai-gateway
    ports:
      - "8000:8000"
    volumes:
      # Mount the OCI config (adjust the host path as needed)
      - ~/.oci:/root/.oci:ro
      # Mount the environment configuration file
      - .env:/app/.env:ro
      # Mount the log directory
      - ./logs:/app/logs
    environment:
      - API_TITLE=OCI GenAI to OpenAI API Gateway
      - API_VERSION=0.0.1
      - DEBUG=false
      - LOG_LEVEL=INFO
    restart: unless-stopped
    healthcheck:
      # Use Python for the health check; curl is not installed in the slim image
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health').read()"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    networks:
      - genai-network

networks:
  genai-network:
    driver: bridge
24
init.sh
Normal file
@@ -0,0 +1,24 @@
#!/bin/sh

# Modify the CMakeLists.txt and source files to change the project name from "xmrigcc" to "xxxigcc"
sed -i 's/project(xmrigcc)/project(xxxigcc)/' CMakeLists.txt
sed -i 's/XMRigCC: Found ccache package/XXXigCC: Found ccache package/' CMakeLists.txt
sed -i 's/MINER_EXECUTABLE_NAME "xmrigMiner"/MINER_EXECUTABLE_NAME "xxxigMiner"/' CMakeLists.txt
sed -i 's/DAEMON_EXECUTABLE_NAME "xmrigDaemon"/DAEMON_EXECUTABLE_NAME "xxxigDaemon"/' CMakeLists.txt
sed -i 's/xmrigServer ${SOURCES_CC_SERVER}/xxxigServer ${SOURCES_CC_SERVER}/' CMakeLists.txt
sed -i 's/xmrigServer ${XMRIG_ASM_LIBRARY}/xxxigServer ${XMRIG_ASM_LIBRARY}/' CMakeLists.txt
sed -i 's/xmrigServer POST_BUILD/xxxigServer POST_BUILD/' CMakeLists.txt

# Modify donate functionality
sed -i 's/kDefaultDonateLevel = 3/kDefaultDonateLevel = 0/' src/donate.h
sed -i 's/kMinimumDonateLevel = 1/kMinimumDonateLevel = 0/' src/donate.h
sed -i 's/donate.graef.in/127.0.0.1/' src/net/strategies/DonateStrategy.cpp
sed -i 's/87.106.163.52/127.0.0.1/' src/net/strategies/DonateStrategy.cpp
sed -i 's/"donate-level": 3/"donate-level": 0/' src/config.json
sed -i 's/"donate-over-proxy": 1/"donate-over-proxy": 0/' src/config.json

# Modify version information
sed -i 's/Copyright (C) 2017- XMRigCC//' src/version.h
sed -i 's/https:\/\/github.com\/BenDr0id\/xmrigCC\///' src/version.h
sed -i 's/xmrigcc/xxxigcc/' src/version.h
sed -i 's/XMRigCC/XXXigCC/' src/version.h
12
requirements.txt
Normal file
@@ -0,0 +1,12 @@
# FastAPI and server
fastapi==0.115.0
uvicorn[standard]==0.32.0
pydantic==2.9.2
pydantic-settings==2.6.1

# OCI SDK (updated to latest stable version)
oci>=2.160.0

# Utilities
python-dotenv==1.0.1
python-multipart==0.0.17
1
src/api/__init__.py
Normal file
@@ -0,0 +1 @@
"""API module for OCI GenAI Gateway."""
1
src/api/adapters/__init__.py
Normal file
@@ -0,0 +1 @@
"""Request/Response adapters module."""
104
src/api/adapters/request_adapter.py
Normal file
@@ -0,0 +1,104 @@
"""
Adapter for converting OpenAI requests to OCI GenAI format.
"""
import logging
from typing import List, Dict, Any, Optional
from ..schemas import ChatCompletionRequest, EmbeddingRequest
from core.config import get_settings

logger = logging.getLogger(__name__)

# Content type handlers for extensible multimodal support
CONTENT_TYPE_HANDLERS = {
    "text": lambda item: {"type": "text", "text": item.get("text", "")},
    "image_url": lambda item: {"type": "image_url", "image_url": item.get("image_url", {})},
    "audio": lambda item: {"type": "audio", "audio_url": item.get("audio_url", {})},
    "video": lambda item: {"type": "video", "video_url": item.get("video_url", {})}
}


def adapt_chat_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Adapt OpenAI chat messages to OCI GenAI format.

    Args:
        messages: OpenAI format messages

    Returns:
        Adapted messages for OCI GenAI
    """
    adapted_messages = []

    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")

        # Handle different content types
        if isinstance(content, list):
            # Multimodal content
            adapted_content = []
            for item in content:
                if isinstance(item, dict):
                    item_type = item.get("type")
                    handler = CONTENT_TYPE_HANDLERS.get(item_type)
                    if handler:
                        adapted_content.append(handler(item))
                    else:
                        logger.warning(f"Unknown content type: {item_type}, skipping")

            adapted_messages.append({
                "role": role,
                "content": adapted_content
            })
        else:
            # Simple text content
            adapted_messages.append({
                "role": role,
                "content": content
            })

    return adapted_messages


def extract_chat_params(request: ChatCompletionRequest) -> Dict[str, Any]:
    """
    Extract chat parameters from OpenAI request.

    Args:
        request: OpenAI chat completion request

    Returns:
        Dictionary of parameters for OCI GenAI
    """
    settings = get_settings()

    params = {
        "temperature": request.temperature if request.temperature is not None else settings.temperature,
        "max_tokens": request.max_tokens if request.max_tokens is not None else settings.max_tokens,
        "top_p": request.top_p if request.top_p is not None else 1.0,
        "stream": request.stream or False,
    }

    # Add tools if present
    if request.tools:
        params["tools"] = request.tools

    return params


def adapt_embedding_input(request: EmbeddingRequest) -> List[str]:
    """
    Adapt OpenAI embedding input to OCI GenAI format.

    Args:
        request: OpenAI embedding request

    Returns:
        List of texts to embed
    """
    if isinstance(request.input, str):
        return [request.input]
    elif isinstance(request.input, list):
        return request.input
    else:
        return [str(request.input)]
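For reference, a quick illustration of what `adapt_chat_messages` produces for a mixed text/image message (output shown as a comment; the image URL is a placeholder):

```python
from api.adapters.request_adapter import adapt_chat_messages

openai_messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image"},
            {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
        ],
    }
]

adapted = adapt_chat_messages(openai_messages)
# [{'role': 'user',
#   'content': [{'type': 'text', 'text': 'Describe this image'},
#               {'type': 'image_url', 'image_url': {'url': 'https://example.com/image.jpg'}}]}]
```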
307
src/api/adapters/response_adapter.py
Normal file
@@ -0,0 +1,307 @@
"""
Adapter for converting OCI GenAI responses to OpenAI format.
"""
import time
import uuid
from typing import Dict, Any, List, Optional
from ..schemas import (
    ChatCompletionResponse,
    ChatCompletionChoice,
    ChatCompletionUsage,
    ChatMessage,
    EmbeddingResponse,
    EmbeddingData,
    EmbeddingUsage,
)


def adapt_chat_response(
    oci_response: Any,
    model_id: str,
    request_id: Optional[str] = None
) -> ChatCompletionResponse:
    """
    Adapt OCI GenAI chat response to OpenAI format.

    Args:
        oci_response: OCI GenAI response object
        model_id: Model identifier
        request_id: Optional request ID

    Returns:
        OpenAI-compatible chat completion response
    """
    response_id = request_id or f"chatcmpl-{uuid.uuid4().hex[:8]}"
    created_at = int(time.time())

    # Extract response data
    chat_response = oci_response.data.chat_response

    # Extract text content
    if hasattr(chat_response, 'text'):
        # Cohere format
        raw_text = chat_response.text
        # Try to parse as JSON if it's a string (OCI format)
        try:
            import json
            parsed = json.loads(raw_text)
            if isinstance(parsed, dict) and 'text' in parsed:
                content = parsed['text']
            else:
                content = raw_text
        except (json.JSONDecodeError, ValueError, TypeError):
            # Not JSON, use as-is
            content = raw_text
        finish_reason = chat_response.finish_reason if hasattr(chat_response, 'finish_reason') else "stop"
    elif hasattr(chat_response, 'choices') and len(chat_response.choices) > 0:
        # Llama/Generic format
        choice = chat_response.choices[0]
        if hasattr(choice, 'message'):
            raw_content = choice.message.content
            # Handle list format: [TextContent(text="...", type="TEXT")] or [{"text": "...", "type": "TEXT"}]
            if isinstance(raw_content, list):
                # Build multimodal content array
                adapted_content = []
                for item in raw_content:
                    # Handle OCI TextContent object
                    if hasattr(item, 'text') and hasattr(item, 'type'):
                        if item.type == 'TEXT' or item.type == 'text':
                            adapted_content.append({
                                "type": "text",
                                "text": item.text
                            })
                        # Future: handle IMAGE, AUDIO, VIDEO types
                    # Handle dict format
                    elif isinstance(item, dict):
                        item_type = item.get('type', 'TEXT').upper()
                        if item_type == 'TEXT':
                            adapted_content.append({
                                "type": "text",
                                "text": item.get('text', '')
                            })
                        # Future: handle other types
                    else:
                        # Fallback: convert to text
                        adapted_content.append({
                            "type": "text",
                            "text": str(item)
                        })

                # Simplify to string if only one text element (backward compatibility)
                if len(adapted_content) == 1 and adapted_content[0].get('type') == 'text':
                    content = adapted_content[0]['text']
                else:
                    content = adapted_content
            elif isinstance(raw_content, str):
                # Try to parse as JSON if it's a string (OCI format)
                try:
                    import json
                    parsed = json.loads(raw_content)
                    if isinstance(parsed, dict) and 'text' in parsed:
                        content = parsed['text']
                    else:
                        content = raw_content
                except (json.JSONDecodeError, ValueError):
                    # Not JSON, use as-is
                    content = raw_content
            else:
                content = raw_content
        else:
            content = str(choice)
        finish_reason = choice.finish_reason if hasattr(choice, 'finish_reason') else "stop"
    else:
        content = str(chat_response)
        finish_reason = "stop"

    # Create message
    message = ChatMessage(
        role="assistant",
        content=content
    )

    # Create choice
    choice = ChatCompletionChoice(
        index=0,
        message=message,
        finish_reason=finish_reason
    )

    # Extract usage information
    usage = None
    if hasattr(oci_response.data, 'usage'):
        oci_usage = oci_response.data.usage
        usage = ChatCompletionUsage(
            prompt_tokens=getattr(oci_usage, 'prompt_tokens', 0) or 0,
            completion_tokens=getattr(oci_usage, 'completion_tokens', 0) or 0,
            total_tokens=getattr(oci_usage, 'total_tokens', 0) or 0
        )

    return ChatCompletionResponse(
        id=response_id,
        object="chat.completion",
        created=created_at,
        model=model_id,
        choices=[choice],
        usage=usage
    )


def adapt_streaming_chunk(
    chunk_data: str,
    model_id: str,
    request_id: str,
    index: int = 0,
    is_first: bool = False
) -> str:
    """
    Adapt OCI GenAI streaming chunk to OpenAI SSE format.

    Args:
        chunk_data: Chunk text from OCI GenAI
        model_id: Model identifier
        request_id: Request ID
        index: Chunk index
        is_first: Whether this is the first chunk (should include role with empty content)

    Returns:
        OpenAI-compatible SSE formatted string
    """
    created_at = int(time.time())

    # Build delta - first chunk should include role with empty content
    delta = {}
    if is_first:
        delta["role"] = "assistant"
        delta["content"] = ""  # First chunk has empty content like OpenAI
    elif chunk_data:
        delta["content"] = chunk_data

    chunk = {
        "id": request_id,
        "object": "chat.completion.chunk",
        "created": created_at,
        "model": model_id,
        "system_fingerprint": None,
        "choices": [
            {
                "index": index,
                "delta": delta,
                "logprobs": None,
                "finish_reason": None
            }
        ],
        "usage": None
    }

    import json
    return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"


def adapt_streaming_done(
    model_id: str,
    request_id: str,
    usage: Optional[Dict[str, int]] = None
) -> str:
    """
    Create final SSE chunks for streaming completion (OpenAI format).

    Returns two chunks:
    1. Finish chunk with finish_reason="stop"
    2. Usage chunk with empty choices and usage stats

    Args:
        model_id: Model identifier
        request_id: Request ID
        usage: Optional usage statistics

    Returns:
        Final SSE formatted strings (finish chunk + usage chunk + [DONE])
    """
    import json
    created_at = int(time.time())

    result = ""

    # First chunk: finish_reason with empty delta
    finish_chunk = {
        "id": request_id,
        "object": "chat.completion.chunk",
        "created": created_at,
        "model": model_id,
        "system_fingerprint": None,
        "choices": [
            {
                "index": 0,
                "delta": {},
                "logprobs": None,
                "finish_reason": "stop"
            }
        ],
        "usage": None
    }
    result += f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"

    # Second chunk: usage stats with empty choices (like OpenAI)
    if usage:
        usage_chunk = {
            "id": request_id,
            "object": "chat.completion.chunk",
            "created": created_at,
            "model": model_id,
            "system_fingerprint": "",
            "choices": [],  # Empty choices array for usage chunk
            "usage": usage
        }
        result += f"data: {json.dumps(usage_chunk, ensure_ascii=False)}\n\n"

    # Final [DONE] marker
    result += "data: [DONE]\n\n"

    return result


def adapt_embedding_response(
    oci_response: Any,
    model_id: str,
    input_count: int
) -> EmbeddingResponse:
    """
    Adapt OCI GenAI embedding response to OpenAI format.

    Args:
        oci_response: OCI GenAI embedding response
        model_id: Model identifier
        input_count: Number of input texts

    Returns:
        OpenAI-compatible embedding response
    """
    embeddings_data = []

    # Extract embeddings
    if hasattr(oci_response.data, 'embeddings'):
        embeddings = oci_response.data.embeddings
        for idx, embedding in enumerate(embeddings):
            embeddings_data.append(
                EmbeddingData(
                    object="embedding",
                    embedding=embedding,
                    index=idx
                )
            )

    # Calculate usage (approximate)
    # OCI doesn't always provide token counts, so we estimate
    prompt_tokens = input_count * 10  # Rough estimate

    usage = EmbeddingUsage(
        prompt_tokens=prompt_tokens,
        total_tokens=prompt_tokens
    )

    return EmbeddingResponse(
        object="list",
        data=embeddings_data,
        model=model_id,
        usage=usage
    )
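For reference, a sketch of the SSE strings these helpers emit (the request ID and token counts are made-up values; the `created` timestamp varies):

```python
from api.adapters.response_adapter import adapt_streaming_chunk, adapt_streaming_done

first = adapt_streaming_chunk("", "google.gemini-2.5-pro", "chatcmpl-abc123", is_first=True)
delta = adapt_streaming_chunk("Hello", "google.gemini-2.5-pro", "chatcmpl-abc123")
done = adapt_streaming_done(
    "google.gemini-2.5-pro", "chatcmpl-abc123",
    usage={"prompt_tokens": 5, "completion_tokens": 1, "total_tokens": 6},
)

# first -> data: {... "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, ...}] ...}
# delta -> data: {... "choices": [{"index": 0, "delta": {"content": "Hello"}, ...}] ...}
# done  -> a finish chunk with "finish_reason": "stop", a usage chunk, then "data: [DONE]"
```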
57
src/api/auth.py
Normal file
@@ -0,0 +1,57 @@
"""
API authentication module.
"""
import logging
from fastapi import HTTPException, Security, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from typing import List

from core.config import get_settings

logger = logging.getLogger(__name__)
security = HTTPBearer()


async def verify_api_key(
    credentials: HTTPAuthorizationCredentials = Security(security)
) -> str:
    """
    Verify API key from Authorization header.

    Args:
        credentials: HTTP authorization credentials

    Returns:
        Validated API key

    Raises:
        HTTPException: If API key is invalid
    """
    api_key = credentials.credentials
    settings = get_settings()

    if api_key in settings.api_keys:
        logger.debug("API key validated successfully")
        return api_key

    logger.warning(f"Invalid API key attempted: {api_key[:10]}...")
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid API key",
        headers={"WWW-Authenticate": "Bearer"},
    )


async def get_api_key(
    credentials: HTTPAuthorizationCredentials = Security(security)
) -> str:
    """
    Async wrapper for API key verification.

    Args:
        credentials: HTTP authorization credentials

    Returns:
        Validated API key
    """
    return await verify_api_key(credentials)
161
src/api/error_handler.py
Normal file
@@ -0,0 +1,161 @@
"""
OCI error handler - filters sensitive information and produces user-friendly error responses.

This module is responsible for:
1. Intercepting OCI SDK ServiceError exceptions
2. Filtering sensitive information (OCIDs, request IDs, endpoint URLs)
3. Mapping OCI error codes to the OpenAI-compatible format
4. Generating user-friendly error messages
"""
import re
import logging
from typing import Dict

from oci.exceptions import ServiceError

from api.schemas import ErrorDetail, ErrorResponse

logger = logging.getLogger(__name__)


class OCIErrorHandler:
    """OCI error handler: filters sensitive information and converts the error format."""

    # Pre-compiled regex patterns (performance optimization)
    SENSITIVE_PATTERNS: Dict[str, re.Pattern] = {
        'tenancy_ocid': re.compile(r'ocid1\.tenancy\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
        'compartment_ocid': re.compile(r'ocid1\.compartment\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
        'user_ocid': re.compile(r'ocid1\.user\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
        'endpoint_ocid': re.compile(r'ocid1\.generativeaiendpoint\.[a-z0-9\.\-]+', re.IGNORECASE),
        'request_id': re.compile(r'[A-F0-9]{32}(/[A-F0-9]{32})*'),
        'endpoint_url': re.compile(r'https://[a-z0-9\.\-]+\.oci(\.oraclecloud)?\.com[^\s\)]*', re.IGNORECASE),
    }

    # Mapping from OCI status codes to OpenAI error types
    OCI_TO_OPENAI_ERROR_TYPE: Dict[int, str] = {
        400: "invalid_request_error",
        401: "authentication_error",
        403: "permission_error",
        404: "invalid_request_error",
        409: "invalid_request_error",
        429: "rate_limit_error",
        500: "server_error",
        502: "server_error",
        503: "server_error",
        504: "server_error",
    }

    # User-friendly error message templates
    USER_FRIENDLY_MESSAGES: Dict[int, str] = {
        400: "Invalid request parameters. Please check your input.",
        401: "Authentication failed. Please verify your API credentials.",
        403: "Access denied. You don't have permission to access this resource.",
        404: "The requested resource was not found.",
        409: "Request conflict. The resource may have been modified.",
        429: "Request rate limit exceeded. Please retry after a short delay.",
        500: "Internal server error. Please try again later.",
        502: "Bad gateway. The upstream service is unavailable.",
        503: "Service temporarily unavailable. Please try again later.",
        504: "Gateway timeout. The request took too long to process.",
    }

    @classmethod
    def sanitize_oci_error(cls, exc: ServiceError) -> ErrorResponse:
        """
        Handle an OCI ServiceError: filter sensitive information and return a user-friendly error response.

        Args:
            exc: OCI ServiceError exception object

        Returns:
            ErrorResponse: Filtered error response
        """
        # Log the full error (for debugging)
        logger.error(
            f"OCI ServiceError: status={exc.status}, code={exc.code}, "
            f"request_id={exc.request_id}, message={exc.message}"
        )

        # Filter sensitive information
        filtered_message = cls.filter_sensitive_info(str(exc.message))

        # Generate a user-friendly message
        user_message = cls.create_user_friendly_message(exc.status, filtered_message)

        # Map the error type
        error_type = cls.map_oci_status_to_openai(exc.status)

        # Build the ErrorResponse
        error_detail = ErrorDetail(
            message=user_message,
            type=error_type,
            code=f"oci_{exc.code.lower()}" if exc.code else "oci_error"
        )

        return ErrorResponse(error=error_detail)

    @classmethod
    def filter_sensitive_info(cls, text: str) -> str:
        """
        Filter sensitive information out of the given text.

        Args:
            text: Original text

        Returns:
            str: Filtered text
        """
        filtered = text

        # Apply each regex pattern and replace sensitive matches
        for pattern_name, regex_pattern in cls.SENSITIVE_PATTERNS.items():
            if pattern_name == 'tenancy_ocid':
                filtered = regex_pattern.sub('tenancy:***', filtered)
            elif pattern_name == 'endpoint_url':
                filtered = regex_pattern.sub('https://***', filtered)
            elif pattern_name == 'request_id':
                filtered = regex_pattern.sub('request-id:***', filtered)
            else:
                filtered = regex_pattern.sub('***', filtered)

        return filtered

    @classmethod
    def map_oci_status_to_openai(cls, status_code: int) -> str:
        """
        Map an OCI status code to an OpenAI error type.

        Args:
            status_code: HTTP status code

        Returns:
            str: OpenAI error type
        """
        # Use the mapping table; unknown status codes default to server_error
        return cls.OCI_TO_OPENAI_ERROR_TYPE.get(status_code, "server_error")

    @classmethod
    def create_user_friendly_message(cls, status_code: int, filtered_message: str) -> str:
        """
        Generate a user-friendly error message.

        Args:
            status_code: HTTP status code
            filtered_message: Already-filtered original error message

        Returns:
            str: User-friendly error message
        """
        # Prefer the predefined friendly message
        base_message = cls.USER_FRIENDLY_MESSAGES.get(
            status_code,
            "An unexpected error occurred. Please try again."
        )

        # If the filtered message still carries useful information, append it
        if filtered_message and filtered_message != str(status_code):
            # Truncate to the first 200 characters to avoid an overly long message
            truncated = filtered_message[:200]
            return f"{base_message} Details: {truncated}"

        return base_message
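A sketch of what the sensitive-information filter does to a typical upstream message (the OCID and hostname in the input are made up):

```python
from api.error_handler import OCIErrorHandler

raw = ("Chat failed for ocid1.tenancy.oc1..aaaabbbb1234 via "
       "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/v1/chat")

print(OCIErrorHandler.filter_sensitive_info(raw))
# Chat failed for tenancy:*** via https://***
```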
60
src/api/exceptions.py
Normal file
@@ -0,0 +1,60 @@
"""
Custom exceptions for the API.
"""
from fastapi import HTTPException


class ModelNotFoundException(HTTPException):
    """
    Exception raised when a requested model is not found.

    This exception is OpenAI API compatible and returns:
    - HTTP Status: 404
    - Error type: "invalid_request_error"
    - Error code: "model_not_found"
    """

    def __init__(self, model_id: str):
        """
        Initialize ModelNotFoundException.

        Args:
            model_id: The model ID that was not found
        """
        self.model_id = model_id
        self.error_code = "model_not_found"
        self.error_type = "invalid_request_error"

        # HTTPException detail will be the message
        super().__init__(
            status_code=404,
            detail=f"The model '{model_id}' does not exist or is not supported"
        )


class InvalidModelTypeException(HTTPException):
    """
    Exception raised when a model exists but is not the correct type.

    For example, using an embedding model for chat or vice versa.
    """

    def __init__(self, model_id: str, expected_type: str, actual_type: str):
        """
        Initialize InvalidModelTypeException.

        Args:
            model_id: The model ID
            expected_type: Expected model type (e.g., "chat", "embedding")
            actual_type: Actual model type
        """
        self.model_id = model_id
        self.expected_type = expected_type
        self.actual_type = actual_type
        self.error_code = "invalid_model_type"
        self.error_type = "invalid_request_error"

        super().__init__(
            status_code=400,
            detail=f"Model '{model_id}' is a {actual_type} model, not a {expected_type} model"
        )
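Without a custom exception handler (none is included in this excerpt), FastAPI serializes these as its default HTTPException body; a sketch with a made-up route and model ID:

```python
# Hypothetical route for illustration; "foo" is a made-up model ID.
from fastapi import FastAPI

from api.exceptions import ModelNotFoundException

app = FastAPI()


@app.get("/demo")
async def demo():
    raise ModelNotFoundException("foo")

# FastAPI's default handling returns HTTP 404 with:
#   {"detail": "The model 'foo' does not exist or is not supported"}
# A custom exception handler can reshape this into the OpenAI-style error object
# described in the class docstring (type/code fields are stored on the exception).
```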
1
src/api/routers/__init__.py
Normal file
@@ -0,0 +1 @@
"""API routers module."""
417
src/api/routers/chat.py
Normal file
@@ -0,0 +1,417 @@
"""
Chat completions API router - OpenAI compatible chat endpoint.
"""
import asyncio
import logging
import os
import uuid
from typing import AsyncIterator, Union
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse

from oci.exceptions import ServiceError

from api.auth import get_api_key
from api.schemas import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ErrorDetail
from api.error_handler import OCIErrorHandler
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
from api.adapters.request_adapter import adapt_chat_messages, extract_chat_params
from api.adapters.response_adapter import (
    adapt_chat_response,
    adapt_streaming_chunk,
    adapt_streaming_done,
)
from core.config import get_settings
from core.client_manager import get_client_manager
from core.models import get_model_config

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/chat",
    tags=["chat"],
    dependencies=[Depends(get_api_key)]
)


def extract_delta_from_chunk(chunk) -> str:
    """
    Extract delta text content from OCI streaming chunk.

    Args:
        chunk: OCI streaming response chunk (can be SSE Event, parsed object, etc.)

    Returns:
        Delta text content or empty string
    """
    try:
        # Handle SSE Event objects (from SSEClient)
        if hasattr(chunk, 'data'):
            import json
            # Parse JSON data from SSE event
            try:
                parsed = json.loads(chunk.data)

                # Recursively extract from parsed object
                if isinstance(parsed, dict):
                    # OCI Streaming format: message.content[].text
                    if 'message' in parsed and 'content' in parsed['message']:
                        content_array = parsed['message']['content']
                        if isinstance(content_array, list) and len(content_array) > 0:
                            # Extract text from all TEXT type content items
                            text_parts = []
                            for item in content_array:
                                if isinstance(item, dict) and item.get('type') == 'TEXT' and 'text' in item:
                                    text_parts.append(item['text'])
                            if text_parts:
                                return ''.join(text_parts)

                    # Try to get text from various possible locations
                    if 'text' in parsed:
                        return parsed['text']
                    if 'chatResponse' in parsed and 'text' in parsed['chatResponse']:
                        return parsed['chatResponse']['text']
                    if 'choices' in parsed and len(parsed['choices']) > 0:
                        choice = parsed['choices'][0]
                        if 'delta' in choice and 'content' in choice['delta']:
                            return choice['delta']['content']

            except (json.JSONDecodeError, KeyError, TypeError):
                # Return raw data if not JSON
                return str(chunk.data) if chunk.data else ""

        # Try to extract from chat_response.text (Cohere format)
        if hasattr(chunk, 'chat_response') and hasattr(chunk.chat_response, 'text'):
            return chunk.chat_response.text

        # Try to extract from choices[0].delta.content (Generic format)
        if hasattr(chunk, 'chat_response') and hasattr(chunk.chat_response, 'choices'):
            if len(chunk.chat_response.choices) > 0:
                choice = chunk.chat_response.choices[0]
                if hasattr(choice, 'delta') and hasattr(choice.delta, 'content'):
                    content = choice.delta.content
                    if isinstance(content, str):
                        return content
                    elif isinstance(content, list):
                        # Handle TextContent list
                        text_parts = []
                        for item in content:
                            if isinstance(item, dict) and 'text' in item:
                                text_parts.append(item['text'])
                            elif hasattr(item, 'text'):
                                text_parts.append(item.text)
                        return "".join(text_parts)

        # Try direct text attribute
        if hasattr(chunk, 'text'):
            return chunk.text

    except Exception as e:
        logger.warning(f"Failed to extract delta from chunk: {e}")

    return ""


def extract_content_from_response(chat_response) -> str:
    """
    Extract full content from non-streaming OCI response.

    Args:
        chat_response: OCI chat response object

    Returns:
        Full text content
    """
    if hasattr(chat_response, 'text'):
        raw_text = chat_response.text
        # Try to parse as JSON if it's a string (OCI format)
        try:
            import json
            parsed = json.loads(raw_text)
            if isinstance(parsed, dict) and 'text' in parsed:
                return parsed['text']
            return raw_text
        except (json.JSONDecodeError, ValueError, TypeError):
            return raw_text

    elif hasattr(chat_response, 'choices') and len(chat_response.choices) > 0:
        choice = chat_response.choices[0]
        if hasattr(choice, 'message'):
            raw_content = choice.message.content
            # Handle list format
            if isinstance(raw_content, list):
                text_parts = []
                for item in raw_content:
                    if isinstance(item, dict):
                        text_parts.append(item.get('text', ''))
                    elif hasattr(item, 'text'):
                        text_parts.append(item.text)
                    else:
                        text_parts.append(str(item))
                return "".join(text_parts)
            elif isinstance(raw_content, str):
                try:
                    import json
                    parsed = json.loads(raw_content)
                    if isinstance(parsed, dict) and 'text' in parsed:
                        return parsed['text']
                    return raw_content
                except (json.JSONDecodeError, ValueError):
                    return raw_content
            else:
                return str(raw_content)
        return str(choice)

    return str(chat_response)


@router.post("/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    """
    Create a chat completion using OCI Generative AI.

    Args:
        request: Chat completion request

    Returns:
        Chat completion response
    """
    logger.info(f"Chat completion request for model: {request.model}")

    settings = get_settings()

    # Validate model exists
    model_config = get_model_config(request.model)
    if not model_config:
        raise ModelNotFoundException(request.model)

    # Validate model type is chat (ondemand or dedicated)
    if model_config.type not in ("ondemand", "dedicated"):
        raise InvalidModelTypeException(
            model_id=request.model,
            expected_type="chat",
            actual_type=model_config.type
        )

    # Note: Multimodal capability validation is handled by the model itself
    # If a model doesn't support certain content types, it will raise an error
    # For example, Cohere models will raise ValueError for non-text content

    # Get OCI client from manager (round-robin load balancing)
    client_manager = get_client_manager()
    oci_client = client_manager.get_client()

    # Adapt messages
    messages = adapt_chat_messages([msg.dict() for msg in request.messages])

    # Extract parameters
    params = extract_chat_params(request)

    # Check global streaming setting
    # If streaming is globally disabled, override client request
    enable_stream = request.stream and settings.enable_streaming

    if not settings.enable_streaming and request.stream:
        logger.info("Streaming requested but globally disabled via ENABLE_STREAMING=false")

    # Handle streaming
    if enable_stream:
        request_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"

        async def generate_stream() -> AsyncIterator[str]:
            """Generate streaming response with true non-blocking streaming."""
            try:
                # Run OCI SDK call in executor to prevent blocking
                # This is critical for achieving true streaming (msToFirstChunk < 1s)
                loop = asyncio.get_event_loop()
                response = await loop.run_in_executor(
                    None,
                    lambda: oci_client.chat(
                        model_id=request.model,
                        messages=messages,
                        temperature=params["temperature"],
                        max_tokens=params["max_tokens"],
                        top_p=params["top_p"],
                        stream=True,  # Enable real streaming
                        tools=params.get("tools"),
                    )
                )

                # Process real streaming response
                accumulated_usage = None

                # Check if response.data is an SSE stream (iterable)
                # When stream=True, OCI SDK returns response.data as SSEClient
                try:
                    # Try to iterate over the stream
                    stream_data = response.data if hasattr(response, 'data') else response

                    # Check if it's SSEClient or any iterable type
                    stream_type_name = type(stream_data).__name__
                    is_sse_client = 'SSEClient' in stream_type_name
                    is_iterable = hasattr(stream_data, '__iter__') or hasattr(stream_data, '__next__')

                    # SSEClient is always treated as streaming, even if hasattr check fails
                    if is_sse_client or is_iterable:
                        # Real streaming: iterate over chunks
                        # SSEClient requires calling .events() method to iterate
                        if is_sse_client and hasattr(stream_data, 'events'):
                            iterator = stream_data.events()
                        else:
                            iterator = stream_data

                        # Send first chunk with role and empty content (OpenAI format)
                        yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)

                        # Use queue for thread-safe chunk forwarding
                        import queue
                        import threading
                        chunk_queue = queue.Queue()

                        def read_chunks():
                            """Read chunks in background thread and put in queue."""
                            try:
                                for chunk in iterator:
                                    chunk_queue.put(("chunk", chunk))
                                chunk_queue.put(("done", None))
                            except Exception as e:
                                chunk_queue.put(("error", e))

                        # Start background thread to read chunks
                        reader_thread = threading.Thread(target=read_chunks, daemon=True)
                        reader_thread.start()

                        # Yield chunks as they arrive from queue
                        while True:
                            # Non-blocking queue get with timeout
                            try:
                                msg_type, data = await loop.run_in_executor(
                                    None,
                                    lambda: chunk_queue.get(timeout=0.01)
                                )
                            except queue.Empty:
                                # Allow other async tasks to run
                                await asyncio.sleep(0)
                                continue

                            if msg_type == "done":
                                break
                            elif msg_type == "error":
                                raise data
                            elif msg_type == "chunk":
                                chunk = data
                                # Extract delta content from chunk
                                delta_text = extract_delta_from_chunk(chunk)

                                if delta_text:
                                    yield adapt_streaming_chunk(delta_text, request.model, request_id, 0, is_first=False)

                                # Try to extract usage from chunk (typically in final chunk)
                                # Handle both SSE Event format and object format
                                if hasattr(chunk, 'data'):
                                    # SSE Event - parse JSON to extract usage
                                    try:
                                        import json
                                        parsed = json.loads(chunk.data)
                                        if isinstance(parsed, dict) and 'usage' in parsed:
                                            usage_data = parsed['usage']
                                            accumulated_usage = {
                                                "prompt_tokens": usage_data.get('promptTokens', 0) or 0,
                                                "completion_tokens": usage_data.get('completionTokens', 0) or 0,
                                                "total_tokens": usage_data.get('totalTokens', 0) or 0
                                            }
                                    except (json.JSONDecodeError, KeyError, TypeError):
                                        pass
                                elif hasattr(chunk, 'usage') and chunk.usage:
                                    # Object format
                                    accumulated_usage = {
                                        "prompt_tokens": getattr(chunk.usage, 'prompt_tokens', 0) or 0,
                                        "completion_tokens": getattr(chunk.usage, 'completion_tokens', 0) or 0,
                                        "total_tokens": getattr(chunk.usage, 'total_tokens', 0) or 0
                                    }

                        # Send done message with usage
                        yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)

                    else:
                        # Fallback: non-streaming response, simulate streaming
                        logger.warning(f"OCI SDK returned non-iterable response (type: {type(stream_data).__name__}), falling back to simulated streaming")

                        # Extract text from non-streaming response
                        chat_response = stream_data.chat_response if hasattr(stream_data, 'chat_response') else stream_data
                        content = extract_content_from_response(chat_response)

                        # Extract usage information
                        if hasattr(stream_data, 'usage'):
                            oci_usage = stream_data.usage
                            accumulated_usage = {
                                "prompt_tokens": getattr(oci_usage, 'prompt_tokens', 0) or 0,
                                "completion_tokens": getattr(oci_usage, 'completion_tokens', 0) or 0,
                                "total_tokens": getattr(oci_usage, 'total_tokens', 0) or 0
                            }

                        # Simulate streaming by chunking
                        # First send empty chunk with role (OpenAI format)
                        yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)

                        chunk_size = settings.stream_chunk_size
                        for i in range(0, len(content), chunk_size):
                            chunk = content[i:i + chunk_size]
                            yield adapt_streaming_chunk(chunk, request.model, request_id, 0, is_first=False)

                        yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)

                except TypeError as te:
                    # Handle case where response is not iterable at all
|
||||||
|
logger.error(f"Response is not iterable: {te}", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in streaming: {str(e)}", exc_info=True)
|
||||||
|
import json
|
||||||
|
|
||||||
|
# 根据异常类型处理并过滤敏感信息
|
||||||
|
if isinstance(e, ServiceError):
|
||||||
|
error_response = OCIErrorHandler.sanitize_oci_error(e)
|
||||||
|
else:
|
||||||
|
# 通用错误也要过滤可能包含的敏感信息
|
||||||
|
filtered_msg = OCIErrorHandler.filter_sensitive_info(str(e))
|
||||||
|
error_response = ErrorResponse(
|
||||||
|
error=ErrorDetail(
|
||||||
|
message="An error occurred during streaming",
|
||||||
|
type="server_error",
|
||||||
|
code="streaming_error"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
yield f"data: {json.dumps(error_response.dict(), ensure_ascii=False)}\n\n"
|
||||||
|
|
||||||
|
return StreamingResponse(
|
||||||
|
generate_stream(),
|
||||||
|
media_type="text/event-stream"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
try:
|
||||||
|
response = oci_client.chat(
|
||||||
|
model_id=request.model,
|
||||||
|
messages=messages,
|
||||||
|
temperature=params["temperature"],
|
||||||
|
max_tokens=params["max_tokens"],
|
||||||
|
top_p=params["top_p"],
|
||||||
|
stream=False,
|
||||||
|
tools=params.get("tools"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Adapt response to OpenAI format
|
||||||
|
openai_response = adapt_chat_response(response, request.model)
|
||||||
|
|
||||||
|
if settings.log_responses:
|
||||||
|
logger.debug(f"Response: {openai_response}")
|
||||||
|
|
||||||
|
return openai_response
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in chat completion: {str(e)}", exc_info=True)
|
||||||
|
# 直接 raise,让全局异常处理器统一过滤敏感信息
|
||||||
|
raise
|
||||||
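Note: because this router keeps the OpenAI wire format, any OpenAI-compatible client can exercise both the streaming and non-streaming paths above. A minimal sketch, not part of the commit, assuming the gateway runs locally on its configured port, GATEWAY_KEY is one of the keys configured in API_KEYS, and the model id is one returned by GET /v1/models:

# Hypothetical smoke test for the chat completions endpoint (illustrative only).
from openai import OpenAI  # assumes the official openai>=1.x client is installed

client = OpenAI(base_url="http://localhost:8000/v1", api_key="GATEWAY_KEY")  # placeholder values

# Streaming path: each chunk is a "chat.completion.chunk" object produced by adapt_streaming_chunk.
stream = client.chat.completions.create(
    model="meta.llama-3.3-70b-instruct",  # placeholder; use an id listed by GET /v1/models
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)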
85
src/api/routers/embeddings.py
Normal file
@@ -0,0 +1,85 @@
"""
Embeddings API router - OpenAI compatible embeddings endpoint.
"""
import logging
from fastapi import APIRouter, Depends, HTTPException

from api.auth import get_api_key
from api.schemas import EmbeddingRequest, EmbeddingResponse
from api.adapters.request_adapter import adapt_embedding_input
from api.adapters.response_adapter import adapt_embedding_response
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
from core.config import get_settings
from core.client_manager import get_client_manager
from core.models import get_model_config

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/embeddings",
    tags=["embeddings"],
    dependencies=[Depends(get_api_key)]
)


@router.post("", response_model=EmbeddingResponse)
@router.post("/", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """
    Create embeddings using OCI Generative AI.

    Args:
        request: Embedding request

    Returns:
        Embedding response
    """
    logger.info(f"Embedding request for model: {request.model}")

    settings = get_settings()

    # Validate model exists
    model_config = get_model_config(request.model)
    if not model_config:
        raise ModelNotFoundException(request.model)

    # Validate model type is embedding
    if model_config.type != "embedding":
        raise InvalidModelTypeException(
            model_id=request.model,
            expected_type="embedding",
            actual_type=model_config.type
        )

    # Get OCI client from manager (round-robin load balancing)
    client_manager = get_client_manager()
    oci_client = client_manager.get_client()

    # Adapt input
    texts = adapt_embedding_input(request)
    input_count = len(texts)

    try:
        # Generate embeddings
        response = oci_client.embed(
            model_id=request.model,
            texts=texts,
            truncate=settings.embed_truncate,
        )

        # Adapt response to OpenAI format
        openai_response = adapt_embedding_response(
            response,
            request.model,
            input_count
        )

        if settings.log_responses:
            logger.debug(f"Embeddings generated: {len(openai_response.data)} vectors")

        return openai_response

    except Exception as e:
        logger.error(f"Error in embedding generation: {str(e)}", exc_info=True)
        # Re-raise and let the global exception handlers filter sensitive information
        raise
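Note: the embeddings route accepts the same payload shape as the OpenAI embeddings API. A minimal sketch, not part of the commit, assuming a locally running gateway, a placeholder API key, and an embedding model id that model discovery actually returned:

# Hypothetical call against the embeddings route (illustrative only).
import requests  # assumes requests is available in the client environment

resp = requests.post(
    "http://localhost:8000/v1/embeddings",            # placeholder host/port
    headers={"Authorization": "Bearer GATEWAY_KEY"},   # placeholder key from API_KEYS
    json={
        "model": "cohere.embed-multilingual-v3.0",     # placeholder embedding model id
        "input": ["first text", "second text"],
    },
)
resp.raise_for_status()
body = resp.json()
# body["data"] is a list of EmbeddingData objects: {"object": "embedding", "embedding": [...], "index": n}
print(len(body["data"]), "vectors,", body["usage"])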
78
src/api/routers/models.py
Normal file
@@ -0,0 +1,78 @@
"""
Models API router - OpenAI compatible model listing.
"""
import logging
from fastapi import APIRouter, Depends

from api.auth import get_api_key
from api.schemas import ModelListResponse, ModelInfo
from core.models import get_all_models

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/models",
    tags=["models"],
    dependencies=[Depends(get_api_key)]
)


@router.get("", response_model=ModelListResponse)
@router.get("/", response_model=ModelListResponse)
async def list_models():
    """
    List available models in OpenAI format.

    Returns:
        ModelListResponse: List of available models
    """
    logger.info("Listing available models")

    models = get_all_models()

    model_list = [
        ModelInfo(
            id=model.id,
            object="model",
            created=0,
            owned_by="oracle"
        )
        for model in models
    ]

    return ModelListResponse(
        object="list",
        data=model_list
    )


@router.get("/{model_id}", response_model=ModelInfo)
async def get_model(model_id: str):
    """
    Get information about a specific model.

    Args:
        model_id: Model identifier

    Returns:
        ModelInfo: Model information
    """
    logger.info(f"Getting model info: {model_id}")

    from core.models import get_model_config

    model_config = get_model_config(model_id)

    if not model_config:
        from fastapi import HTTPException
        raise HTTPException(
            status_code=404,
            detail=f"Model {model_id} not found"
        )

    return ModelInfo(
        id=model_config.id,
        object="model",
        created=0,
        owned_by="oracle"
    )
139
src/api/schemas.py
Normal file
@@ -0,0 +1,139 @@
"""
OpenAI-compatible API schemas.
"""
from typing import List, Optional, Union, Dict, Any, Literal
from pydantic import BaseModel, Field


# ============= Chat Completion Schemas =============

class ChatMessage(BaseModel):
    """A chat message."""
    role: Literal["system", "user", "assistant", "tool"]
    content: Union[str, List[Dict[str, Any]]]
    name: Optional[str] = None
    tool_calls: Optional[List[Dict[str, Any]]] = None
    tool_call_id: Optional[str] = None


class ChatCompletionRequest(BaseModel):
    """OpenAI chat completion request."""
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = True  # Default to streaming
    stop: Optional[Union[str, List[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None
    tools: Optional[List[Dict[str, Any]]] = None
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None


class ChatCompletionChoice(BaseModel):
    """A chat completion choice."""
    index: int
    message: ChatMessage
    finish_reason: Optional[str] = None
    logprobs: Optional[Dict[str, Any]] = None


class ChatCompletionUsage(BaseModel):
    """Token usage information."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    """OpenAI chat completion response."""
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Optional[ChatCompletionUsage] = None
    system_fingerprint: Optional[str] = None


class ChatCompletionStreamChoice(BaseModel):
    """A streaming chat completion choice."""
    index: int
    delta: Dict[str, Any]
    finish_reason: Optional[str] = None


class ChatCompletionStreamResponse(BaseModel):
    """OpenAI streaming chat completion response."""
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionStreamChoice]
    system_fingerprint: Optional[str] = None


# ============= Embedding Schemas =============

class EmbeddingRequest(BaseModel):
    """OpenAI embedding request."""
    model: str
    input: Union[str, List[str]]
    encoding_format: Optional[str] = "float"
    user: Optional[str] = None


class EmbeddingData(BaseModel):
    """Embedding data."""
    object: str = "embedding"
    embedding: List[float]
    index: int


class EmbeddingUsage(BaseModel):
    """Embedding usage information."""
    prompt_tokens: int
    total_tokens: int


class EmbeddingResponse(BaseModel):
    """OpenAI embedding response."""
    object: str = "list"
    data: List[EmbeddingData]
    model: str
    usage: EmbeddingUsage


# ============= Model Schemas =============

class ModelInfo(BaseModel):
    """Model information."""
    id: str
    object: str = "model"
    created: int = 0
    owned_by: str = "oracle"


class ModelListResponse(BaseModel):
    """Model list response."""
    object: str = "list"
    data: List[ModelInfo]


# ============= Error Schemas =============

class ErrorDetail(BaseModel):
    """Error detail."""
    message: str
    type: str
    param: Optional[str] = None
    code: Optional[str] = None


class ErrorResponse(BaseModel):
    """Error response."""
    error: ErrorDetail
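Note: these schemas mirror the OpenAI JSON shapes, so request validation is just Pydantic construction. A minimal sketch, not part of the commit, using placeholder values:

# Illustrative validation of an OpenAI-style payload against the gateway schemas.
from api.schemas import ChatCompletionRequest

payload = {
    "model": "meta.llama-3.3-70b-instruct",  # placeholder model id
    "messages": [
        {"role": "system", "content": "You are concise."},
        {"role": "user", "content": "Ping?"},
    ],
    "max_tokens": 64,
}

req = ChatCompletionRequest(**payload)
assert req.stream is True                  # streaming is the default unless the client sends stream=false
assert req.messages[1].role == "user"      # dicts are coerced into ChatMessage instances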
1
src/core/__init__.py
Normal file
@@ -0,0 +1 @@
"""Core module for OCI GenAI Gateway."""
70
src/core/client_manager.py
Normal file
@@ -0,0 +1,70 @@
"""
Simple OCI client manager with multi-profile round-robin load balancing.
"""
import logging
from typing import List, Dict
from threading import Lock

from .config import Settings, get_settings
from .oci_client import OCIGenAIClient

logger = logging.getLogger(__name__)


class OCIClientManager:
    """OCI client manager supporting round-robin load balancing and a client connection pool."""

    def __init__(self, settings: Settings = None):
        self.settings = settings or get_settings()
        self.profiles = self.settings.get_profiles()
        self.current_index = 0
        self.lock = Lock()

        # Pre-create the client connection pool
        self._clients: Dict[str, OCIGenAIClient] = {}
        logger.info(f"Initializing OCI client manager with {len(self.profiles)} profiles: {self.profiles}")

        for profile in self.profiles:
            try:
                self._clients[profile] = OCIGenAIClient(self.settings, profile)
                logger.info(f"✓ Created client instance: {profile}")
            except Exception as e:
                logger.error(f"✗ Failed to create client instance [{profile}]: {e}")
                raise

    def get_client(self) -> OCIGenAIClient:
        """
        Get the next client (round-robin strategy).

        Selects a client instance from the pre-created connection pool using a
        round-robin algorithm. This method is thread-safe.

        Returns:
            OCIGenAIClient: A pre-created OCI client instance

        Note:
            Client instances are created when the manager is initialized; this
            method never creates new instances.
        """
        with self.lock:
            # If there is only one profile, return it directly
            if len(self.profiles) == 1:
                return self._clients[self.profiles[0]]

            # Round-robin profile selection
            profile = self.profiles[self.current_index]
            self.current_index = (self.current_index + 1) % len(self.profiles)

            logger.debug(f"Selected profile: {profile} (round-robin)")
            return self._clients[profile]


# Global client manager instance
_client_manager = None


def get_client_manager() -> OCIClientManager:
    """Get the global client manager instance."""
    global _client_manager
    if _client_manager is None:
        _client_manager = OCIClientManager()
    return _client_manager
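Note: the round-robin behaviour is easiest to see by calling get_client() repeatedly. A minimal sketch, not part of the commit, assuming three profiles are configured in OCI_CONFIG_PROFILE and each exists in the OCI config file:

# Illustrative round-robin walkthrough (placeholder profile names).
from core.client_manager import get_client_manager

manager = get_client_manager()
# With OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN the selected profiles cycle:
for _ in range(4):
    client = manager.get_client()
    print(client.profile)   # DEFAULT, CHICAGO, ASHBURN, DEFAULT, ...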
100
src/core/config.py
Normal file
@@ -0,0 +1,100 @@
"""
Configuration module for OCI Generative AI to OpenAI API Gateway.
"""
import os
import logging
from pathlib import Path
from typing import Optional, List
from pydantic_settings import BaseSettings

logger = logging.getLogger(__name__)


# Find project root directory (where .env should be)
def find_project_root() -> Path:
    """Find the project root directory by looking for .env or requirements.txt."""
    current = Path(__file__).resolve().parent  # Start from src/core/
    # Go up until we find project root markers
    while current != current.parent:
        if (current / ".env").exists() or (current / "requirements.txt").exists():
            return current
        current = current.parent
    return Path.cwd()  # Fallback to current directory


PROJECT_ROOT = find_project_root()


class Settings(BaseSettings):
    """Application settings with environment variable support."""

    # API Settings
    api_title: str = "OCI GenAI to OpenAI API Gateway"
    api_version: str = "1.0.0"
    api_prefix: str = "/v1"
    api_port: int = 8000
    api_host: str = "0.0.0.0"
    debug: bool = False

    # Authentication
    api_keys: List[str] = ["sk-oci-genai-default-key"]

    # OCI Settings
    oci_config_file: str = "~/.oci/config"
    oci_config_profile: str = "DEFAULT"  # Multiple profiles supported, comma-separated, e.g. DEFAULT,CHICAGO,ASHBURN
    oci_auth_type: str = "api_key"  # api_key or instance_principal

    # GenAI Service Settings
    genai_endpoint: Optional[str] = None
    max_tokens: int = 4096
    temperature: float = 0.7

    # Embedding Settings
    embed_truncate: str = "END"  # END or START

    # Streaming Settings
    enable_streaming: bool = True
    stream_chunk_size: int = 1024

    # Logging
    log_level: str = "INFO"
    log_requests: bool = False
    log_responses: bool = False
    log_file: Optional[str] = None
    log_file_max_size: int = 10  # MB
    log_file_backup_count: int = 5

    class Config:
        # Use absolute path to .env file in project root
        env_file = str(PROJECT_ROOT / ".env")
        env_file_encoding = "utf-8"
        case_sensitive = False

        # Allow reading from environment variables
        env_prefix = ""

    def model_post_init(self, __context) -> None:
        """Expand OCI config file path."""
        # Expand OCI config file path
        config_path = os.path.expanduser(self.oci_config_file)

        # If it's a relative path (starts with ./ or doesn't start with /), resolve it from project root
        if not config_path.startswith('/') and not config_path.startswith('~'):
            # Remove leading ./ if present
            if config_path.startswith('./'):
                config_path = config_path[2:]
            config_path = str(PROJECT_ROOT / config_path)

        # Update the config path
        self.oci_config_file = config_path

    def get_profiles(self) -> List[str]:
        """Return the list of configured OCI profiles."""
        return [p.strip() for p in self.oci_config_profile.split(',') if p.strip()]


# Global settings instance
settings = Settings()


def get_settings() -> Settings:
    """Get the global settings instance."""
    return settings
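Note: get_profiles() is what turns the comma-separated OCI_CONFIG_PROFILE value into the list the client manager iterates over. A minimal sketch, not part of the commit, assuming the environment variable shown below:

# Illustrative profile parsing (placeholder profile names).
import os
os.environ["OCI_CONFIG_PROFILE"] = "DEFAULT, CHICAGO ,ASHBURN"

from core.config import Settings

s = Settings()
print(s.get_profiles())   # ['DEFAULT', 'CHICAGO', 'ASHBURN'] - whitespace stripped, empty entries dropped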
260
src/core/models.py
Normal file
@@ -0,0 +1,260 @@
"""
Model definitions and configurations for OCI Generative AI models.
"""
import logging
import os
from typing import Dict, List, Optional
from pydantic import BaseModel

logger = logging.getLogger(__name__)


class ModelConfig(BaseModel):
    """Configuration for a single model."""
    id: str
    name: str
    type: str  # ondemand, dedicated, embedding
    provider: str  # cohere, meta, openai, etc.
    region: Optional[str] = None
    compartment_id: Optional[str] = None
    endpoint: Optional[str] = None
    supports_streaming: bool = True
    supports_tools: bool = False
    supports_multimodal: bool = False
    multimodal_types: List[str] = []
    max_tokens: int = 4096
    context_window: int = 128000


# OCI Generative AI models (dynamically loaded from OCI at startup)
OCI_CHAT_MODELS: Dict[str, ModelConfig] = {}

OCI_EMBED_MODELS: Dict[str, ModelConfig] = {}


def get_all_models() -> List[ModelConfig]:
    """Get all available models."""
    return list(OCI_CHAT_MODELS.values()) + list(OCI_EMBED_MODELS.values())


def get_chat_models() -> List[ModelConfig]:
    """Get all chat models."""
    return list(OCI_CHAT_MODELS.values())


def get_embed_models() -> List[ModelConfig]:
    """Get all embedding models."""
    return list(OCI_EMBED_MODELS.values())


def get_model_config(model_id: str) -> Optional[ModelConfig]:
    """Get configuration for a specific model."""
    if model_id in OCI_CHAT_MODELS:
        return OCI_CHAT_MODELS[model_id]
    if model_id in OCI_EMBED_MODELS:
        return OCI_EMBED_MODELS[model_id]
    return None


def fetch_models_from_oci(compartment_id: Optional[str] = None, region: Optional[str] = None,
                          config_path: str = "./.oci/config",
                          profile: str = "DEFAULT") -> Dict[str, Dict[str, ModelConfig]]:
    """
    Dynamically fetch available models from OCI Generative AI service.

    If compartment_id or region are not provided, they will be read from the OCI config file.
    - compartment_id defaults to 'tenancy' from config
    - region defaults to 'region' from config

    Args:
        compartment_id: OCI compartment ID (optional, defaults to tenancy from config)
        region: OCI region (optional, defaults to region from config)
        config_path: Path to OCI config file
        profile: OCI config profile name

    Returns:
        Dictionary with 'chat' and 'embed' keys containing model configs
    """
    try:
        import oci
        from oci.generative_ai import GenerativeAiClient

        # Load OCI configuration
        config = oci.config.from_file(
            file_location=os.path.expanduser(config_path),
            profile_name=profile
        )

        # Use values from config if not provided
        if not region:
            region = config.get("region")
            logger.info(f"📍 Using region from OCI config: {region}")

        if not compartment_id:
            compartment_id = config.get("tenancy")
            logger.info(f"📦 Using tenancy as compartment_id: {compartment_id}")

        if not region or not compartment_id:
            logger.error("❌ Missing region or compartment_id in OCI config")
            return {"chat": {}, "embed": {}}

        # Create GenerativeAiClient (not GenerativeAiInferenceClient)
        service_endpoint = f"https://generativeai.{region}.oci.oraclecloud.com"
        logger.info(f"🔗 Connecting to OCI GenerativeAI endpoint: {service_endpoint}")
        client = GenerativeAiClient(config, service_endpoint=service_endpoint)

        chat_models = {}
        embed_models = {}

        # Fetch all models (without capability filter to work with tenancy compartment)
        try:
            logger.info("🔍 Fetching all models from OCI...")
            logger.debug(f"  Compartment ID: {compartment_id}")
            logger.debug(f"  Method: Fetching all models, will filter by capabilities in Python")

            response = client.list_models(
                compartment_id=compartment_id
            )

            logger.info(f"✅ Successfully fetched {len(response.data.items)} models from OCI")

            # Filter models by capabilities in Python
            for model in response.data.items:
                model_id = model.display_name
                provider = model_id.split(".")[0] if "." in model_id else "unknown"
                capabilities = model.capabilities if hasattr(model, 'capabilities') else []

                logger.debug(f"  Processing: {model_id} (capabilities: {capabilities})")

                # Chat models: have CHAT or TEXT_GENERATION capability
                if 'CHAT' in capabilities or 'TEXT_GENERATION' in capabilities:
                    supports_streaming = True  # Most models support streaming
                    supports_tools = provider in ["cohere", "meta"]  # These providers support tools

                    # Detect multimodal support from capabilities
                    supports_multimodal = False
                    multimodal_types = []
                    if 'IMAGE' in capabilities or 'VISION' in capabilities:
                        supports_multimodal = True
                        multimodal_types.append("image")

                    chat_models[model_id] = ModelConfig(
                        id=model_id,
                        name=model.display_name,
                        type="ondemand",
                        provider=provider,
                        region=region,
                        compartment_id=compartment_id,
                        supports_streaming=supports_streaming,
                        supports_tools=supports_tools,
                        supports_multimodal=supports_multimodal,
                        multimodal_types=multimodal_types,
                        max_tokens=4096,
                        context_window=128000
                    )

                # Embedding models: have TEXT_EMBEDDINGS capability
                elif 'TEXT_EMBEDDINGS' in capabilities:
                    embed_models[model_id] = ModelConfig(
                        id=model_id,
                        name=model.display_name,
                        type="embedding",
                        provider=provider,
                        region=region,
                        compartment_id=compartment_id,
                        supports_streaming=False,
                        supports_tools=False,
                        max_tokens=512,
                        context_window=512
                    )

            logger.info(f"✅ Filtered {len(chat_models)} chat models")
            if chat_models:
                logger.debug(f"  Chat models: {', '.join(list(chat_models.keys())[:5])}{'...' if len(chat_models) > 5 else ''}")

            logger.info(f"✅ Filtered {len(embed_models)} embedding models")
            if embed_models:
                logger.debug(f"  Embed models: {', '.join(embed_models.keys())}")

        except Exception as e:
            logger.warning(f"⚠️ Failed to fetch models from OCI")
            logger.warning(f"  Error: {e}")
            if hasattr(e, 'status'):
                logger.warning(f"  HTTP Status: {e.status}")
            if hasattr(e, 'code'):
                logger.warning(f"  Error Code: {e.code}")
            logger.info(f"💡 Tip: Check your OCI credentials and permissions")

        return {"chat": chat_models, "embed": embed_models}

    except Exception as e:
        logger.error(f"❌ Failed to initialize OCI client for model discovery")
        logger.error(f"  Error: {e}")
        logger.info("💡 Tip: Check your OCI credentials and permissions")
        return {"chat": {}, "embed": {}}


def update_models_from_oci(compartment_id: Optional[str] = None,
                           region: Optional[str] = None,
                           config_path: str = "./.oci/config",
                           profile: str = "DEFAULT") -> None:
    """
    Update global model dictionaries with models from OCI.
    Raises RuntimeError if model fetching fails.

    Priority for configuration values:
    1. Explicitly provided parameters
    2. Environment variables (OCI_COMPARTMENT_ID, OCI_REGION)
    3. Values from .oci/config file (tenancy, region)

    Raises:
        RuntimeError: If no models can be fetched from OCI
    """
    global OCI_CHAT_MODELS, OCI_EMBED_MODELS

    # Priority: explicit params > environment > config file
    if not compartment_id:
        compartment_id = os.getenv("OCI_COMPARTMENT_ID")
    if not region:
        region = os.getenv("OCI_REGION")

    # Note: If still not set, fetch_models_from_oci will try to read from config file
    logger.info("🚀 Attempting to fetch models from OCI...")
    fetched = fetch_models_from_oci(compartment_id, region, config_path, profile)

    # Fail-fast: Require successful model fetching
    if not fetched["chat"] and not fetched["embed"]:
        error_msg = (
            "❌ Failed to fetch any models from OCI.\n\n"
            "Troubleshooting steps:\n"
            "1. Verify your OCI credentials are configured correctly:\n"
            f"   - Config file: {config_path}\n"
            f"   - Profile: {profile}\n"
            "   - Run: oci iam region list (to test authentication)\n\n"
            "2. Check your OCI permissions:\n"
            "   - Ensure you have access to Generative AI service\n"
            "   - Verify compartment_id/tenancy has available models\n\n"
            "3. Check network connectivity:\n"
            "   - Ensure you can reach OCI endpoints\n"
            f"   - Test region: {region or 'from config file'}\n\n"
            "4. Review logs above for detailed error messages"
        )
        logger.error(error_msg)
        raise RuntimeError(
            "Failed to fetch models from OCI. "
            "The service cannot start without available models. "
            "Check the logs above for troubleshooting guidance."
        )

    # Update global model registries
    if fetched["chat"]:
        OCI_CHAT_MODELS.clear()
        OCI_CHAT_MODELS.update(fetched["chat"])
        logger.info(f"✅ Loaded {len(OCI_CHAT_MODELS)} chat models from OCI")

    if fetched["embed"]:
        OCI_EMBED_MODELS.clear()
        OCI_EMBED_MODELS.update(fetched["embed"])
        logger.info(f"✅ Loaded {len(OCI_EMBED_MODELS)} embedding models from OCI")

    logger.info(f"✅ Model discovery completed successfully")
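Note: the typical flow is to populate the registries once at startup and then resolve model ids through get_model_config(). A minimal sketch, not part of the commit, requiring working OCI credentials and using placeholder ids:

# Illustrative model-discovery flow (placeholder model id).
from core.models import update_models_from_oci, get_model_config, get_all_models

# Populate the global registries once (raises RuntimeError if nothing could be fetched).
update_models_from_oci(config_path="~/.oci/config", profile="DEFAULT")

print(len(get_all_models()), "models discovered")
cfg = get_model_config("cohere.command-r-plus")   # placeholder; use an id that was actually listed
if cfg:
    print(cfg.provider, cfg.type, cfg.supports_tools)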
361
src/core/oci_client.py
Normal file
@@ -0,0 +1,361 @@
"""
OCI Generative AI client wrapper.
"""
import os
import logging
from typing import Optional, AsyncIterator
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
    ChatDetails,
    CohereChatRequest,
    GenericChatRequest,
    OnDemandServingMode,
    DedicatedServingMode,
    CohereMessage,
    Message,
    TextContent,
    EmbedTextDetails,
)

# Try to import multimodal content types
try:
    from oci.generative_ai_inference.models import (
        ImageContent,
        ImageUrl,
        AudioContent,
        AudioUrl,
        VideoContent,
        VideoUrl,
    )
    MULTIMODAL_SUPPORTED = True
    logger_init = logging.getLogger(__name__)
    logger_init.info("OCI SDK multimodal content types available")
except ImportError:
    MULTIMODAL_SUPPORTED = False
    logger_init = logging.getLogger(__name__)
    logger_init.warning("OCI SDK does not support multimodal content types, using dict format as fallback")

from .config import Settings
from .models import get_model_config, ModelConfig

logger = logging.getLogger(__name__)


def build_multimodal_content(content_list: list) -> list:
    """
    Build OCI ChatContent object array from adapted content list.

    Supports both HTTP URLs and Base64 data URIs (data:image/jpeg;base64,...).

    Args:
        content_list: List of content items from request adapter

    Returns:
        List of OCI ChatContent objects or dicts (fallback)
    """
    if not MULTIMODAL_SUPPORTED:
        # Fallback: return dict format, OCI SDK might auto-convert
        return content_list

    oci_contents = []
    for item in content_list:
        if not isinstance(item, dict):
            continue

        item_type = item.get("type")

        if item_type == "text":
            oci_contents.append(TextContent(text=item.get("text", "")))

        elif item_type == "image_url":
            image_data = item.get("image_url", {})
            if "url" in image_data:
                # ImageUrl accepts both HTTP URLs and data URIs (data:image/jpeg;base64,...)
                img_url = ImageUrl(url=image_data["url"])
                # Optional: support 'detail' parameter if provided
                if "detail" in image_data:
                    img_url.detail = image_data["detail"]
                oci_contents.append(ImageContent(image_url=img_url, type="IMAGE"))

        elif item_type == "audio":
            audio_data = item.get("audio_url", {})
            if "url" in audio_data:
                # AudioUrl accepts both HTTP URLs and data URIs (data:audio/wav;base64,...)
                audio_url = AudioUrl(url=audio_data["url"])
                oci_contents.append(AudioContent(audio_url=audio_url, type="AUDIO"))

        elif item_type == "video":
            video_data = item.get("video_url", {})
            if "url" in video_data:
                # VideoUrl accepts both HTTP URLs and data URIs (data:video/mp4;base64,...)
                video_url = VideoUrl(url=video_data["url"])
                oci_contents.append(VideoContent(video_url=video_url, type="VIDEO"))

    return oci_contents if oci_contents else [TextContent(text="")]


class OCIGenAIClient:
    """Wrapper for OCI Generative AI client."""

    def __init__(self, settings: Settings, profile: Optional[str] = None):
        """
        Initialize the OCI GenAI client.

        Args:
            settings: Application settings
            profile: Optional OCI config profile name. If not provided, the first profile from settings is used.
        """
        self.settings = settings
        self.profile = profile or settings.get_profiles()[0]
        self._client: Optional[GenerativeAiInferenceClient] = None
        self._config: Optional[oci.config.Config] = None
        self._region: Optional[str] = None
        self._compartment_id: Optional[str] = None

    def _get_config(self) -> dict:
        """Get OCI configuration."""
        if self._config is None:
            if self.settings.oci_auth_type == "instance_principal":
                signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
                self._config = {"signer": signer}
            else:
                config_path = os.path.expanduser(self.settings.oci_config_file)
                self._config = oci.config.from_file(
                    file_location=config_path,
                    profile_name=self.profile
                )

                # Read region and compartment_id from the config
                if self._region is None:
                    self._region = self._config.get("region")
                if self._compartment_id is None:
                    self._compartment_id = self._config.get("tenancy")

        return self._config

    @property
    def region(self) -> Optional[str]:
        """Get the configured region."""
        if self._region is None and self._config is None:
            self._get_config()
        return self._region

    @property
    def compartment_id(self) -> Optional[str]:
        """Get the configured compartment ID."""
        if self._compartment_id is None and self._config is None:
            self._get_config()
        return self._compartment_id

    def _get_client(self) -> GenerativeAiInferenceClient:
        """Get or create OCI Generative AI Inference client with correct endpoint."""
        config = self._get_config()

        # Use INFERENCE endpoint (not management endpoint)
        # Official format: https://inference.generativeai.{region}.oci.oraclecloud.com
        inference_endpoint = f"https://inference.generativeai.{self.region}.oci.oraclecloud.com"

        if isinstance(config, dict) and "signer" in config:
            # For instance principal
            client = GenerativeAiInferenceClient(
                config={},
                service_endpoint=inference_endpoint,
                **config
            )
            return client

        # For API key authentication
        client = GenerativeAiInferenceClient(
            config=config,
            service_endpoint=inference_endpoint,
            retry_strategy=oci.retry.NoneRetryStrategy(),
            timeout=(10, 240)
        )

        return client

    def chat(
        self,
        model_id: str,
        messages: list,
        temperature: float = 0.7,
        max_tokens: int = 1024,
        top_p: float = 1.0,
        stream: bool = False,
        tools: Optional[list] = None,
    ):
        """Send a chat completion request to OCI GenAI."""
        model_config = get_model_config(model_id)
        if not model_config:
            raise ValueError(f"Unsupported model: {model_id}")

        if not self.compartment_id:
            raise ValueError("Compartment ID is required")

        client = self._get_client()

        # Prepare serving mode
        if model_config.type == "dedicated" and model_config.endpoint:
            serving_mode = DedicatedServingMode(endpoint_id=model_config.endpoint)
        else:
            serving_mode = OnDemandServingMode(model_id=model_id)

        # Convert messages based on provider
        if model_config.provider == "cohere":
            chat_request = self._build_cohere_request(
                messages, temperature, max_tokens, top_p, tools, stream
            )
        elif model_config.provider in ["meta", "xai", "google", "openai"]:
            chat_request = self._build_generic_request(
                messages, temperature, max_tokens, top_p, tools, model_config.provider, stream
            )
        else:
            raise ValueError(f"Unsupported provider: {model_config.provider}")

        chat_details = ChatDetails(
            serving_mode=serving_mode,
            compartment_id=self.compartment_id,
            chat_request=chat_request,
        )

        logger.debug(f"Sending chat request to OCI GenAI: {model_id}")
        response = client.chat(chat_details)
        return response

    def _build_cohere_request(
        self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], stream: bool = False
    ) -> CohereChatRequest:
        """Build Cohere chat request.

        Note: Cohere models only support text content, not multimodal.
        """
        # Convert messages to Cohere format
        chat_history = []
        message = None

        for msg in messages:
            role = msg["role"]
            content = msg["content"]

            # Extract text from multimodal content
            if isinstance(content, list):
                # Extract text parts only
                text_parts = []
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "text":
                        text_parts.append(item.get("text", ""))
                content = " ".join(text_parts) if text_parts else ""

            if role == "system":
                # Cohere uses preamble for system messages
                continue
            elif role == "user":
                message = content
            elif role == "assistant":
                chat_history.append(
                    CohereMessage(role="CHATBOT", message=content)
                )
            elif role == "tool":
                # Handle tool responses if needed
                pass

        # Get preamble from system messages
        preamble_override = None
        for msg in messages:
            if msg["role"] == "system":
                preamble_override = msg["content"]
                break

        return CohereChatRequest(
            message=message,
            chat_history=chat_history if chat_history else None,
            preamble_override=preamble_override,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            is_stream=stream,
        )

    def _build_generic_request(
        self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], provider: str, stream: bool = False
    ) -> GenericChatRequest:
        """Build Generic chat request for Llama and other models."""
        # Convert messages to Generic format
        generic_messages = []
        for msg in messages:
            role = msg["role"]
            content = msg["content"]

            # Handle multimodal content
            if isinstance(content, list):
                # Build OCI ChatContent objects from multimodal content
                oci_contents = build_multimodal_content(content)
            else:
                # Simple text content
                if MULTIMODAL_SUPPORTED:
                    oci_contents = [TextContent(text=content)]
                else:
                    # Fallback: use dict format
                    oci_contents = [{"type": "text", "text": content}]

            if role == "user":
                oci_role = "USER"
            elif role in ["assistant", "model"]:
                oci_role = "ASSISTANT"
            elif role == "system":
                oci_role = "SYSTEM"
            else:
                oci_role = role.upper()

            # Create Message with role and content objects
            logger.debug(f"Creating message with role: {oci_role}, provider: {provider}, original role: {role}")

            generic_messages.append(
                Message(
                    role=oci_role,
                    content=oci_contents
                )
            )

        return GenericChatRequest(
            messages=generic_messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            is_stream=stream,
        )

    def embed(
        self,
        model_id: str,
        texts: list,
        truncate: str = "END",
    ):
        """Generate embeddings using OCI GenAI."""
        model_config = get_model_config(model_id)
        if not model_config or model_config.type != "embedding":
            raise ValueError(f"Invalid embedding model: {model_id}")

        if not self.compartment_id:
            raise ValueError("Compartment ID is required")

        client = self._get_client()

        serving_mode = OnDemandServingMode(
            serving_type="ON_DEMAND",
            model_id=model_id
        )

        embed_details = EmbedTextDetails(
            serving_mode=serving_mode,
            compartment_id=self.compartment_id,
            inputs=texts,
            truncate=truncate,
            is_echo=False,
            input_type="SEARCH_QUERY",
        )

        logger.debug(f"Sending embed request to OCI GenAI: {model_id}")
        response = client.embed_text(embed_details)
        return response
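Note: the mapping from OpenAI-style multimodal content to OCI content objects is handled entirely by build_multimodal_content. A minimal sketch, not part of the commit, with a placeholder (elided) data URI:

# Illustrative multimodal content conversion (no OCI call is made here).
from core.oci_client import build_multimodal_content

openai_content = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,...", "detail": "low"}},  # base64 payload elided
]

oci_content = build_multimodal_content(openai_content)
# With a multimodal-capable SDK this yields [TextContent, ImageContent]; otherwise the dicts pass through unchanged.
print([type(c).__name__ for c in oci_content])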
274
src/main.py
Normal file
@@ -0,0 +1,274 @@
"""
Main FastAPI application for OCI Generative AI to OpenAI API Gateway.
"""
import logging
import sys
import os
from contextlib import asynccontextmanager
from logging.handlers import RotatingFileHandler

from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError

from oci.exceptions import ServiceError

from core.config import get_settings
from core.models import update_models_from_oci
from api.routers import models, chat, embeddings
from api.schemas import ErrorResponse, ErrorDetail
from api.error_handler import OCIErrorHandler
from api.exceptions import ModelNotFoundException, InvalidModelTypeException


# Configure logging
def setup_logging():
    """Setup logging configuration."""
    settings = get_settings()

    # Create handlers list
    handlers = [
        logging.StreamHandler(sys.stdout)
    ]

    # Add file handler if log_file is configured
    if settings.log_file:
        log_dir = os.path.dirname(settings.log_file)
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir, exist_ok=True)

        file_handler = RotatingFileHandler(
            settings.log_file,
            maxBytes=settings.log_file_max_size * 1024 * 1024,  # Convert MB to bytes
            backupCount=settings.log_file_backup_count,
            encoding='utf-8'
        )
        handlers.append(file_handler)

    logging.basicConfig(
        level=getattr(logging, settings.log_level.upper()),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=handlers
    )


setup_logging()
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler."""
    logger.info("=" * 60)
    logger.info("Starting OCI GenAI to OpenAI API Gateway")
    logger.info("=" * 60)

    settings = get_settings()
    logger.info(f"API Version: {settings.api_version}")
    logger.info(f"API Prefix: {settings.api_prefix}")
    logger.info(f"Debug Mode: {settings.debug}")
    logger.info(f"OCI Config: {settings.oci_config_file}")

    profiles = settings.get_profiles()
    logger.info(f"OCI Profiles: {', '.join(profiles)}")

    try:
        # Fetch models from OCI (fails fast if unable to fetch)
        # Use the first profile for model discovery
        update_models_from_oci(
            config_path=settings.oci_config_file,
            profile=profiles[0] if profiles else "DEFAULT"
        )

        logger.info("=" * 60)
        logger.info("✅ Startup completed successfully")
        logger.info(f"Server listening on {settings.api_host}:{settings.api_port}")
        logger.info("=" * 60)

    except RuntimeError as e:
        logger.error("=" * 60)
        logger.error("❌ STARTUP FAILED")
        logger.error("=" * 60)
        logger.error(f"Reason: {str(e)}")
        logger.error("")
        logger.error("The service cannot start without available models from OCI.")
        logger.error("Please review the troubleshooting steps above and fix the issue.")
        logger.error("=" * 60)
        raise
    except Exception as e:
        logger.error("=" * 60)
        logger.error("❌ UNEXPECTED STARTUP ERROR")
        logger.error("=" * 60)
        logger.error(f"Error type: {type(e).__name__}")
        logger.error(f"Error message: {str(e)}")
        logger.error("=" * 60)
        raise

    yield

    logger.info("=" * 60)
    logger.info("Shutting down OCI GenAI to OpenAI API Gateway")
    logger.info("=" * 60)


# Create FastAPI app
settings = get_settings()

app = FastAPI(
    title=settings.api_title,
    version=settings.api_version,
    description="OpenAI-compatible REST API for Oracle Cloud Infrastructure Generative AI Service",
    lifespan=lifespan,
    docs_url="/docs" if settings.debug else None,
    redoc_url="/redoc" if settings.debug else None,
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Exception handlers
@app.exception_handler(ModelNotFoundException)
async def model_not_found_handler(request: Request, exc: ModelNotFoundException):
    """Handle model not found exceptions with OpenAI-compatible format."""
    error = ErrorDetail(
        message=exc.detail,
        type=exc.error_type,
        code=exc.error_code
    )

    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(InvalidModelTypeException)
async def invalid_model_type_handler(request: Request, exc: InvalidModelTypeException):
    """Handle invalid model type exceptions with OpenAI-compatible format."""
    error = ErrorDetail(
        message=exc.detail,
        type=exc.error_type,
        code=exc.error_code
    )

    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(ServiceError)
async def oci_service_error_handler(request: Request, exc: ServiceError):
    """Handle OCI SDK ServiceError exceptions."""
    # Use OCIErrorHandler to sanitize and filter sensitive information
    error_response = OCIErrorHandler.sanitize_oci_error(exc)

    # Determine the HTTP status code (use the one returned by OCI)
    status_code = exc.status if 400 <= exc.status < 600 else 500

    return JSONResponse(
        status_code=status_code,
        content=error_response.dict()
    )


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Handle HTTP exceptions with sensitive information filtering."""
    # Filter sensitive information possibly contained in the HTTPException detail
    filtered_detail = OCIErrorHandler.filter_sensitive_info(str(exc.detail))

    error = ErrorDetail(
        message=filtered_detail,
        type="invalid_request_error",
        code=f"http_{exc.status_code}"
    )

    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Handle request validation errors."""
    logger.error(f"Validation error: {exc}")
    error = ErrorDetail(
        message=str(exc),
        type="invalid_request_error",
        code="validation_error"
    )
    return JSONResponse(
        status_code=400,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Handle general exceptions with sensitive information filtering."""
    logger.error(f"Unexpected error: {exc}", exc_info=True)

    # Also filter sensitive information from generic errors (the full error is already logged)
    filtered_message = OCIErrorHandler.filter_sensitive_info(str(exc))

    error = ErrorDetail(
        message="An unexpected error occurred",  # Do not expose the specific error
        type="server_error",
        code="internal_error"
    )
    return JSONResponse(
        status_code=500,
        content=ErrorResponse(error=error).dict()
    )


# Include routers
app.include_router(models.router, prefix=settings.api_prefix)
app.include_router(chat.router, prefix=settings.api_prefix)
app.include_router(embeddings.router, prefix=settings.api_prefix)


@app.get("/")
async def root():
    """Root endpoint."""
    return {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": "OpenAI-compatible REST API for OCI Generative AI",
        "endpoints": {
            "models": f"{settings.api_prefix}/models",
            "chat": f"{settings.api_prefix}/chat/completions",
            "embeddings": f"{settings.api_prefix}/embeddings"
        }
    }


@app.get("/health")
async def health():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "service": "oci-genai-gateway"
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "main:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=settings.debug,
        log_level=settings.log_level.lower()
    )
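Note: the root and /health endpoints are unauthenticated, so they make a quick liveness probe once the container is up. A minimal sketch, not part of the commit, assuming the default host/port:

# Illustrative liveness check against the running gateway.
import requests

print(requests.get("http://localhost:8000/health").json())        # {'status': 'healthy', 'service': 'oci-genai-gateway'}
print(requests.get("http://localhost:8000/").json()["endpoints"])  # routes exposed under the configured API prefix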