Initial commit
All checks were successful
Build and Push OCI GenAI Gateway Docker Image / docker-build-push (push) Successful in 32m3s

This commit is contained in:
2025-12-09 14:44:09 +08:00
commit 42222744c7
27 changed files with 3081 additions and 0 deletions

65
.env.example Normal file
View File

@@ -0,0 +1,65 @@
# API Settings
API_TITLE=OCI GenAI to OpenAI API Gateway
API_VERSION=0.0.1
API_PREFIX=/v1
API_PORT=8000
API_HOST=0.0.0.0
DEBUG=false
# Authentication
# Comma-separated list of API keys for authentication
# These are the keys clients will use in Authorization: Bearer <key>
API_KEYS=["sk-oci-genai-default-key"]
# ============================================
# OCI Configuration
# ============================================
# Path to OCI config file (usually ~/.oci/config)
OCI_CONFIG_FILE=~/.oci/config
# Profile names in the OCI config file
# Supports a single profile or multiple profiles separated by commas
# With multiple profiles, requests are automatically load-balanced using round-robin
# Examples:
#   Single profile: OCI_CONFIG_PROFILE=DEFAULT
#   Multiple profiles: OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
# Note: each profile in ~/.oci/config must define region and tenancy (used as compartment_id)
OCI_CONFIG_PROFILE=DEFAULT
# Authentication type: api_key or instance_principal
OCI_AUTH_TYPE=api_key
# Optional: Direct endpoint for dedicated models
# GENAI_ENDPOINT=https://your-dedicated-endpoint
# Model Settings
# Note: Available models are dynamically loaded from OCI at startup
# Use GET /v1/models to see all available models
MAX_TOKENS=4096
TEMPERATURE=0.7
# Embedding Settings
# Truncate strategy for embeddings: END or START
EMBED_TRUNCATE=END
# Streaming Settings
# Global streaming on/off switch
# Set to false to disable streaming for all requests (overrides client stream=true)
ENABLE_STREAMING=true
# Chunk size for simulated streaming (fallback mode only)
# Only used when OCI returns non-streaming response
STREAM_CHUNK_SIZE=1024
# Logging
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL=INFO
# Log incoming requests (may contain sensitive data)
LOG_REQUESTS=false
# Log responses (may contain sensitive data)
LOG_RESPONSES=false
# Log file path (optional, if not set logs only to console)
LOG_FILE=./logs/app.log
# Max log file size in MB (default: 10)
LOG_FILE_MAX_SIZE=10
# Number of backup log files to keep (default: 5)
LOG_FILE_BACKUP_COUNT=5

82
.gitea/workflows/ci.yaml Normal file
View File

@@ -0,0 +1,82 @@
# .gitea/workflows/ci.yaml
name: Build and Push OCI GenAI Gateway Docker Image
on:
push:
branches: [main, develop]
tags: ['*']
env:
DOCKER_BUILDKIT: "1"
BUILDX_NO_DEFAULT_ATTESTATIONS: "1"
jobs:
docker-build-push:
runs-on: ubuntu-latest-amd64
steps:
- uses: actions/checkout@v4
- name: Debug branch info
run: |
echo "📋 Branch Information:"
echo " github.ref: ${{ github.ref }}"
echo " github.ref_name: ${{ github.ref_name }}"
echo " github.event_name: ${{ github.event_name }}"
- name: Setup Docker Buildx and Login
run: |
# Set up QEMU for multi-arch builds
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 2>/dev/null || true
# Create a buildx builder
docker buildx create --use --name oci_genai_builder \
--driver docker-container \
--driver-opt network=host \
--driver-opt image=moby/buildkit:buildx-stable-1 \
--driver-opt env.BUILDKIT_STEP_LOG_MAX_SIZE=50000000 \
--driver-opt env.BUILDKIT_STEP_LOG_MAX_SPEED=10000000 \
|| docker buildx use oci_genai_builder
docker buildx inspect --bootstrap
# Log in to the Docker registry
echo "${{ secrets.BUILD_TOKEN }}" | docker login ${{ gitea.server_url }} -u ${{ gitea.actor }} --password-stdin
- name: Determine Docker tag
id: tag
run: |
if [ "${{ github.ref_name }}" = "main" ]; then
TAG="latest"
elif [ "${{ github.ref_name }}" = "develop" ]; then
TAG="develop"
elif [[ "${{ github.ref }}" == refs/tags/* ]]; then
TAG="${{ github.ref_name }}"
else
TAG="${{ github.ref_name }}"
fi
echo "tag=${TAG}" >> $GITHUB_OUTPUT
echo "📦 Docker tag: ${TAG}"
- name: Build and push multi-arch Docker image
run: |
# Strip the https:// prefix from the URL
REGISTRY=$(echo "${{ gitea.server_url }}" | sed 's|https\?://||')
IMAGE_NAME="${REGISTRY}/${{ gitea.repository }}"
TAG="${{ steps.tag.outputs.tag }}"
FINAL_IMAGE_TAG="${IMAGE_NAME}:${TAG}"
echo "🏗️ Building and pushing image: ${FINAL_IMAGE_TAG}"
echo " Platforms: linux/amd64, linux/arm64"
# Set BuildKit optimization parameters
export BUILDKIT_PROGRESS=plain
docker buildx build --pull --push \
-t "${FINAL_IMAGE_TAG}" \
--platform linux/amd64,linux/arm64 \
--provenance=false \
--sbom=false \
-f Dockerfile .
echo ""
echo "✅ Build and push completed!"
echo "🐳 Image: ${FINAL_IMAGE_TAG}"

78
.gitignore vendored Normal file
View File

@@ -0,0 +1,78 @@
# Claude
.claude/
CLAUDE.md
.mcp.json
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual Environment
venv/
env/
ENV/
.venv
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store
# Environment variables
.env
.env.local
# OCI Config (contains sensitive keys)
.oci/
*.pem
# Logs
*.log
logs/
# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/
# Distribution
*.tar.gz
*.whl
# Docker
*.dockerfile.swp
# Source repositories
.source/
# Temporary files
tmp/
temp/
*.tmp
example/
# OS
.DS_Store
Thumbs.db

47
Dockerfile Normal file
View File

@@ -0,0 +1,47 @@
# Multi-stage build for OCI GenAI to OpenAI API Gateway
FROM python:3.11-slim as builder
# Set the working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Copy the dependency manifest
COPY requirements.txt .
# Install Python dependencies
RUN pip install --no-cache-dir --user -r requirements.txt
# Final image
FROM python:3.11-slim
# Set environment variables
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PATH=/root/.local/bin:$PATH
# Set the working directory
WORKDIR /app
# Copy installed Python dependencies from the builder stage
COPY --from=builder /root/.local /root/.local
# Copy application code
COPY src/ ./src/
COPY .env.example .env
# Create the log directory
RUN mkdir -p /app/logs
# Expose the service port
EXPOSE 8000
# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health').read()"
# Start the application
CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 OCI GenAI Gateway
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

240
README.md Normal file
View File

@@ -0,0 +1,240 @@
# OCI GenAI to OpenAI API Gateway
> 🚀 An OpenAI-compatible REST API for Oracle Cloud Infrastructure's Generative AI Service
[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
[![Python](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
[![FastAPI](https://img.shields.io/badge/FastAPI-0.115.0-green.svg)](https://fastapi.tiangolo.com/)
## 📖 Overview
A FastAPI service that acts as a translation layer between OCI Generative AI and the OpenAI API, letting OpenAI SDK clients talk to OCI GenAI models without any code changes.
## ✨ Key Features
- 🔄 **OpenAI API compatible**: Fully compatible with the OpenAI SDK; no changes to existing code required
- 🤖 **Dynamic model discovery**: Automatically fetches all available models from OCI at startup
- 🌐 **Multi-region load balancing**: Round-robin load balancing across multiple OCI profiles
- 🖼️ **Multimodal support**: Text, images (Vision models), Base64-encoded content, and more
- ⚡ **True streaming**: Genuine end-to-end streaming responses (TTFB < 200ms)
- 🔒 **Security**: Automatically filters sensitive information (OCIDs, request IDs, endpoint URLs)
- 🎯 **Performance**: Client connection pooling for significantly better throughput
## 🚀 Quick Start
### Prerequisites
- Python 3.8+
- An OCI account and API key
- Access to the OCI Generative AI service
### Installation
1. **Clone the repository**
```bash
git clone <repository-url>
cd oracle-openai
```
2. **Install dependencies**
```bash
pip install -r requirements.txt
```
3. **Configure OCI**
Create or edit `~/.oci/config`:
```ini
[DEFAULT]
user=ocid1.user.oc1...
fingerprint=aa:bb:cc:dd...
key_file=~/.oci/oci_api_key.pem
tenancy=ocid1.tenancy.oc1...
region=us-chicago-1
```
4. **Configure environment variables**
Copy `.env.example` to `.env` and edit it:
```bash
cp .env.example .env
# Edit .env to set API_KEYS and other options
```
5. **Run the service**
```bash
cd src
python main.py
```
The service starts at `http://localhost:8000`
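You can quickly check that the gateway is up via the health endpoint:
```bash
curl http://localhost:8000/health
```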
## 💻 Usage Examples
### Using cURL
```bash
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-oci-genai-default-key" \
  -d '{
    "model": "google.gemini-2.5-pro",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```
### Using the Python OpenAI SDK
```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-oci-genai-default-key",
    base_url="http://localhost:8000/v1"
)
response = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
```
### Streaming Responses
```python
stream = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[{"role": "user", "content": "Count from 1 to 10"}],
    stream=True
)
for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```
### Vision Models (Multimodal)
```python
response = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/image.jpg"
                    }
                }
            ]
        }
    ]
)
```
## 📋 Supported Endpoints
| Endpoint | Method | Description |
|------|------|------|
| `/health` | GET | Health check |
| `/v1/models` | GET | List all available models |
| `/v1/chat/completions` | POST | Chat completions (streaming supported) |
| `/v1/embeddings` | POST | Text embeddings |
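The `/v1/embeddings` endpoint accepts the same request shape as OpenAI's embeddings API. A minimal sketch using the Python SDK `client` from the examples above (the model name is only an illustration; use `GET /v1/models` to see which embedding models your tenancy actually exposes):
```python
response = client.embeddings.create(
    model="cohere.embed-multilingual-v3.0",  # example model name; check /v1/models
    input=["Oracle Cloud Infrastructure", "Generative AI"]
)
print(len(response.data), "vectors of dimension", len(response.data[0].embedding))
```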
## 🎨 Supported Models
Available models are discovered automatically from OCI at startup, including:
- **Cohere**: command-r-plus, command-r-16k, etc.
- **Meta**: llama-3.1-405b, llama-3.1-70b, llama-3.2-90b-vision, etc.
- **Google**: the gemini family
- **OpenAI**: the gpt family
- **xAI**: the grok family
Use `GET /v1/models` to see all available models.
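For example, with the Python SDK client configured above, you can list whatever the gateway discovered at startup:
```python
for model in client.models.list():
    print(model.id)
```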
## ⚙️ Configuration Options
### Key Environment Variables
| Variable | Description | Default |
|------|------|--------|
| `API_KEYS` | List of API keys (JSON array) | - |
| `OCI_CONFIG_PROFILE` | OCI config profile(s); multiple profiles separated by commas | `DEFAULT` |
| `OCI_AUTH_TYPE` | Authentication type | `api_key` |
| `MAX_TOKENS` | Default maximum tokens | `4096` |
| `TEMPERATURE` | Default temperature | `0.7` |
| `ENABLE_STREAMING` | Global streaming switch | `true` |
| `LOG_LEVEL` | Log level | `INFO` |
See [.env.example](.env.example) for the full configuration
## 🌐 Multi-Region Load Balancing
Multiple OCI profiles can be configured for automatic load balancing:
```bash
# .env file
OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
```
Requests are distributed across regions using a round-robin strategy.
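Each profile named here must exist in `~/.oci/config` with its own `region` and `tenancy` (the tenancy OCID is used as the compartment ID). A minimal sketch with placeholder values:
```ini
[DEFAULT]
user=ocid1.user.oc1...
fingerprint=aa:bb:cc:dd...
key_file=~/.oci/oci_api_key.pem
tenancy=ocid1.tenancy.oc1...
region=us-chicago-1

[ASHBURN]
user=ocid1.user.oc1...
fingerprint=aa:bb:cc:dd...
key_file=~/.oci/oci_api_key.pem
tenancy=ocid1.tenancy.oc1...
region=us-ashburn-1
```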
## 🐳 Docker Deployment
```bash
# With docker-compose
docker-compose up
# Or with plain Docker
docker build -t oci-genai-gateway .
docker run -p 8000:8000 --env-file .env oci-genai-gateway
```
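Note that the container needs access to your OCI credentials. `docker-compose.yml` already mounts `~/.oci` read-only into the container; when using plain `docker run`, you can mount it yourself, for example:
```bash
docker run -p 8000:8000 --env-file .env -v ~/.oci:/root/.oci:ro oci-genai-gateway
```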
## 📚 Documentation
- [CLAUDE.md](CLAUDE.md) - Full development documentation, including architecture notes, a development guide, and debugging tips
- [.env.example](.env.example) - Example environment variable configuration
## 🔧 Troubleshooting
### Common Issues
1. **Model not found**
   - Check the spelling of the model ID
   - Confirm the model is available in your OCI region
   - Check the startup logs to confirm the model was loaded
2. **Authentication failures**
   - Verify that `~/.oci/config` is correct
   - Check the API key file permissions: `chmod 600 ~/.oci/oci_api_key.pem`
   - Run `oci iam region list` to test your OCI configuration
3. **429 rate limit errors**
   - Use multiple profiles for load balancing
   - Wait 1-2 minutes and retry
See [CLAUDE.md](CLAUDE.md#调试) for more troubleshooting details
## 🤝 Contributing
Contributions are welcome! Feel free to open issues or pull requests.
## 📄 License
This project is released under the MIT License; see the [LICENSE](LICENSE) file for details.
## 🙏 Acknowledgments
- [FastAPI](https://fastapi.tiangolo.com/) - A modern, fast web framework
- [OCI Python SDK](https://github.com/oracle/oci-python-sdk) - Oracle Cloud Infrastructure SDK
- [OpenAI](https://openai.com/) - API design reference
---
**⭐ If this project helps you, please give it a Star**

35
docker-compose.yml Normal file
View File

@@ -0,0 +1,35 @@
version: '3.8'
services:
oci-genai-gateway:
build:
context: .
dockerfile: Dockerfile
container_name: oci-genai-gateway
ports:
- "8000:8000"
volumes:
# Mount the OCI config file (adjust to your actual path)
- ~/.oci:/root/.oci:ro
# Mount the environment config file
- .env:/app/.env:ro
# Mount the log directory
- ./logs:/app/logs
environment:
- API_TITLE=OCI GenAI to OpenAI API Gateway
- API_VERSION=0.0.1
- DEBUG=false
- LOG_LEVEL=INFO
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 10s
networks:
- genai-network
networks:
genai-network:
driver: bridge

24
init.sh Normal file
View File

@@ -0,0 +1,24 @@
#!/bin/sh
# Modify the CMakeLists.txt and source files to change the project name from "xmrigcc" to "xxxigcc"
sed -i 's/project(xmrigcc)/project(xxxigcc)/' CMakeLists.txt
sed -i 's/XMRigCC: Found ccache package/XXXigCC: Found ccache package/' CMakeLists.txt
sed -i 's/MINER_EXECUTABLE_NAME "xmrigMiner"/MINER_EXECUTABLE_NAME "xxxigMiner"/' CMakeLists.txt
sed -i 's/DAEMON_EXECUTABLE_NAME "xmrigDaemon"/DAEMON_EXECUTABLE_NAME "xxxigDaemon"/' CMakeLists.txt
sed -i 's/xmrigServer ${SOURCES_CC_SERVER}/xxxigServer ${SOURCES_CC_SERVER}/' CMakeLists.txt
sed -i 's/xmrigServer ${XMRIG_ASM_LIBRARY}/xxxigServer ${XMRIG_ASM_LIBRARY}/' CMakeLists.txt
sed -i 's/xmrigServer POST_BUILD/xxxigServer POST_BUILD/' CMakeLists.txt
# Modify donate functionality
sed -i 's/kDefaultDonateLevel = 3/kDefaultDonateLevel = 0/' src/donate.h
sed -i 's/kMinimumDonateLevel = 1/kMinimumDonateLevel = 0/' src/donate.h
sed -i 's/donate.graef.in/127.0.0.1/' src/net/strategies/DonateStrategy.cpp
sed -i 's/87.106.163.52/127.0.0.1/' src/net/strategies/DonateStrategy.cpp
sed -i 's/"donate-level": 3/"donate-level": 0/' src/config.json
sed -i 's/"donate-over-proxy": 1/"donate-over-proxy": 0/' src/config.json
# Modify version information
sed -i 's/Copyright (C) 2017- XMRigCC//' src/version.h
sed -i 's/https:\/\/github.com\/BenDr0id\/xmrigCC\///' src/version.h
sed -i 's/xmrigcc/xxxigcc/' src/version.h
sed -i 's/XMRigCC/XXXigCC/' src/version.h

12
requirements.txt Normal file
View File

@@ -0,0 +1,12 @@
# FastAPI and server
fastapi==0.115.0
uvicorn[standard]==0.32.0
pydantic==2.9.2
pydantic-settings==2.6.1
# OCI SDK (updated to latest stable version)
oci>=2.160.0
# Utilities
python-dotenv==1.0.1
python-multipart==0.0.17

1
src/api/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""API module for OCI GenAI Gateway."""

1
src/api/adapters/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Request/Response adapters module."""

104
src/api/adapters/request_adapter.py Normal file
View File

@@ -0,0 +1,104 @@
"""
Adapter for converting OpenAI requests to OCI GenAI format.
"""
import logging
from typing import List, Dict, Any, Optional
from ..schemas import ChatCompletionRequest, EmbeddingRequest
from core.config import get_settings
logger = logging.getLogger(__name__)
# Content type handlers for extensible multimodal support
CONTENT_TYPE_HANDLERS = {
"text": lambda item: {"type": "text", "text": item.get("text", "")},
"image_url": lambda item: {"type": "image_url", "image_url": item.get("image_url", {})},
"audio": lambda item: {"type": "audio", "audio_url": item.get("audio_url", {})},
"video": lambda item: {"type": "video", "video_url": item.get("video_url", {})}
}
def adapt_chat_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Adapt OpenAI chat messages to OCI GenAI format.
Args:
messages: OpenAI format messages
Returns:
Adapted messages for OCI GenAI
"""
adapted_messages = []
for msg in messages:
role = msg.get("role", "user")
content = msg.get("content", "")
# Handle different content types
if isinstance(content, list):
# Multimodal content
adapted_content = []
for item in content:
if isinstance(item, dict):
item_type = item.get("type")
handler = CONTENT_TYPE_HANDLERS.get(item_type)
if handler:
adapted_content.append(handler(item))
else:
logger.warning(f"Unknown content type: {item_type}, skipping")
adapted_messages.append({
"role": role,
"content": adapted_content
})
else:
# Simple text content
adapted_messages.append({
"role": role,
"content": content
})
return adapted_messages
def extract_chat_params(request: ChatCompletionRequest) -> Dict[str, Any]:
"""
Extract chat parameters from OpenAI request.
Args:
request: OpenAI chat completion request
Returns:
Dictionary of parameters for OCI GenAI
"""
settings = get_settings()
params = {
"temperature": request.temperature if request.temperature is not None else settings.temperature,
"max_tokens": request.max_tokens if request.max_tokens is not None else settings.max_tokens,
"top_p": request.top_p if request.top_p is not None else 1.0,
"stream": request.stream or False,
}
# Add tools if present
if request.tools:
params["tools"] = request.tools
return params
def adapt_embedding_input(request: EmbeddingRequest) -> List[str]:
"""
Adapt OpenAI embedding input to OCI GenAI format.
Args:
request: OpenAI embedding request
Returns:
List of texts to embed
"""
if isinstance(request.input, str):
return [request.input]
elif isinstance(request.input, list):
return request.input
else:
return [str(request.input)]

307
src/api/adapters/response_adapter.py Normal file
View File

@@ -0,0 +1,307 @@
"""
Adapter for converting OCI GenAI responses to OpenAI format.
"""
import time
import uuid
from typing import Dict, Any, List, Optional
from ..schemas import (
ChatCompletionResponse,
ChatCompletionChoice,
ChatCompletionUsage,
ChatMessage,
EmbeddingResponse,
EmbeddingData,
EmbeddingUsage,
)
def adapt_chat_response(
oci_response: Any,
model_id: str,
request_id: Optional[str] = None
) -> ChatCompletionResponse:
"""
Adapt OCI GenAI chat response to OpenAI format.
Args:
oci_response: OCI GenAI response object
model_id: Model identifier
request_id: Optional request ID
Returns:
OpenAI-compatible chat completion response
"""
response_id = request_id or f"chatcmpl-{uuid.uuid4().hex[:8]}"
created_at = int(time.time())
# Extract response data
chat_response = oci_response.data.chat_response
# Extract text content
if hasattr(chat_response, 'text'):
# Cohere format
raw_text = chat_response.text
# Try to parse as JSON if it's a string (OCI format)
try:
import json
parsed = json.loads(raw_text)
if isinstance(parsed, dict) and 'text' in parsed:
content = parsed['text']
else:
content = raw_text
except (json.JSONDecodeError, ValueError, TypeError):
# Not JSON, use as-is
content = raw_text
finish_reason = chat_response.finish_reason if hasattr(chat_response, 'finish_reason') else "stop"
elif hasattr(chat_response, 'choices') and len(chat_response.choices) > 0:
# Llama/Generic format
choice = chat_response.choices[0]
if hasattr(choice, 'message'):
raw_content = choice.message.content
# Handle list format: [TextContent(text="...", type="TEXT")] or [{"text": "...", "type": "TEXT"}]
if isinstance(raw_content, list):
# Build multimodal content array
adapted_content = []
for item in raw_content:
# Handle OCI TextContent object
if hasattr(item, 'text') and hasattr(item, 'type'):
if item.type == 'TEXT' or item.type == 'text':
adapted_content.append({
"type": "text",
"text": item.text
})
# Future: handle IMAGE, AUDIO, VIDEO types
# Handle dict format
elif isinstance(item, dict):
item_type = item.get('type', 'TEXT').upper()
if item_type == 'TEXT':
adapted_content.append({
"type": "text",
"text": item.get('text', '')
})
# Future: handle other types
else:
# Fallback: convert to text
adapted_content.append({
"type": "text",
"text": str(item)
})
# Simplify to string if only one text element (backward compatibility)
if len(adapted_content) == 1 and adapted_content[0].get('type') == 'text':
content = adapted_content[0]['text']
else:
content = adapted_content
elif isinstance(raw_content, str):
# Try to parse as JSON if it's a string (OCI format)
try:
import json
parsed = json.loads(raw_content)
if isinstance(parsed, dict) and 'text' in parsed:
content = parsed['text']
else:
content = raw_content
except (json.JSONDecodeError, ValueError):
# Not JSON, use as-is
content = raw_content
else:
content = raw_content
else:
content = str(choice)
finish_reason = choice.finish_reason if hasattr(choice, 'finish_reason') else "stop"
else:
content = str(chat_response)
finish_reason = "stop"
# Create message
message = ChatMessage(
role="assistant",
content=content
)
# Create choice
choice = ChatCompletionChoice(
index=0,
message=message,
finish_reason=finish_reason
)
# Extract usage information
usage = None
if hasattr(oci_response.data, 'usage'):
oci_usage = oci_response.data.usage
usage = ChatCompletionUsage(
prompt_tokens=getattr(oci_usage, 'prompt_tokens', 0) or 0,
completion_tokens=getattr(oci_usage, 'completion_tokens', 0) or 0,
total_tokens=getattr(oci_usage, 'total_tokens', 0) or 0
)
return ChatCompletionResponse(
id=response_id,
object="chat.completion",
created=created_at,
model=model_id,
choices=[choice],
usage=usage
)
def adapt_streaming_chunk(
chunk_data: str,
model_id: str,
request_id: str,
index: int = 0,
is_first: bool = False
) -> str:
"""
Adapt OCI GenAI streaming chunk to OpenAI SSE format.
Args:
chunk_data: Chunk text from OCI GenAI
model_id: Model identifier
request_id: Request ID
index: Chunk index
is_first: Whether this is the first chunk (should include role with empty content)
Returns:
OpenAI-compatible SSE formatted string
"""
created_at = int(time.time())
# Build delta - first chunk should include role with empty content
delta = {}
if is_first:
delta["role"] = "assistant"
delta["content"] = "" # First chunk has empty content like OpenAI
elif chunk_data:
delta["content"] = chunk_data
chunk = {
"id": request_id,
"object": "chat.completion.chunk",
"created": created_at,
"model": model_id,
"system_fingerprint": None,
"choices": [
{
"index": index,
"delta": delta,
"logprobs": None,
"finish_reason": None
}
],
"usage": None
}
import json
return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
def adapt_streaming_done(
model_id: str,
request_id: str,
usage: Optional[Dict[str, int]] = None
) -> str:
"""
Create final SSE chunks for streaming completion (OpenAI format).
Returns two chunks:
1. Finish chunk with finish_reason="stop"
2. Usage chunk with empty choices and usage stats
Args:
model_id: Model identifier
request_id: Request ID
usage: Optional usage statistics
Returns:
Final SSE formatted strings (finish chunk + usage chunk + [DONE])
"""
import json
created_at = int(time.time())
result = ""
# First chunk: finish_reason with empty delta
finish_chunk = {
"id": request_id,
"object": "chat.completion.chunk",
"created": created_at,
"model": model_id,
"system_fingerprint": None,
"choices": [
{
"index": 0,
"delta": {},
"logprobs": None,
"finish_reason": "stop"
}
],
"usage": None
}
result += f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"
# Second chunk: usage stats with empty choices (like OpenAI)
if usage:
usage_chunk = {
"id": request_id,
"object": "chat.completion.chunk",
"created": created_at,
"model": model_id,
"system_fingerprint": "",
"choices": [], # Empty choices array for usage chunk
"usage": usage
}
result += f"data: {json.dumps(usage_chunk, ensure_ascii=False)}\n\n"
# Final [DONE] marker
result += "data: [DONE]\n\n"
return result
def adapt_embedding_response(
oci_response: Any,
model_id: str,
input_count: int
) -> EmbeddingResponse:
"""
Adapt OCI GenAI embedding response to OpenAI format.
Args:
oci_response: OCI GenAI embedding response
model_id: Model identifier
input_count: Number of input texts
Returns:
OpenAI-compatible embedding response
"""
embeddings_data = []
# Extract embeddings
if hasattr(oci_response.data, 'embeddings'):
embeddings = oci_response.data.embeddings
for idx, embedding in enumerate(embeddings):
embeddings_data.append(
EmbeddingData(
object="embedding",
embedding=embedding,
index=idx
)
)
# Calculate usage (approximate)
# OCI doesn't always provide token counts, so we estimate
prompt_tokens = input_count * 10 # Rough estimate
usage = EmbeddingUsage(
prompt_tokens=prompt_tokens,
total_tokens=prompt_tokens
)
return EmbeddingResponse(
object="list",
data=embeddings_data,
model=model_id,
usage=usage
)

57
src/api/auth.py Normal file
View File

@@ -0,0 +1,57 @@
"""
API authentication module.
"""
import logging
from fastapi import HTTPException, Security, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from typing import List
from core.config import get_settings
logger = logging.getLogger(__name__)
security = HTTPBearer()
async def verify_api_key(
credentials: HTTPAuthorizationCredentials = Security(security)
) -> str:
"""
Verify API key from Authorization header.
Args:
credentials: HTTP authorization credentials
Returns:
Validated API key
Raises:
HTTPException: If API key is invalid
"""
api_key = credentials.credentials
settings = get_settings()
if api_key in settings.api_keys:
logger.debug("API key validated successfully")
return api_key
logger.warning(f"Invalid API key attempted: {api_key[:10]}...")
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid API key",
headers={"WWW-Authenticate": "Bearer"},
)
async def get_api_key(
credentials: HTTPAuthorizationCredentials = Security(security)
) -> str:
"""
Async wrapper for API key verification.
Args:
credentials: HTTP authorization credentials
Returns:
Validated API key
"""
return await verify_api_key(credentials)

161
src/api/error_handler.py Normal file
View File

@@ -0,0 +1,161 @@
"""
OCI error handler - filters sensitive information and produces user-friendly error responses.
This module is responsible for:
1. Intercepting OCI SDK ServiceError exceptions
2. Filtering sensitive information (OCIDs, request IDs, endpoint URLs)
3. Mapping OCI error codes to an OpenAI-compatible format
4. Generating user-friendly error messages
"""
import re
import logging
from typing import Dict
from oci.exceptions import ServiceError
from api.schemas import ErrorDetail, ErrorResponse
logger = logging.getLogger(__name__)
class OCIErrorHandler:
"""OCI 错误处理器,负责过滤敏感信息并转换错误格式"""
# 预编译的正则模式(性能优化)
SENSITIVE_PATTERNS: Dict[str, re.Pattern] = {
'tenancy_ocid': re.compile(r'ocid1\.tenancy\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
'compartment_ocid': re.compile(r'ocid1\.compartment\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
'user_ocid': re.compile(r'ocid1\.user\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
'endpoint_ocid': re.compile(r'ocid1\.generativeaiendpoint\.[a-z0-9\.\-]+', re.IGNORECASE),
'request_id': re.compile(r'[A-F0-9]{32}(/[A-F0-9]{32})*'),
'endpoint_url': re.compile(r'https://[a-z0-9\.\-]+\.oci(\.oraclecloud)?\.com[^\s\)]*', re.IGNORECASE),
}
# Mapping from OCI status codes to OpenAI error types
OCI_TO_OPENAI_ERROR_TYPE: Dict[int, str] = {
400: "invalid_request_error",
401: "authentication_error",
403: "permission_error",
404: "invalid_request_error",
409: "invalid_request_error",
429: "rate_limit_error",
500: "server_error",
502: "server_error",
503: "server_error",
504: "server_error",
}
# User-friendly error message templates
USER_FRIENDLY_MESSAGES: Dict[int, str] = {
400: "Invalid request parameters. Please check your input.",
401: "Authentication failed. Please verify your API credentials.",
403: "Access denied. You don't have permission to access this resource.",
404: "The requested resource was not found.",
409: "Request conflict. The resource may have been modified.",
429: "Request rate limit exceeded. Please retry after a short delay.",
500: "Internal server error. Please try again later.",
502: "Bad gateway. The upstream service is unavailable.",
503: "Service temporarily unavailable. Please try again later.",
504: "Gateway timeout. The request took too long to process.",
}
@classmethod
def sanitize_oci_error(cls, exc: ServiceError) -> ErrorResponse:
"""
Handle an OCI ServiceError: filter sensitive information and return a user-friendly error response.
Args:
exc: OCI ServiceError exception object
Returns:
ErrorResponse: The sanitized error response
"""
# Log the full error (for debugging)
logger.error(
f"OCI ServiceError: status={exc.status}, code={exc.code}, "
f"request_id={exc.request_id}, message={exc.message}"
)
# Filter sensitive information
filtered_message = cls.filter_sensitive_info(str(exc.message))
# Generate a user-friendly message
user_message = cls.create_user_friendly_message(exc.status, filtered_message)
# Map the error type
error_type = cls.map_oci_status_to_openai(exc.status)
# Build the ErrorResponse
error_detail = ErrorDetail(
message=user_message,
type=error_type,
code=f"oci_{exc.code.lower()}" if exc.code else "oci_error"
)
return ErrorResponse(error=error_detail)
@classmethod
def filter_sensitive_info(cls, text: str) -> str:
"""
Filter sensitive information from text.
Args:
text: Original text
Returns:
str: Filtered text
"""
filtered = text
# Iterate over all regex patterns and replace sensitive information
for pattern_name, regex_pattern in cls.SENSITIVE_PATTERNS.items():
if pattern_name == 'tenancy_ocid':
filtered = regex_pattern.sub('tenancy:***', filtered)
elif pattern_name == 'endpoint_url':
filtered = regex_pattern.sub('https://***', filtered)
elif pattern_name == 'request_id':
filtered = regex_pattern.sub('request-id:***', filtered)
else:
filtered = regex_pattern.sub('***', filtered)
return filtered
@classmethod
def map_oci_status_to_openai(cls, status_code: int) -> str:
"""
Map an OCI status code to an OpenAI error type.
Args:
status_code: HTTP status code
Returns:
str: OpenAI error type
"""
# Convert via the mapping table; unknown status codes default to server_error
return cls.OCI_TO_OPENAI_ERROR_TYPE.get(status_code, "server_error")
@classmethod
def create_user_friendly_message(cls, status_code: int, filtered_message: str) -> str:
"""
Generate a user-friendly error message.
Args:
status_code: HTTP status code
filtered_message: The original error message after filtering
Returns:
str: User-friendly error message
"""
# Prefer the predefined friendly message
base_message = cls.USER_FRIENDLY_MESSAGES.get(
status_code,
"An unexpected error occurred. Please try again."
)
# If the filtered message still carries useful detail, append it to the base message
if filtered_message and filtered_message != str(status_code):
# Truncate to the first 200 characters to keep it short
truncated = filtered_message[:200]
return f"{base_message} Details: {truncated}"
return base_message

60
src/api/exceptions.py Normal file
View File

@@ -0,0 +1,60 @@
"""
Custom exceptions for the API.
"""
from fastapi import HTTPException
class ModelNotFoundException(HTTPException):
"""
Exception raised when a requested model is not found.
This exception is OpenAI API compatible and returns:
- HTTP Status: 404
- Error type: "invalid_request_error"
- Error code: "model_not_found"
"""
def __init__(self, model_id: str):
"""
Initialize ModelNotFoundException.
Args:
model_id: The model ID that was not found
"""
self.model_id = model_id
self.error_code = "model_not_found"
self.error_type = "invalid_request_error"
# HTTPException detail will be the message
super().__init__(
status_code=404,
detail=f"The model '{model_id}' does not exist or is not supported"
)
class InvalidModelTypeException(HTTPException):
"""
Exception raised when a model exists but is not the correct type.
For example, using an embedding model for chat or vice versa.
"""
def __init__(self, model_id: str, expected_type: str, actual_type: str):
"""
Initialize InvalidModelTypeException.
Args:
model_id: The model ID
expected_type: Expected model type (e.g., "chat", "embedding")
actual_type: Actual model type
"""
self.model_id = model_id
self.expected_type = expected_type
self.actual_type = actual_type
self.error_code = "invalid_model_type"
self.error_type = "invalid_request_error"
super().__init__(
status_code=400,
detail=f"Model '{model_id}' is a {actual_type} model, not a {expected_type} model"
)

1
src/api/routers/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""API routers module."""

417
src/api/routers/chat.py Normal file
View File

@@ -0,0 +1,417 @@
"""
Chat completions API router - OpenAI compatible chat endpoint.
"""
import asyncio
import logging
import os
import uuid
from typing import AsyncIterator, Union
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse
from oci.exceptions import ServiceError
from api.auth import get_api_key
from api.schemas import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ErrorDetail
from api.error_handler import OCIErrorHandler
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
from api.adapters.request_adapter import adapt_chat_messages, extract_chat_params
from api.adapters.response_adapter import (
adapt_chat_response,
adapt_streaming_chunk,
adapt_streaming_done,
)
from core.config import get_settings
from core.client_manager import get_client_manager
from core.models import get_model_config
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/chat",
tags=["chat"],
dependencies=[Depends(get_api_key)]
)
def extract_delta_from_chunk(chunk) -> str:
"""
Extract delta text content from OCI streaming chunk.
Args:
chunk: OCI streaming response chunk (can be SSE Event, parsed object, etc.)
Returns:
Delta text content or empty string
"""
try:
# Handle SSE Event objects (from SSEClient)
if hasattr(chunk, 'data'):
import json
# Parse JSON data from SSE event
try:
parsed = json.loads(chunk.data)
# Recursively extract from parsed object
if isinstance(parsed, dict):
# OCI Streaming format: message.content[].text
if 'message' in parsed and 'content' in parsed['message']:
content_array = parsed['message']['content']
if isinstance(content_array, list) and len(content_array) > 0:
# Extract text from all TEXT type content items
text_parts = []
for item in content_array:
if isinstance(item, dict) and item.get('type') == 'TEXT' and 'text' in item:
text_parts.append(item['text'])
if text_parts:
return ''.join(text_parts)
# Try to get text from various possible locations
if 'text' in parsed:
return parsed['text']
if 'chatResponse' in parsed and 'text' in parsed['chatResponse']:
return parsed['chatResponse']['text']
if 'choices' in parsed and len(parsed['choices']) > 0:
choice = parsed['choices'][0]
if 'delta' in choice and 'content' in choice['delta']:
return choice['delta']['content']
except (json.JSONDecodeError, KeyError, TypeError):
# Return raw data if not JSON
return str(chunk.data) if chunk.data else ""
# Try to extract from chat_response.text (Cohere format)
if hasattr(chunk, 'chat_response') and hasattr(chunk.chat_response, 'text'):
return chunk.chat_response.text
# Try to extract from choices[0].delta.content (Generic format)
if hasattr(chunk, 'chat_response') and hasattr(chunk.chat_response, 'choices'):
if len(chunk.chat_response.choices) > 0:
choice = chunk.chat_response.choices[0]
if hasattr(choice, 'delta') and hasattr(choice.delta, 'content'):
content = choice.delta.content
if isinstance(content, str):
return content
elif isinstance(content, list):
# Handle TextContent list
text_parts = []
for item in content:
if isinstance(item, dict) and 'text' in item:
text_parts.append(item['text'])
elif hasattr(item, 'text'):
text_parts.append(item.text)
return "".join(text_parts)
# Try direct text attribute
if hasattr(chunk, 'text'):
return chunk.text
except Exception as e:
logger.warning(f"Failed to extract delta from chunk: {e}")
return ""
def extract_content_from_response(chat_response) -> str:
"""
Extract full content from non-streaming OCI response.
Args:
chat_response: OCI chat response object
Returns:
Full text content
"""
if hasattr(chat_response, 'text'):
raw_text = chat_response.text
# Try to parse as JSON if it's a string (OCI format)
try:
import json
parsed = json.loads(raw_text)
if isinstance(parsed, dict) and 'text' in parsed:
return parsed['text']
return raw_text
except (json.JSONDecodeError, ValueError, TypeError):
return raw_text
elif hasattr(chat_response, 'choices') and len(chat_response.choices) > 0:
choice = chat_response.choices[0]
if hasattr(choice, 'message'):
raw_content = choice.message.content
# Handle list format
if isinstance(raw_content, list):
text_parts = []
for item in raw_content:
if isinstance(item, dict):
text_parts.append(item.get('text', ''))
elif hasattr(item, 'text'):
text_parts.append(item.text)
else:
text_parts.append(str(item))
return "".join(text_parts)
elif isinstance(raw_content, str):
try:
import json
parsed = json.loads(raw_content)
if isinstance(parsed, dict) and 'text' in parsed:
return parsed['text']
return raw_content
except (json.JSONDecodeError, ValueError):
return raw_content
else:
return str(raw_content)
return str(choice)
return str(chat_response)
@router.post("/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
"""
Create a chat completion using OCI Generative AI.
Args:
request: Chat completion request
Returns:
Chat completion response
"""
logger.info(f"Chat completion request for model: {request.model}")
settings = get_settings()
# Validate model exists
model_config = get_model_config(request.model)
if not model_config:
raise ModelNotFoundException(request.model)
# Validate model type is chat (ondemand or dedicated)
if model_config.type not in ("ondemand", "dedicated"):
raise InvalidModelTypeException(
model_id=request.model,
expected_type="chat",
actual_type=model_config.type
)
# Note: Multimodal capability validation is handled by the model itself
# If a model doesn't support certain content types, it will raise an error
# For example, Cohere models will raise ValueError for non-text content
# Get OCI client from manager (round-robin load balancing)
client_manager = get_client_manager()
oci_client = client_manager.get_client()
# Adapt messages
messages = adapt_chat_messages([msg.dict() for msg in request.messages])
# Extract parameters
params = extract_chat_params(request)
# Check global streaming setting
# If streaming is globally disabled, override client request
enable_stream = request.stream and settings.enable_streaming
if not settings.enable_streaming and request.stream:
logger.info("Streaming requested but globally disabled via ENABLE_STREAMING=false")
# Handle streaming
if enable_stream:
request_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
async def generate_stream() -> AsyncIterator[str]:
"""Generate streaming response with true non-blocking streaming."""
try:
# Run OCI SDK call in executor to prevent blocking
# This is critical for achieving true streaming (msToFirstChunk < 1s)
loop = asyncio.get_event_loop()
response = await loop.run_in_executor(
None,
lambda: oci_client.chat(
model_id=request.model,
messages=messages,
temperature=params["temperature"],
max_tokens=params["max_tokens"],
top_p=params["top_p"],
stream=True, # Enable real streaming
tools=params.get("tools"),
)
)
# Process real streaming response
accumulated_usage = None
# Check if response.data is an SSE stream (iterable)
# When stream=True, OCI SDK returns response.data as SSEClient
try:
# Try to iterate over the stream
stream_data = response.data if hasattr(response, 'data') else response
# Check if it's SSEClient or any iterable type
stream_type_name = type(stream_data).__name__
is_sse_client = 'SSEClient' in stream_type_name
is_iterable = hasattr(stream_data, '__iter__') or hasattr(stream_data, '__next__')
# SSEClient is always treated as streaming, even if hasattr check fails
if is_sse_client or is_iterable:
# Real streaming: iterate over chunks
# SSEClient requires calling .events() method to iterate
if is_sse_client and hasattr(stream_data, 'events'):
iterator = stream_data.events()
else:
iterator = stream_data
# Send first chunk with role and empty content (OpenAI format)
yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)
# Use queue for thread-safe chunk forwarding
import queue
import threading
chunk_queue = queue.Queue()
def read_chunks():
"""Read chunks in background thread and put in queue."""
try:
for chunk in iterator:
chunk_queue.put(("chunk", chunk))
chunk_queue.put(("done", None))
except Exception as e:
chunk_queue.put(("error", e))
# Start background thread to read chunks
reader_thread = threading.Thread(target=read_chunks, daemon=True)
reader_thread.start()
# Yield chunks as they arrive from queue
while True:
# Non-blocking queue get with timeout
try:
msg_type, data = await loop.run_in_executor(
None,
lambda: chunk_queue.get(timeout=0.01)
)
except queue.Empty:
# Allow other async tasks to run
await asyncio.sleep(0)
continue
if msg_type == "done":
break
elif msg_type == "error":
raise data
elif msg_type == "chunk":
chunk = data
# Extract delta content from chunk
delta_text = extract_delta_from_chunk(chunk)
if delta_text:
yield adapt_streaming_chunk(delta_text, request.model, request_id, 0, is_first=False)
# Try to extract usage from chunk (typically in final chunk)
# Handle both SSE Event format and object format
if hasattr(chunk, 'data'):
# SSE Event - parse JSON to extract usage
try:
import json
parsed = json.loads(chunk.data)
if isinstance(parsed, dict) and 'usage' in parsed:
usage_data = parsed['usage']
accumulated_usage = {
"prompt_tokens": usage_data.get('promptTokens', 0) or 0,
"completion_tokens": usage_data.get('completionTokens', 0) or 0,
"total_tokens": usage_data.get('totalTokens', 0) or 0
}
except (json.JSONDecodeError, KeyError, TypeError):
pass
elif hasattr(chunk, 'usage') and chunk.usage:
# Object format
accumulated_usage = {
"prompt_tokens": getattr(chunk.usage, 'prompt_tokens', 0) or 0,
"completion_tokens": getattr(chunk.usage, 'completion_tokens', 0) or 0,
"total_tokens": getattr(chunk.usage, 'total_tokens', 0) or 0
}
# Send done message with usage
yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)
else:
# Fallback: non-streaming response, simulate streaming
logger.warning(f"OCI SDK returned non-iterable response (type: {type(stream_data).__name__}), falling back to simulated streaming")
# Extract text from non-streaming response
chat_response = stream_data.chat_response if hasattr(stream_data, 'chat_response') else stream_data
content = extract_content_from_response(chat_response)
# Extract usage information
if hasattr(stream_data, 'usage'):
oci_usage = stream_data.usage
accumulated_usage = {
"prompt_tokens": getattr(oci_usage, 'prompt_tokens', 0) or 0,
"completion_tokens": getattr(oci_usage, 'completion_tokens', 0) or 0,
"total_tokens": getattr(oci_usage, 'total_tokens', 0) or 0
}
# Simulate streaming by chunking
# First send empty chunk with role (OpenAI format)
yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)
chunk_size = settings.stream_chunk_size
for i in range(0, len(content), chunk_size):
chunk = content[i:i + chunk_size]
yield adapt_streaming_chunk(chunk, request.model, request_id, 0, is_first=False)
yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)
except TypeError as te:
# Handle case where response is not iterable at all
logger.error(f"Response is not iterable: {te}", exc_info=True)
raise
except Exception as e:
logger.error(f"Error in streaming: {str(e)}", exc_info=True)
import json
# Handle by exception type and filter sensitive information
if isinstance(e, ServiceError):
error_response = OCIErrorHandler.sanitize_oci_error(e)
else:
# Generic errors may also contain sensitive information, so filter them too
filtered_msg = OCIErrorHandler.filter_sensitive_info(str(e))
error_response = ErrorResponse(
error=ErrorDetail(
message=f"An error occurred during streaming: {filtered_msg}",
type="server_error",
code="streaming_error"
)
)
yield f"data: {json.dumps(error_response.dict(), ensure_ascii=False)}\n\n"
return StreamingResponse(
generate_stream(),
media_type="text/event-stream"
)
# Non-streaming response
try:
response = oci_client.chat(
model_id=request.model,
messages=messages,
temperature=params["temperature"],
max_tokens=params["max_tokens"],
top_p=params["top_p"],
stream=False,
tools=params.get("tools"),
)
# Adapt response to OpenAI format
openai_response = adapt_chat_response(response, request.model)
if settings.log_responses:
logger.debug(f"Response: {openai_response}")
return openai_response
except Exception as e:
logger.error(f"Error in chat completion: {str(e)}", exc_info=True)
# Re-raise so the global exception handler filters sensitive information consistently
raise

85
src/api/routers/embeddings.py Normal file
View File

@@ -0,0 +1,85 @@
"""
Embeddings API router - OpenAI compatible embeddings endpoint.
"""
import logging
from fastapi import APIRouter, Depends, HTTPException
from api.auth import get_api_key
from api.schemas import EmbeddingRequest, EmbeddingResponse
from api.adapters.request_adapter import adapt_embedding_input
from api.adapters.response_adapter import adapt_embedding_response
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
from core.config import get_settings
from core.client_manager import get_client_manager
from core.models import get_model_config
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/embeddings",
tags=["embeddings"],
dependencies=[Depends(get_api_key)]
)
@router.post("", response_model=EmbeddingResponse)
@router.post("/", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
"""
Create embeddings using OCI Generative AI.
Args:
request: Embedding request
Returns:
Embedding response
"""
logger.info(f"Embedding request for model: {request.model}")
settings = get_settings()
# Validate model exists
model_config = get_model_config(request.model)
if not model_config:
raise ModelNotFoundException(request.model)
# Validate model type is embedding
if model_config.type != "embedding":
raise InvalidModelTypeException(
model_id=request.model,
expected_type="embedding",
actual_type=model_config.type
)
# Get OCI client from manager (round-robin load balancing)
client_manager = get_client_manager()
oci_client = client_manager.get_client()
# Adapt input
texts = adapt_embedding_input(request)
input_count = len(texts)
try:
# Generate embeddings
response = oci_client.embed(
model_id=request.model,
texts=texts,
truncate=settings.embed_truncate,
)
# Adapt response to OpenAI format
openai_response = adapt_embedding_response(
response,
request.model,
input_count
)
if settings.log_responses:
logger.debug(f"Embeddings generated: {len(openai_response.data)} vectors")
return openai_response
except Exception as e:
logger.error(f"Error in embedding generation: {str(e)}", exc_info=True)
# Re-raise so the global exception handler filters sensitive information consistently
raise

78
src/api/routers/models.py Normal file
View File

@@ -0,0 +1,78 @@
"""
Models API router - OpenAI compatible model listing.
"""
import logging
from fastapi import APIRouter, Depends
from api.auth import get_api_key
from api.schemas import ModelListResponse, ModelInfo
from core.models import get_all_models
logger = logging.getLogger(__name__)
router = APIRouter(
prefix="/models",
tags=["models"],
dependencies=[Depends(get_api_key)]
)
@router.get("", response_model=ModelListResponse)
@router.get("/", response_model=ModelListResponse)
async def list_models():
"""
List available models in OpenAI format.
Returns:
ModelListResponse: List of available models
"""
logger.info("Listing available models")
models = get_all_models()
model_list = [
ModelInfo(
id=model.id,
object="model",
created=0,
owned_by="oracle"
)
for model in models
]
return ModelListResponse(
object="list",
data=model_list
)
@router.get("/{model_id}", response_model=ModelInfo)
async def get_model(model_id: str):
"""
Get information about a specific model.
Args:
model_id: Model identifier
Returns:
ModelInfo: Model information
"""
logger.info(f"Getting model info: {model_id}")
from core.models import get_model_config
model_config = get_model_config(model_id)
if not model_config:
from fastapi import HTTPException
raise HTTPException(
status_code=404,
detail=f"Model {model_id} not found"
)
return ModelInfo(
id=model_config.id,
object="model",
created=0,
owned_by="oracle"
)

139
src/api/schemas.py Normal file
View File

@@ -0,0 +1,139 @@
"""
OpenAI-compatible API schemas.
"""
from typing import List, Optional, Union, Dict, Any, Literal
from pydantic import BaseModel, Field
# ============= Chat Completion Schemas =============
class ChatMessage(BaseModel):
"""A chat message."""
role: Literal["system", "user", "assistant", "tool"]
content: Union[str, List[Dict[str, Any]]]
name: Optional[str] = None
tool_calls: Optional[List[Dict[str, Any]]] = None
tool_call_id: Optional[str] = None
class ChatCompletionRequest(BaseModel):
"""OpenAI chat completion request."""
model: str
messages: List[ChatMessage]
temperature: Optional[float] = 0.7
top_p: Optional[float] = 1.0
n: Optional[int] = 1
stream: Optional[bool] = True # Default to streaming
stop: Optional[Union[str, List[str]]] = None
max_tokens: Optional[int] = None
presence_penalty: Optional[float] = 0.0
frequency_penalty: Optional[float] = 0.0
logit_bias: Optional[Dict[str, float]] = None
user: Optional[str] = None
tools: Optional[List[Dict[str, Any]]] = None
tool_choice: Optional[Union[str, Dict[str, Any]]] = None
class ChatCompletionChoice(BaseModel):
"""A chat completion choice."""
index: int
message: ChatMessage
finish_reason: Optional[str] = None
logprobs: Optional[Dict[str, Any]] = None
class ChatCompletionUsage(BaseModel):
"""Token usage information."""
prompt_tokens: int
completion_tokens: int
total_tokens: int
class ChatCompletionResponse(BaseModel):
"""OpenAI chat completion response."""
id: str
object: str = "chat.completion"
created: int
model: str
choices: List[ChatCompletionChoice]
usage: Optional[ChatCompletionUsage] = None
system_fingerprint: Optional[str] = None
class ChatCompletionStreamChoice(BaseModel):
"""A streaming chat completion choice."""
index: int
delta: Dict[str, Any]
finish_reason: Optional[str] = None
class ChatCompletionStreamResponse(BaseModel):
"""OpenAI streaming chat completion response."""
id: str
object: str = "chat.completion.chunk"
created: int
model: str
choices: List[ChatCompletionStreamChoice]
system_fingerprint: Optional[str] = None
# ============= Embedding Schemas =============
class EmbeddingRequest(BaseModel):
"""OpenAI embedding request."""
model: str
input: Union[str, List[str]]
encoding_format: Optional[str] = "float"
user: Optional[str] = None
class EmbeddingData(BaseModel):
"""Embedding data."""
object: str = "embedding"
embedding: List[float]
index: int
class EmbeddingUsage(BaseModel):
"""Embedding usage information."""
prompt_tokens: int
total_tokens: int
class EmbeddingResponse(BaseModel):
"""OpenAI embedding response."""
object: str = "list"
data: List[EmbeddingData]
model: str
usage: EmbeddingUsage
# ============= Model Schemas =============
class ModelInfo(BaseModel):
"""Model information."""
id: str
object: str = "model"
created: int = 0
owned_by: str = "oracle"
class ModelListResponse(BaseModel):
"""Model list response."""
object: str = "list"
data: List[ModelInfo]
# ============= Error Schemas =============
class ErrorDetail(BaseModel):
"""Error detail."""
message: str
type: str
param: Optional[str] = None
code: Optional[str] = None
class ErrorResponse(BaseModel):
"""Error response."""
error: ErrorDetail

1
src/core/__init__.py Normal file
View File

@@ -0,0 +1 @@
"""Core module for OCI GenAI Gateway."""

70
src/core/client_manager.py Normal file
View File

@@ -0,0 +1,70 @@
"""
Simple OCI client manager with round-robin load balancing across multiple profiles.
"""
import logging
from typing import List, Dict
from threading import Lock
from .config import Settings, get_settings
from .oci_client import OCIGenAIClient
logger = logging.getLogger(__name__)
class OCIClientManager:
"""OCI 客户端管理器,支持轮询负载均衡和客户端连接池"""
def __init__(self, settings: Settings = None):
self.settings = settings or get_settings()
self.profiles = self.settings.get_profiles()
self.current_index = 0
self.lock = Lock()
# Pre-create the client connection pool
self._clients: Dict[str, OCIGenAIClient] = {}
logger.info(f"Initializing OCI client manager with {len(self.profiles)} profiles: {self.profiles}")
for profile in self.profiles:
try:
self._clients[profile] = OCIGenAIClient(self.settings, profile)
logger.info(f"✓ Created client instance: {profile}")
except Exception as e:
logger.error(f"✗ Failed to create client instance [{profile}]: {e}")
raise
def get_client(self) -> OCIGenAIClient:
"""
Get the next client (round-robin strategy).
Selects a client instance from the pre-created connection pool using a round-robin algorithm.
This method is thread-safe.
Returns:
OCIGenAIClient: A pre-created OCI client instance
Note:
Client instances are created when the manager is initialized; this method never creates new ones.
"""
with self.lock:
# With a single profile, return it directly
if len(self.profiles) == 1:
return self._clients[self.profiles[0]]
# Pick the next profile in round-robin order
profile = self.profiles[self.current_index]
self.current_index = (self.current_index + 1) % len(self.profiles)
logger.debug(f"Selected profile: {profile} (round-robin)")
return self._clients[profile]
# Global client manager instance
_client_manager = None
def get_client_manager() -> OCIClientManager:
"""获取全局客户端管理器实例"""
global _client_manager
if _client_manager is None:
_client_manager = OCIClientManager()
return _client_manager

100
src/core/config.py Normal file
View File

@@ -0,0 +1,100 @@
"""
Configuration module for OCI Generative AI to OpenAI API Gateway.
"""
import os
import logging
from pathlib import Path
from typing import Optional, List
from pydantic_settings import BaseSettings
logger = logging.getLogger(__name__)
# Find project root directory (where .env should be)
def find_project_root() -> Path:
"""Find the project root directory by looking for .env or requirements.txt."""
current = Path(__file__).resolve().parent # Start from src/core/
# Go up until we find project root markers
while current != current.parent:
if (current / ".env").exists() or (current / "requirements.txt").exists():
return current
current = current.parent
return Path.cwd() # Fallback to current directory
PROJECT_ROOT = find_project_root()
class Settings(BaseSettings):
"""Application settings with environment variable support."""
# API Settings
api_title: str = "OCI GenAI to OpenAI API Gateway"
api_version: str = "1.0.0"
api_prefix: str = "/v1"
api_port: int = 8000
api_host: str = "0.0.0.0"
debug: bool = False
# Authentication
api_keys: List[str] = ["sk-oci-genai-default-key"]
# OCI Settings
oci_config_file: str = "~/.oci/config"
oci_config_profile: str = "DEFAULT" # Supports multiple profiles separated by commas, e.g. DEFAULT,CHICAGO,ASHBURN
oci_auth_type: str = "api_key" # api_key or instance_principal
# GenAI Service Settings
genai_endpoint: Optional[str] = None
max_tokens: int = 4096
temperature: float = 0.7
# Embedding Settings
embed_truncate: str = "END" # END or START
# Streaming Settings
enable_streaming: bool = True
stream_chunk_size: int = 1024
# Logging
log_level: str = "INFO"
log_requests: bool = False
log_responses: bool = False
log_file: Optional[str] = None
log_file_max_size: int = 10 # MB
log_file_backup_count: int = 5
class Config:
# Use absolute path to .env file in project root
env_file = str(PROJECT_ROOT / ".env")
env_file_encoding = "utf-8"
case_sensitive = False
# Allow reading from environment variables
env_prefix = ""
def model_post_init(self, __context) -> None:
"""Expand OCI config file path."""
# Expand OCI config file path
config_path = os.path.expanduser(self.oci_config_file)
# If it's a relative path (starts with ./ or doesn't start with /), resolve it from project root
if not config_path.startswith('/') and not config_path.startswith('~'):
# Remove leading ./ if present
if config_path.startswith('./'):
config_path = config_path[2:]
config_path = str(PROJECT_ROOT / config_path)
# Update the config_path
self.oci_config_file = config_path
def get_profiles(self) -> List[str]:
"""获取配置的所有 profile 列表"""
return [p.strip() for p in self.oci_config_profile.split(',') if p.strip()]
# Global settings instance
settings = Settings()
def get_settings() -> Settings:
"""Get the global settings instance."""
return settings

260
src/core/models.py Normal file
View File

@@ -0,0 +1,260 @@
"""
Model definitions and configurations for OCI Generative AI models.
"""
import logging
import os
from typing import Dict, List, Optional
from pydantic import BaseModel
logger = logging.getLogger(__name__)
class ModelConfig(BaseModel):
"""Configuration for a single model."""
id: str
name: str
type: str # ondemand, dedicated, embedding
provider: str # cohere, meta, openai, etc.
region: Optional[str] = None
compartment_id: Optional[str] = None
endpoint: Optional[str] = None
supports_streaming: bool = True
supports_tools: bool = False
supports_multimodal: bool = False
multimodal_types: List[str] = []
max_tokens: int = 4096
context_window: int = 128000
# OCI Generative AI models (dynamically loaded from OCI at startup)
OCI_CHAT_MODELS: Dict[str, ModelConfig] = {}
OCI_EMBED_MODELS: Dict[str, ModelConfig] = {}
def get_all_models() -> List[ModelConfig]:
"""Get all available models."""
return list(OCI_CHAT_MODELS.values()) + list(OCI_EMBED_MODELS.values())
def get_chat_models() -> List[ModelConfig]:
"""Get all chat models."""
return list(OCI_CHAT_MODELS.values())
def get_embed_models() -> List[ModelConfig]:
"""Get all embedding models."""
return list(OCI_EMBED_MODELS.values())
def get_model_config(model_id: str) -> Optional[ModelConfig]:
"""Get configuration for a specific model."""
if model_id in OCI_CHAT_MODELS:
return OCI_CHAT_MODELS[model_id]
if model_id in OCI_EMBED_MODELS:
return OCI_EMBED_MODELS[model_id]
return None
def fetch_models_from_oci(compartment_id: Optional[str] = None, region: Optional[str] = None,
config_path: str = "./.oci/config",
profile: str = "DEFAULT") -> Dict[str, Dict[str, ModelConfig]]:
"""
Dynamically fetch available models from OCI Generative AI service.
If compartment_id or region are not provided, they will be read from the OCI config file.
- compartment_id defaults to 'tenancy' from config
- region defaults to 'region' from config
Args:
compartment_id: OCI compartment ID (optional, defaults to tenancy from config)
region: OCI region (optional, defaults to region from config)
config_path: Path to OCI config file
profile: OCI config profile name
Returns:
Dictionary with 'chat' and 'embed' keys containing model configs
"""
try:
import oci
from oci.generative_ai import GenerativeAiClient
# Load OCI configuration
config = oci.config.from_file(
file_location=os.path.expanduser(config_path),
profile_name=profile
)
# Use values from config if not provided
if not region:
region = config.get("region")
logger.info(f"📍 Using region from OCI config: {region}")
if not compartment_id:
compartment_id = config.get("tenancy")
logger.info(f"📦 Using tenancy as compartment_id: {compartment_id}")
if not region or not compartment_id:
logger.error("❌ Missing region or compartment_id in OCI config")
return {"chat": {}, "embed": {}}
# Create GenerativeAiClient (not GenerativeAiInferenceClient)
service_endpoint = f"https://generativeai.{region}.oci.oraclecloud.com"
logger.info(f"🔗 Connecting to OCI GenerativeAI endpoint: {service_endpoint}")
client = GenerativeAiClient(config, service_endpoint=service_endpoint)
chat_models = {}
embed_models = {}
# Fetch all models (without capability filter to work with tenancy compartment)
try:
logger.info("🔍 Fetching all models from OCI...")
logger.debug(f" Compartment ID: {compartment_id}")
logger.debug(f" Method: Fetching all models, will filter by capabilities in Python")
response = client.list_models(
compartment_id=compartment_id
)
logger.info(f"✅ Successfully fetched {len(response.data.items)} models from OCI")
# Filter models by capabilities in Python
for model in response.data.items:
model_id = model.display_name
provider = model_id.split(".")[0] if "." in model_id else "unknown"
capabilities = model.capabilities if hasattr(model, 'capabilities') else []
logger.debug(f" Processing: {model_id} (capabilities: {capabilities})")
# Chat models: have CHAT or TEXT_GENERATION capability
if 'CHAT' in capabilities or 'TEXT_GENERATION' in capabilities:
supports_streaming = True # Most models support streaming
supports_tools = provider in ["cohere", "meta"] # These providers support tools
# Detect multimodal support from capabilities
supports_multimodal = False
multimodal_types = []
if 'IMAGE' in capabilities or 'VISION' in capabilities:
supports_multimodal = True
multimodal_types.append("image")
chat_models[model_id] = ModelConfig(
id=model_id,
name=model.display_name,
type="ondemand",
provider=provider,
region=region,
compartment_id=compartment_id,
supports_streaming=supports_streaming,
supports_tools=supports_tools,
supports_multimodal=supports_multimodal,
multimodal_types=multimodal_types,
max_tokens=4096,
context_window=128000
)
# Embedding models: have TEXT_EMBEDDINGS capability
elif 'TEXT_EMBEDDINGS' in capabilities:
embed_models[model_id] = ModelConfig(
id=model_id,
name=model.display_name,
type="embedding",
provider=provider,
region=region,
compartment_id=compartment_id,
supports_streaming=False,
supports_tools=False,
max_tokens=512,
context_window=512
)
logger.info(f"✅ Filtered {len(chat_models)} chat models")
if chat_models:
logger.debug(f" Chat models: {', '.join(list(chat_models.keys())[:5])}{'...' if len(chat_models) > 5 else ''}")
logger.info(f"✅ Filtered {len(embed_models)} embedding models")
if embed_models:
logger.debug(f" Embed models: {', '.join(embed_models.keys())}")
except Exception as e:
logger.warning(f"⚠️ Failed to fetch models from OCI")
logger.warning(f" Error: {e}")
if hasattr(e, 'status'):
logger.warning(f" HTTP Status: {e.status}")
if hasattr(e, 'code'):
logger.warning(f" Error Code: {e.code}")
logger.info(f"💡 Tip: Check your OCI credentials and permissions")
return {"chat": chat_models, "embed": embed_models}
except Exception as e:
logger.error(f"❌ Failed to initialize OCI client for model discovery")
logger.error(f" Error: {e}")
logger.info("💡 Tip: Check your OCI credentials and permissions")
return {"chat": {}, "embed": {}}
def update_models_from_oci(compartment_id: Optional[str] = None,
region: Optional[str] = None,
config_path: str = "./.oci/config",
profile: str = "DEFAULT") -> None:
"""
Update global model dictionaries with models from OCI.
Raises RuntimeError if model fetching fails.
Priority for configuration values:
1. Explicitly provided parameters
2. Environment variables (OCI_COMPARTMENT_ID, OCI_REGION)
3. Values from .oci/config file (tenancy, region)
Raises:
RuntimeError: If no models can be fetched from OCI
"""
global OCI_CHAT_MODELS, OCI_EMBED_MODELS
# Priority: explicit params > environment > config file
if not compartment_id:
compartment_id = os.getenv("OCI_COMPARTMENT_ID")
if not region:
region = os.getenv("OCI_REGION")
# Note: If still not set, fetch_models_from_oci will try to read from config file
logger.info("🚀 Attempting to fetch models from OCI...")
fetched = fetch_models_from_oci(compartment_id, region, config_path, profile)
# Fail-fast: Require successful model fetching
if not fetched["chat"] and not fetched["embed"]:
error_msg = (
"❌ Failed to fetch any models from OCI.\n\n"
"Troubleshooting steps:\n"
"1. Verify your OCI credentials are configured correctly:\n"
f" - Config file: {config_path}\n"
f" - Profile: {profile}\n"
" - Run: oci iam region list (to test authentication)\n\n"
"2. Check your OCI permissions:\n"
" - Ensure you have access to Generative AI service\n"
" - Verify compartment_id/tenancy has available models\n\n"
"3. Check network connectivity:\n"
" - Ensure you can reach OCI endpoints\n"
f" - Test region: {region or 'from config file'}\n\n"
"4. Review logs above for detailed error messages"
)
logger.error(error_msg)
raise RuntimeError(
"Failed to fetch models from OCI. "
"The service cannot start without available models. "
"Check the logs above for troubleshooting guidance."
)
# Update global model registries
if fetched["chat"]:
OCI_CHAT_MODELS.clear()
OCI_CHAT_MODELS.update(fetched["chat"])
logger.info(f"✅ Loaded {len(OCI_CHAT_MODELS)} chat models from OCI")
if fetched["embed"]:
OCI_EMBED_MODELS.clear()
OCI_EMBED_MODELS.update(fetched["embed"])
logger.info(f"✅ Loaded {len(OCI_EMBED_MODELS)} embedding models from OCI")
logger.info(f"✅ Model discovery completed successfully")

361
src/core/oci_client.py Normal file
View File

@@ -0,0 +1,361 @@
"""
OCI Generative AI client wrapper.
"""
import os
import logging
from typing import Optional, AsyncIterator
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
ChatDetails,
CohereChatRequest,
GenericChatRequest,
OnDemandServingMode,
DedicatedServingMode,
CohereMessage,
Message,
TextContent,
EmbedTextDetails,
)
# Try to import multimodal content types
try:
from oci.generative_ai_inference.models import (
ImageContent,
ImageUrl,
AudioContent,
AudioUrl,
VideoContent,
VideoUrl,
)
MULTIMODAL_SUPPORTED = True
logger_init = logging.getLogger(__name__)
logger_init.info("OCI SDK multimodal content types available")
except ImportError:
MULTIMODAL_SUPPORTED = False
logger_init = logging.getLogger(__name__)
logger_init.warning("OCI SDK does not support multimodal content types, using dict format as fallback")
from .config import Settings
from .models import get_model_config, ModelConfig
logger = logging.getLogger(__name__)
def build_multimodal_content(content_list: list) -> list:
"""
Build OCI ChatContent object array from adapted content list.
Supports both HTTP URLs and Base64 data URIs (data:image/jpeg;base64,...).
Args:
content_list: List of content items from request adapter
Returns:
List of OCI ChatContent objects or dicts (fallback)
"""
if not MULTIMODAL_SUPPORTED:
# Fallback: return dict format, OCI SDK might auto-convert
return content_list
oci_contents = []
for item in content_list:
if not isinstance(item, dict):
continue
item_type = item.get("type")
if item_type == "text":
oci_contents.append(TextContent(text=item.get("text", "")))
elif item_type == "image_url":
image_data = item.get("image_url", {})
if "url" in image_data:
# ImageUrl accepts both HTTP URLs and data URIs (data:image/jpeg;base64,...)
img_url = ImageUrl(url=image_data["url"])
# Optional: support 'detail' parameter if provided
if "detail" in image_data:
img_url.detail = image_data["detail"]
oci_contents.append(ImageContent(image_url=img_url, type="IMAGE"))
elif item_type == "audio":
audio_data = item.get("audio_url", {})
if "url" in audio_data:
# AudioUrl accepts both HTTP URLs and data URIs (data:audio/wav;base64,...)
audio_url = AudioUrl(url=audio_data["url"])
oci_contents.append(AudioContent(audio_url=audio_url, type="AUDIO"))
elif item_type == "video":
video_data = item.get("video_url", {})
if "url" in video_data:
# VideoUrl accepts both HTTP URLs and data URIs (data:video/mp4;base64,...)
video_url = VideoUrl(url=video_data["url"])
oci_contents.append(VideoContent(video_url=video_url, type="VIDEO"))
return oci_contents if oci_contents else [TextContent(text="")]
class OCIGenAIClient:
"""Wrapper for OCI Generative AI client."""
def __init__(self, settings: Settings, profile: Optional[str] = None):
"""
Initialize the OCI GenAI client.
Args:
settings: Application settings
profile: Optional OCI config profile name. If not provided, the first profile from settings is used.
"""
self.settings = settings
self.profile = profile or settings.get_profiles()[0]
self._client: Optional[GenerativeAiInferenceClient] = None
self._config: Optional[dict] = None  # oci.config.from_file returns a plain dict
self._region: Optional[str] = None
self._compartment_id: Optional[str] = None
def _get_config(self) -> dict:
"""Get OCI configuration."""
if self._config is None:
if self.settings.oci_auth_type == "instance_principal":
signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
self._config = {"signer": signer}
else:
config_path = os.path.expanduser(self.settings.oci_config_file)
self._config = oci.config.from_file(
file_location=config_path,
profile_name=self.profile
)
# Read region and compartment_id (tenancy) from the config
if self._region is None:
self._region = self._config.get("region")
if self._compartment_id is None:
self._compartment_id = self._config.get("tenancy")
return self._config
@property
def region(self) -> Optional[str]:
"""获取当前配置的区域"""
if self._region is None and self._config is None:
self._get_config()
return self._region
@property
def compartment_id(self) -> Optional[str]:
"""获取当前配置的 compartment ID"""
if self._compartment_id is None and self._config is None:
self._get_config()
return self._compartment_id
def _get_client(self) -> GenerativeAiInferenceClient:
"""Get or create OCI Generative AI Inference client with correct endpoint."""
config = self._get_config()
# Use INFERENCE endpoint (not management endpoint)
# Official format: https://inference.generativeai.{region}.oci.oraclecloud.com
inference_endpoint = f"https://inference.generativeai.{self.region}.oci.oraclecloud.com"
if isinstance(config, dict) and "signer" in config:
# For instance principal
client = GenerativeAiInferenceClient(
config={},
service_endpoint=inference_endpoint,
**config
)
return client
# For API key authentication
client = GenerativeAiInferenceClient(
config=config,
service_endpoint=inference_endpoint,
retry_strategy=oci.retry.NoneRetryStrategy(),
timeout=(10, 240)
)
return client
def chat(
self,
model_id: str,
messages: list,
temperature: float = 0.7,
max_tokens: int = 1024,
top_p: float = 1.0,
stream: bool = False,
tools: Optional[list] = None,
):
"""Send a chat completion request to OCI GenAI."""
model_config = get_model_config(model_id)
if not model_config:
raise ValueError(f"Unsupported model: {model_id}")
if not self.compartment_id:
raise ValueError("Compartment ID is required")
client = self._get_client()
# Prepare serving mode
if model_config.type == "dedicated" and model_config.endpoint:
serving_mode = DedicatedServingMode(endpoint_id=model_config.endpoint)
else:
serving_mode = OnDemandServingMode(model_id=model_id)
# Convert messages based on provider
if model_config.provider == "cohere":
chat_request = self._build_cohere_request(
messages, temperature, max_tokens, top_p, tools, stream
)
elif model_config.provider in ["meta", "xai", "google", "openai"]:
chat_request = self._build_generic_request(
messages, temperature, max_tokens, top_p, tools, model_config.provider, stream
)
else:
raise ValueError(f"Unsupported provider: {model_config.provider}")
chat_details = ChatDetails(
serving_mode=serving_mode,
compartment_id=self.compartment_id,
chat_request=chat_request,
)
logger.debug(f"Sending chat request to OCI GenAI: {model_id}")
response = client.chat(chat_details)
return response
def _build_cohere_request(
self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], stream: bool = False
) -> CohereChatRequest:
"""Build Cohere chat request.
Note: Cohere models only support text content, not multimodal.
"""
# Convert messages to Cohere format
chat_history = []
message = None
for msg in messages:
role = msg["role"]
content = msg["content"]
# Extract text from multimodal content
if isinstance(content, list):
# Extract text parts only
text_parts = []
for item in content:
if isinstance(item, dict) and item.get("type") == "text":
text_parts.append(item.get("text", ""))
content = " ".join(text_parts) if text_parts else ""
if role == "system":
# Cohere uses preamble for system messages
continue
elif role == "user":
message = content
elif role == "assistant":
chat_history.append(
CohereMessage(role="CHATBOT", message=content)
)
elif role == "tool":
# Handle tool responses if needed
pass
# Get preamble from system messages
preamble_override = None
for msg in messages:
if msg["role"] == "system":
preamble_override = msg["content"]
break
return CohereChatRequest(
message=message,
chat_history=chat_history if chat_history else None,
preamble_override=preamble_override,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
is_stream=stream,
)
def _build_generic_request(
self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], provider: str, stream: bool = False
) -> GenericChatRequest:
"""Build Generic chat request for Llama and other models."""
# Convert messages to Generic format
generic_messages = []
for msg in messages:
role = msg["role"]
content = msg["content"]
# Handle multimodal content
if isinstance(content, list):
# Build OCI ChatContent objects from multimodal content
oci_contents = build_multimodal_content(content)
else:
# Simple text content
if MULTIMODAL_SUPPORTED:
oci_contents = [TextContent(text=content)]
else:
# Fallback: use dict format
oci_contents = [{"type": "text", "text": content}]
if role == "user":
oci_role = "USER"
elif role in ["assistant", "model"]:
oci_role = "ASSISTANT"
elif role == "system":
oci_role = "SYSTEM"
else:
oci_role = role.upper()
# Create Message with role and content objects
logger.debug(f"Creating message with role: {oci_role}, provider: {provider}, original role: {role}")
generic_messages.append(
Message(
role=oci_role,
content=oci_contents
)
)
return GenericChatRequest(
messages=generic_messages,
temperature=temperature,
max_tokens=max_tokens,
top_p=top_p,
is_stream=stream,
)
def embed(
self,
model_id: str,
texts: list,
truncate: str = "END",
):
"""Generate embeddings using OCI GenAI."""
model_config = get_model_config(model_id)
if not model_config or model_config.type != "embedding":
raise ValueError(f"Invalid embedding model: {model_id}")
if not self.compartment_id:
raise ValueError("Compartment ID is required")
client = self._get_client()
serving_mode = OnDemandServingMode(
serving_type="ON_DEMAND",
model_id=model_id
)
embed_details = EmbedTextDetails(
serving_mode=serving_mode,
compartment_id=self.compartment_id,
inputs=texts,
truncate=truncate,
is_echo=False,
input_type="SEARCH_QUERY",
)
logger.debug(f"Sending embed request to OCI GenAI: {model_id}")
response = client.embed_text(embed_details)
return response
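A hedged usage sketch of the wrapper, assuming the settings and profiles shown earlier and a model ID that exists in the registry (not part of the committed file):

# Illustrative only.
from core.config import get_settings

client = OCIGenAIClient(get_settings())      # uses the first configured profile
response = client.chat(
    model_id="cohere.command-r-plus",        # placeholder; pick an ID from GET /v1/models
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello."},
    ],
    temperature=0.2,
    max_tokens=128,
    stream=False,
)
# response is the raw OCI SDK response; the API layer adapts it to the OpenAI format.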

274
src/main.py Normal file
View File

@@ -0,0 +1,274 @@
"""
Main FastAPI application for OCI Generative AI to OpenAI API Gateway.
"""
import logging
import sys
import os
from contextlib import asynccontextmanager
from logging.handlers import RotatingFileHandler
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from oci.exceptions import ServiceError
from core.config import get_settings
from core.models import update_models_from_oci
from api.routers import models, chat, embeddings
from api.schemas import ErrorResponse, ErrorDetail
from api.error_handler import OCIErrorHandler
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
# Configure logging
def setup_logging():
"""Setup logging configuration."""
settings = get_settings()
# Create handlers list
handlers = [
logging.StreamHandler(sys.stdout)
]
# Add file handler if log_file is configured
if settings.log_file:
log_dir = os.path.dirname(settings.log_file)
if log_dir and not os.path.exists(log_dir):
os.makedirs(log_dir, exist_ok=True)
file_handler = RotatingFileHandler(
settings.log_file,
maxBytes=settings.log_file_max_size * 1024 * 1024, # Convert MB to bytes
backupCount=settings.log_file_backup_count,
encoding='utf-8'
)
handlers.append(file_handler)
logging.basicConfig(
level=getattr(logging, settings.log_level.upper()),
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
handlers=handlers
)
setup_logging()
logger = logging.getLogger(__name__)
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Application lifespan handler."""
logger.info("=" * 60)
logger.info("Starting OCI GenAI to OpenAI API Gateway")
logger.info("=" * 60)
settings = get_settings()
logger.info(f"API Version: {settings.api_version}")
logger.info(f"API Prefix: {settings.api_prefix}")
logger.info(f"Debug Mode: {settings.debug}")
logger.info(f"OCI Config: {settings.oci_config_file}")
profiles = settings.get_profiles()
logger.info(f"OCI Profiles: {', '.join(profiles)}")
try:
# Fetch models from OCI (fails fast if unable to fetch)
# Use the first configured profile for model discovery
update_models_from_oci(
config_path=settings.oci_config_file,
profile=profiles[0] if profiles else "DEFAULT"
)
logger.info("=" * 60)
logger.info("✅ Startup completed successfully")
logger.info(f"Server listening on {settings.api_host}:{settings.api_port}")
logger.info("=" * 60)
except RuntimeError as e:
logger.error("=" * 60)
logger.error("❌ STARTUP FAILED")
logger.error("=" * 60)
logger.error(f"Reason: {str(e)}")
logger.error("")
logger.error("The service cannot start without available models from OCI.")
logger.error("Please review the troubleshooting steps above and fix the issue.")
logger.error("=" * 60)
raise
except Exception as e:
logger.error("=" * 60)
logger.error("❌ UNEXPECTED STARTUP ERROR")
logger.error("=" * 60)
logger.error(f"Error type: {type(e).__name__}")
logger.error(f"Error message: {str(e)}")
logger.error("=" * 60)
raise
yield
logger.info("=" * 60)
logger.info("Shutting down OCI GenAI to OpenAI API Gateway")
logger.info("=" * 60)
# Create FastAPI app
settings = get_settings()
app = FastAPI(
title=settings.api_title,
version=settings.api_version,
description="OpenAI-compatible REST API for Oracle Cloud Infrastructure Generative AI Service",
lifespan=lifespan,
docs_url="/docs" if settings.debug else None,
redoc_url="/redoc" if settings.debug else None,
)
# Add CORS middleware
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
# Exception handlers
@app.exception_handler(ModelNotFoundException)
async def model_not_found_handler(request: Request, exc: ModelNotFoundException):
"""Handle model not found exceptions with OpenAI-compatible format."""
error = ErrorDetail(
message=exc.detail,
type=exc.error_type,
code=exc.error_code
)
return JSONResponse(
status_code=exc.status_code,
content=ErrorResponse(error=error).dict()
)
@app.exception_handler(InvalidModelTypeException)
async def invalid_model_type_handler(request: Request, exc: InvalidModelTypeException):
"""Handle invalid model type exceptions with OpenAI-compatible format."""
error = ErrorDetail(
message=exc.detail,
type=exc.error_type,
code=exc.error_code
)
return JSONResponse(
status_code=exc.status_code,
content=ErrorResponse(error=error).dict()
)
@app.exception_handler(ServiceError)
async def oci_service_error_handler(request: Request, exc: ServiceError):
"""Handle OCI SDK ServiceError exceptions."""
# Sanitize the error and filter sensitive information via OCIErrorHandler
error_response = OCIErrorHandler.sanitize_oci_error(exc)
# Determine the HTTP status code (reuse the status returned by OCI when valid)
status_code = exc.status if 400 <= exc.status < 600 else 500
return JSONResponse(
status_code=status_code,
content=error_response.dict()
)
@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
"""Handle HTTP exceptions with sensitive information filtering."""
# Filter potentially sensitive information out of the HTTPException detail
filtered_detail = OCIErrorHandler.filter_sensitive_info(str(exc.detail))
error = ErrorDetail(
message=filtered_detail,
type="invalid_request_error",
code=f"http_{exc.status_code}"
)
return JSONResponse(
status_code=exc.status_code,
content=ErrorResponse(error=error).dict()
)
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
"""Handle request validation errors."""
logger.error(f"Validation error: {exc}")
error = ErrorDetail(
message=str(exc),
type="invalid_request_error",
code="validation_error"
)
return JSONResponse(
status_code=400,
content=ErrorResponse(error=error).dict()
)
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
"""Handle general exceptions with sensitive information filtering."""
logger.error(f"Unexpected error: {exc}", exc_info=True)
# Filter sensitive information from unexpected errors as well (the full error is already logged above)
filtered_message = OCIErrorHandler.filter_sensitive_info(str(exc))
error = ErrorDetail(
message="An unexpected error occurred", # 不暴露具体错误
type="server_error",
code="internal_error"
)
return JSONResponse(
status_code=500,
content=ErrorResponse(error=error).dict()
)
# Include routers
app.include_router(models.router, prefix=settings.api_prefix)
app.include_router(chat.router, prefix=settings.api_prefix)
app.include_router(embeddings.router, prefix=settings.api_prefix)
@app.get("/")
async def root():
"""Root endpoint."""
return {
"name": settings.api_title,
"version": settings.api_version,
"description": "OpenAI-compatible REST API for OCI Generative AI",
"endpoints": {
"models": f"{settings.api_prefix}/models",
"chat": f"{settings.api_prefix}/chat/completions",
"embeddings": f"{settings.api_prefix}/embeddings"
}
}
@app.get("/health")
async def health():
"""Health check endpoint."""
return {
"status": "healthy",
"service": "oci-genai-gateway"
}
if __name__ == "__main__":
import uvicorn
uvicorn.run(
"main:app",
host=settings.api_host,
port=settings.api_port,
reload=settings.debug,
log_level=settings.log_level.lower()
)
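Once the gateway is up, any OpenAI-compatible client can talk to it. A minimal sketch with the requests library, assuming the default host/port and prefix (localhost:8000, /v1) and an API key that matches an entry in API_KEYS:

# Illustrative client call against the gateway, not part of the committed file.
import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer sk-your-configured-key"},  # must match API_KEYS
    json={
        "model": "cohere.command-r-plus",    # placeholder; see GET /v1/models
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
    timeout=60,
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])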