First commit
All checks were successful
Build and Push OCI GenAI Gateway Docker Image / docker-build-push (push) Successful in 32m3s
65
.env.example
Normal file
@@ -0,0 +1,65 @@
# API Settings
API_TITLE=OCI GenAI to OpenAI API Gateway
API_VERSION=0.0.1
API_PREFIX=/v1
API_PORT=8000
API_HOST=0.0.0.0
DEBUG=false

# Authentication
# List of API keys for authentication (JSON array)
# These are the keys clients will use in Authorization: Bearer <key>
API_KEYS=["sk-oci-genai-default-key"]

# ============================================
# OCI Configuration
# ============================================
# Path to OCI config file (usually ~/.oci/config)
OCI_CONFIG_FILE=~/.oci/config

# Profile names in the OCI config file
# Supports a single profile or multiple profiles separated by commas
# With multiple profiles, round-robin load balancing is applied automatically
# Examples:
#   Single profile:   OCI_CONFIG_PROFILE=DEFAULT
#   Multiple profiles: OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
# Note: each profile in ~/.oci/config must include region and tenancy (used as compartment_id)
OCI_CONFIG_PROFILE=DEFAULT

# Authentication type: api_key or instance_principal
OCI_AUTH_TYPE=api_key

# Optional: Direct endpoint for dedicated models
# GENAI_ENDPOINT=https://your-dedicated-endpoint

# Model Settings
# Note: Available models are dynamically loaded from OCI at startup
# Use GET /v1/models to see all available models
MAX_TOKENS=4096
TEMPERATURE=0.7

# Embedding Settings
# Truncate strategy for embeddings: END or START
EMBED_TRUNCATE=END

# Streaming Settings
# Global streaming on/off switch
# Set to false to disable streaming for all requests (overrides client stream=true)
ENABLE_STREAMING=true
# Chunk size for simulated streaming (fallback mode only)
# Only used when OCI returns non-streaming response
STREAM_CHUNK_SIZE=1024

# Logging
# Log level: DEBUG, INFO, WARNING, ERROR, CRITICAL
LOG_LEVEL=INFO
# Log incoming requests (may contain sensitive data)
LOG_REQUESTS=false
# Log responses (may contain sensitive data)
LOG_RESPONSES=false
# Log file path (optional, if not set logs only to console)
LOG_FILE=./logs/app.log
# Max log file size in MB (default: 10)
LOG_FILE_MAX_SIZE=10
# Number of backup log files to keep (default: 5)
LOG_FILE_BACKUP_COUNT=5
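The gateway's real settings class lives in `src/core/config.py`, which is not part of this commit view. As a rough illustration of how the variables above could be loaded (the class and field names here are assumptions, not the project's actual code), a minimal pydantic-settings sketch:

```python
# Illustrative sketch only - mirrors .env.example, not the real src/core/config.py.
from typing import List

from pydantic_settings import BaseSettings, SettingsConfigDict


class GatewaySettings(BaseSettings):
    model_config = SettingsConfigDict(env_file=".env", extra="ignore")

    api_keys: List[str] = ["sk-oci-genai-default-key"]  # API_KEYS is parsed as a JSON array
    oci_config_file: str = "~/.oci/config"
    oci_config_profile: str = "DEFAULT"  # may be "DEFAULT,CHICAGO,ASHBURN"
    max_tokens: int = 4096
    temperature: float = 0.7
    enable_streaming: bool = True
    stream_chunk_size: int = 1024

    @property
    def profiles(self) -> List[str]:
        # Comma-separated profiles drive the round-robin load balancing.
        return [p.strip() for p in self.oci_config_profile.split(",") if p.strip()]


settings = GatewaySettings()
print(settings.profiles)  # ['DEFAULT'] unless OCI_CONFIG_PROFILE lists several profiles
```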
82
.gitea/workflows/ci.yaml
Normal file
@@ -0,0 +1,82 @@
# .gitea/workflows/ci.yaml
name: Build and Push OCI GenAI Gateway Docker Image

on:
  push:
    branches: [main, develop]
    tags: ['*']

env:
  DOCKER_BUILDKIT: "1"
  BUILDX_NO_DEFAULT_ATTESTATIONS: "1"

jobs:
  docker-build-push:
    runs-on: ubuntu-latest-amd64
    steps:
      - uses: actions/checkout@v4

      - name: Debug branch info
        run: |
          echo "📋 Branch Information:"
          echo "  github.ref: ${{ github.ref }}"
          echo "  github.ref_name: ${{ github.ref_name }}"
          echo "  github.event_name: ${{ github.event_name }}"

      - name: Setup Docker Buildx and Login
        run: |
          # Register QEMU for multi-architecture builds
          docker run --rm --privileged multiarch/qemu-user-static --reset -p yes 2>/dev/null || true

          # Create the buildx builder
          docker buildx create --use --name oci_genai_builder \
            --driver docker-container \
            --driver-opt network=host \
            --driver-opt image=moby/buildkit:buildx-stable-1 \
            --driver-opt env.BUILDKIT_STEP_LOG_MAX_SIZE=50000000 \
            --driver-opt env.BUILDKIT_STEP_LOG_MAX_SPEED=10000000 \
            || docker buildx use oci_genai_builder
          docker buildx inspect --bootstrap

          # Log in to the Docker registry
          echo "${{ secrets.BUILD_TOKEN }}" | docker login ${{ gitea.server_url }} -u ${{ gitea.actor }} --password-stdin

      - name: Determine Docker tag
        id: tag
        run: |
          if [ "${{ github.ref_name }}" = "main" ]; then
            TAG="latest"
          elif [ "${{ github.ref_name }}" = "develop" ]; then
            TAG="develop"
          elif [[ "${{ github.ref }}" == refs/tags/* ]]; then
            TAG="${{ github.ref_name }}"
          else
            TAG="${{ github.ref_name }}"
          fi
          echo "tag=${TAG}" >> $GITHUB_OUTPUT
          echo "📦 Docker tag: ${TAG}"

      - name: Build and push multi-arch Docker image
        run: |
          # Strip the https:// prefix from the server URL
          REGISTRY=$(echo "${{ gitea.server_url }}" | sed 's|https\?://||')
          IMAGE_NAME="${REGISTRY}/${{ gitea.repository }}"
          TAG="${{ steps.tag.outputs.tag }}"
          FINAL_IMAGE_TAG="${IMAGE_NAME}:${TAG}"

          echo "🏗️ Building and pushing image: ${FINAL_IMAGE_TAG}"
          echo "  Platforms: linux/amd64, linux/arm64"

          # BuildKit output settings
          export BUILDKIT_PROGRESS=plain

          docker buildx build --pull --push \
            -t "${FINAL_IMAGE_TAG}" \
            --platform linux/amd64,linux/arm64 \
            --provenance=false \
            --sbom=false \
            -f Dockerfile .

          echo ""
          echo "✅ Build and push completed!"
          echo "🐳 Image: ${FINAL_IMAGE_TAG}"
78
.gitignore
vendored
Normal file
@@ -0,0 +1,78 @@
# Claude
.claude/
CLAUDE.md
.mcp.json

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Virtual Environment
venv/
env/
ENV/
.venv

# IDE
.vscode/
.idea/
*.swp
*.swo
*~
.DS_Store

# Environment variables
.env
.env.local

# OCI Config (contains sensitive keys)
.oci/
*.pem

# Logs
*.log
logs/

# Testing
.pytest_cache/
.coverage
htmlcov/
.tox/

# Distribution
*.tar.gz
*.whl

# Docker
*.dockerfile.swp

# Source repositories
.source/

# Temporary files
tmp/
temp/
*.tmp
example/

# OS
.DS_Store
Thumbs.db
47
Dockerfile
Normal file
@@ -0,0 +1,47 @@
# Multi-stage build for OCI GenAI to OpenAI API Gateway
FROM python:3.11-slim AS builder

# Set the working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency file
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir --user -r requirements.txt

# Final image
FROM python:3.11-slim

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PATH=/root/.local/bin:$PATH

# Set the working directory
WORKDIR /app

# Copy Python dependencies
COPY --from=builder /root/.local /root/.local

# Copy application code
COPY src/ ./src/
COPY .env.example .env

# Create the log directory
RUN mkdir -p /app/logs

# Expose the port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health').read()"

# Start the application
CMD ["python", "-m", "uvicorn", "src.main:app", "--host", "0.0.0.0", "--port", "8000"]
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 OCI GenAI Gateway

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
240
README.md
Normal file
@@ -0,0 +1,240 @@
# OCI GenAI to OpenAI API Gateway

> 🚀 An OpenAI-compatible REST API for Oracle Cloud Infrastructure's Generative AI Service

[](LICENSE)
[](https://www.python.org/downloads/)
[](https://fastapi.tiangolo.com/)

## 📖 Introduction

This is a FastAPI service that acts as a translation layer between OCI Generative AI and the OpenAI API, letting OpenAI SDK clients talk to OCI GenAI models without any code changes.

## ✨ Key Features

- 🔄 **OpenAI API compatible**: Works with the OpenAI SDK as-is; no changes to existing code
- 🤖 **Dynamic model discovery**: Automatically fetches all available models from OCI at startup
- 🌐 **Multi-region load balancing**: Round-robin load balancing across multiple OCI profiles
- 🖼️ **Multimodal support**: Text, images (vision models), Base64-encoded content, and more
- ⚡ **True streaming**: Genuine edge-to-edge streaming responses, TTFB < 200 ms
- 🔒 **Security**: Automatically filters sensitive information (OCIDs, request IDs, endpoint URLs)
- 🎯 **Performance**: Client connection pooling for a significant performance boost

## 🚀 Quick Start

### Prerequisites

- Python 3.8+
- An OCI account and API key
- Access to the OCI Generative AI service

### Installation

1. **Clone the repository**
   ```bash
   git clone <repository-url>
   cd oracle-openai
   ```

2. **Install dependencies**
   ```bash
   pip install -r requirements.txt
   ```

3. **Configure OCI**

   Create or edit `~/.oci/config`:
   ```ini
   [DEFAULT]
   user=ocid1.user.oc1...
   fingerprint=aa:bb:cc:dd...
   key_file=~/.oci/oci_api_key.pem
   tenancy=ocid1.tenancy.oc1...
   region=us-chicago-1
   ```

4. **Configure environment variables**

   Copy `.env.example` to `.env` and edit it:
   ```bash
   cp .env.example .env
   # Edit .env to set API_KEYS and other options
   ```

5. **Run the service**
   ```bash
   cd src
   python main.py
   ```

   The service starts at `http://localhost:8000`.

## 💻 Usage Examples

### cURL

```bash
curl http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer sk-oci-genai-default-key" \
  -d '{
    "model": "google.gemini-2.5-pro",
    "messages": [{"role": "user", "content": "Hello!"}]
  }'
```

### Python OpenAI SDK

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-oci-genai-default-key",
    base_url="http://localhost:8000/v1"
)

response = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[{"role": "user", "content": "Hello!"}]
)

print(response.choices[0].message.content)
```

### Streaming

```python
stream = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[{"role": "user", "content": "Count from 1 to 10"}],
    stream=True
)

for chunk in stream:
    if chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
```

### Vision models (multimodal)

```python
response = client.chat.completions.create(
    model="google.gemini-2.5-pro",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image"},
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://example.com/image.jpg"
                    }
                }
            ]
        }
    ]
)
```

## 📋 Supported Endpoints

| Endpoint | Method | Description |
|----------|--------|-------------|
| `/health` | GET | Health check |
| `/v1/models` | GET | List all available models |
| `/v1/chat/completions` | POST | Chat completions (streaming supported) |
| `/v1/embeddings` | POST | Text embeddings (see the example below) |
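The `/v1/embeddings` endpoint accepts the same request shape as OpenAI's embeddings API. A minimal sketch — the embedding model ID below is only illustrative; pick one returned by `GET /v1/models`:

```python
from openai import OpenAI

client = OpenAI(
    api_key="sk-oci-genai-default-key",
    base_url="http://localhost:8000/v1"
)

embeddings = client.embeddings.create(
    model="cohere.embed-multilingual-v3.0",  # example ID; use a model from /v1/models
    input=["OCI GenAI Gateway", "OpenAI-compatible embeddings"]
)

# Number of vectors and the dimensionality of the first one
print(len(embeddings.data), len(embeddings.data[0].embedding))
```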
## 🎨 Supported Models

Available models are discovered from OCI automatically at startup, including:

- **Cohere**: command-r-plus, command-r-16k, etc.
- **Meta**: llama-3.1-405b, llama-3.1-70b, llama-3.2-90b-vision, etc.
- **Google**: the gemini family
- **OpenAI**: the gpt family
- **xAI**: the grok family

Use `GET /v1/models` to see all available models.

## ⚙️ Configuration Options

### Key environment variables

| Variable | Description | Default |
|----------|-------------|---------|
| `API_KEYS` | List of API keys (JSON array) | - |
| `OCI_CONFIG_PROFILE` | OCI config profile(s); comma-separated for multiple | `DEFAULT` |
| `OCI_AUTH_TYPE` | Authentication type | `api_key` |
| `MAX_TOKENS` | Default maximum tokens | `4096` |
| `TEMPERATURE` | Default temperature | `0.7` |
| `ENABLE_STREAMING` | Global streaming switch | `true` |
| `LOG_LEVEL` | Log level | `INFO` |

See [.env.example](.env.example) for the full configuration.

## 🌐 Multi-Region Load Balancing

Multiple OCI profiles can be configured for automatic load balancing:

```bash
# .env
OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN
```

Requests are distributed across regions using a round-robin strategy, as sketched below.

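A minimal sketch of the round-robin idea. The real implementation lives in `src/core/client_manager.py`, which is not included in this commit view; the class and method names below are illustrative only:

```python
import itertools
from typing import List


class RoundRobinSelector:
    """Cycle through the configured OCI profiles, one request at a time."""

    def __init__(self, profiles: List[str]):
        # e.g. ["DEFAULT", "CHICAGO", "ASHBURN"] parsed from OCI_CONFIG_PROFILE
        self._cycle = itertools.cycle(profiles)

    def next_profile(self) -> str:
        # Each call returns the next profile in order, wrapping around.
        return next(self._cycle)


selector = RoundRobinSelector(["DEFAULT", "CHICAGO", "ASHBURN"])
print([selector.next_profile() for _ in range(5)])
# ['DEFAULT', 'CHICAGO', 'ASHBURN', 'DEFAULT', 'CHICAGO']
```
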
## 🐳 Docker Deployment

```bash
# Using docker-compose
docker-compose up

# Or plain Docker
docker build -t oci-genai-gateway .
docker run -p 8000:8000 --env-file .env oci-genai-gateway
```

## 📚 Documentation

- [CLAUDE.md](CLAUDE.md) - Complete development documentation, including architecture notes, a development guide, and debugging tips
- [.env.example](.env.example) - Example environment configuration

## 🔧 Troubleshooting

### Common Issues

1. **Model not found**
   - Check the model ID spelling
   - Confirm the model is available in your OCI region
   - Check the startup logs to confirm the model was loaded

2. **Authentication failure**
   - Verify that `~/.oci/config` is correct
   - Check the API key file permissions: `chmod 600 ~/.oci/oci_api_key.pem`
   - Run `oci iam region list` to test your OCI configuration

3. **429 rate-limit errors**
   - Use multiple profiles for load balancing
   - Wait 1-2 minutes and retry

For more troubleshooting information, see [CLAUDE.md](CLAUDE.md#调试).

## 🤝 Contributing

Contributions are welcome! Feel free to open issues or pull requests.

## 📄 License

This project is open-sourced under the MIT License; see the [LICENSE](LICENSE) file.

## 🙏 Acknowledgements

- [FastAPI](https://fastapi.tiangolo.com/) - Modern, fast web framework
- [OCI Python SDK](https://github.com/oracle/oci-python-sdk) - Oracle Cloud Infrastructure SDK
- [OpenAI](https://openai.com/) - API design reference

---

**⭐ If this project helps you, please give it a star!**
35
docker-compose.yml
Normal file
@@ -0,0 +1,35 @@
version: '3.8'

services:
  oci-genai-gateway:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: oci-genai-gateway
    ports:
      - "8000:8000"
    volumes:
      # Mount the OCI config (adjust the host path as needed)
      - ~/.oci:/root/.oci:ro
      # Mount the environment configuration file
      - .env:/app/.env:ro
      # Mount the log directory
      - ./logs:/app/logs
    environment:
      - API_TITLE=OCI GenAI to OpenAI API Gateway
      - API_VERSION=0.0.1
      - DEBUG=false
      - LOG_LEVEL=INFO
    restart: unless-stopped
    healthcheck:
      # Use Python for the health check; curl is not installed in the slim image
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health').read()"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s
    networks:
      - genai-network

networks:
  genai-network:
    driver: bridge
24
init.sh
Normal file
@@ -0,0 +1,24 @@
#!/bin/sh

# Modify the CMakeLists.txt and source files to change the project name from "xmrigcc" to "xxxigcc"
sed -i 's/project(xmrigcc)/project(xxxigcc)/' CMakeLists.txt
sed -i 's/XMRigCC: Found ccache package/XXXigCC: Found ccache package/' CMakeLists.txt
sed -i 's/MINER_EXECUTABLE_NAME "xmrigMiner"/MINER_EXECUTABLE_NAME "xxxigMiner"/' CMakeLists.txt
sed -i 's/DAEMON_EXECUTABLE_NAME "xmrigDaemon"/DAEMON_EXECUTABLE_NAME "xxxigDaemon"/' CMakeLists.txt
sed -i 's/xmrigServer ${SOURCES_CC_SERVER}/xxxigServer ${SOURCES_CC_SERVER}/' CMakeLists.txt
sed -i 's/xmrigServer ${XMRIG_ASM_LIBRARY}/xxxigServer ${XMRIG_ASM_LIBRARY}/' CMakeLists.txt
sed -i 's/xmrigServer POST_BUILD/xxxigServer POST_BUILD/' CMakeLists.txt

# Modify donate functionality
sed -i 's/kDefaultDonateLevel = 3/kDefaultDonateLevel = 0/' src/donate.h
sed -i 's/kMinimumDonateLevel = 1/kMinimumDonateLevel = 0/' src/donate.h
sed -i 's/donate.graef.in/127.0.0.1/' src/net/strategies/DonateStrategy.cpp
sed -i 's/87.106.163.52/127.0.0.1/' src/net/strategies/DonateStrategy.cpp
sed -i 's/"donate-level": 3/"donate-level": 0/' src/config.json
sed -i 's/"donate-over-proxy": 1/"donate-over-proxy": 0/' src/config.json

# Modify version information
sed -i 's/Copyright (C) 2017- XMRigCC//' src/version.h
sed -i 's/https:\/\/github.com\/BenDr0id\/xmrigCC\///' src/version.h
sed -i 's/xmrigcc/xxxigcc/' src/version.h
sed -i 's/XMRigCC/XXXigCC/' src/version.h
12
requirements.txt
Normal file
@@ -0,0 +1,12 @@
# FastAPI and server
fastapi==0.115.0
uvicorn[standard]==0.32.0
pydantic==2.9.2
pydantic-settings==2.6.1

# OCI SDK (updated to latest stable version)
oci>=2.160.0

# Utilities
python-dotenv==1.0.1
python-multipart==0.0.17
1
src/api/__init__.py
Normal file
@@ -0,0 +1 @@
"""API module for OCI GenAI Gateway."""
1
src/api/adapters/__init__.py
Normal file
@@ -0,0 +1 @@
"""Request/Response adapters module."""
104
src/api/adapters/request_adapter.py
Normal file
@@ -0,0 +1,104 @@
"""
Adapter for converting OpenAI requests to OCI GenAI format.
"""
import logging
from typing import List, Dict, Any, Optional
from ..schemas import ChatCompletionRequest, EmbeddingRequest
from core.config import get_settings

logger = logging.getLogger(__name__)

# Content type handlers for extensible multimodal support
CONTENT_TYPE_HANDLERS = {
    "text": lambda item: {"type": "text", "text": item.get("text", "")},
    "image_url": lambda item: {"type": "image_url", "image_url": item.get("image_url", {})},
    "audio": lambda item: {"type": "audio", "audio_url": item.get("audio_url", {})},
    "video": lambda item: {"type": "video", "video_url": item.get("video_url", {})}
}


def adapt_chat_messages(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Adapt OpenAI chat messages to OCI GenAI format.

    Args:
        messages: OpenAI format messages

    Returns:
        Adapted messages for OCI GenAI
    """
    adapted_messages = []

    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")

        # Handle different content types
        if isinstance(content, list):
            # Multimodal content
            adapted_content = []
            for item in content:
                if isinstance(item, dict):
                    item_type = item.get("type")
                    handler = CONTENT_TYPE_HANDLERS.get(item_type)
                    if handler:
                        adapted_content.append(handler(item))
                    else:
                        logger.warning(f"Unknown content type: {item_type}, skipping")

            adapted_messages.append({
                "role": role,
                "content": adapted_content
            })
        else:
            # Simple text content
            adapted_messages.append({
                "role": role,
                "content": content
            })

    return adapted_messages


def extract_chat_params(request: ChatCompletionRequest) -> Dict[str, Any]:
    """
    Extract chat parameters from OpenAI request.

    Args:
        request: OpenAI chat completion request

    Returns:
        Dictionary of parameters for OCI GenAI
    """
    settings = get_settings()

    params = {
        "temperature": request.temperature if request.temperature is not None else settings.temperature,
        "max_tokens": request.max_tokens if request.max_tokens is not None else settings.max_tokens,
        "top_p": request.top_p if request.top_p is not None else 1.0,
        "stream": request.stream or False,
    }

    # Add tools if present
    if request.tools:
        params["tools"] = request.tools

    return params


def adapt_embedding_input(request: EmbeddingRequest) -> List[str]:
    """
    Adapt OpenAI embedding input to OCI GenAI format.

    Args:
        request: OpenAI embedding request

    Returns:
        List of texts to embed
    """
    if isinstance(request.input, str):
        return [request.input]
    elif isinstance(request.input, list):
        return request.input
    else:
        return [str(request.input)]
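For reference, a quick illustration of what `adapt_chat_messages` produces for a mixed text/image message (output shown as a comment; the image URL is a placeholder):

```python
from api.adapters.request_adapter import adapt_chat_messages

openai_messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image"},
            {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
        ],
    }
]

adapted = adapt_chat_messages(openai_messages)
# [{'role': 'user',
#   'content': [{'type': 'text', 'text': 'Describe this image'},
#               {'type': 'image_url', 'image_url': {'url': 'https://example.com/image.jpg'}}]}]
```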
307
src/api/adapters/response_adapter.py
Normal file
@@ -0,0 +1,307 @@
"""
Adapter for converting OCI GenAI responses to OpenAI format.
"""
import time
import uuid
from typing import Dict, Any, List, Optional
from ..schemas import (
    ChatCompletionResponse,
    ChatCompletionChoice,
    ChatCompletionUsage,
    ChatMessage,
    EmbeddingResponse,
    EmbeddingData,
    EmbeddingUsage,
)


def adapt_chat_response(
    oci_response: Any,
    model_id: str,
    request_id: Optional[str] = None
) -> ChatCompletionResponse:
    """
    Adapt OCI GenAI chat response to OpenAI format.

    Args:
        oci_response: OCI GenAI response object
        model_id: Model identifier
        request_id: Optional request ID

    Returns:
        OpenAI-compatible chat completion response
    """
    response_id = request_id or f"chatcmpl-{uuid.uuid4().hex[:8]}"
    created_at = int(time.time())

    # Extract response data
    chat_response = oci_response.data.chat_response

    # Extract text content
    if hasattr(chat_response, 'text'):
        # Cohere format
        raw_text = chat_response.text
        # Try to parse as JSON if it's a string (OCI format)
        try:
            import json
            parsed = json.loads(raw_text)
            if isinstance(parsed, dict) and 'text' in parsed:
                content = parsed['text']
            else:
                content = raw_text
        except (json.JSONDecodeError, ValueError, TypeError):
            # Not JSON, use as-is
            content = raw_text
        finish_reason = chat_response.finish_reason if hasattr(chat_response, 'finish_reason') else "stop"
    elif hasattr(chat_response, 'choices') and len(chat_response.choices) > 0:
        # Llama/Generic format
        choice = chat_response.choices[0]
        if hasattr(choice, 'message'):
            raw_content = choice.message.content
            # Handle list format: [TextContent(text="...", type="TEXT")] or [{"text": "...", "type": "TEXT"}]
            if isinstance(raw_content, list):
                # Build multimodal content array
                adapted_content = []
                for item in raw_content:
                    # Handle OCI TextContent object
                    if hasattr(item, 'text') and hasattr(item, 'type'):
                        if item.type == 'TEXT' or item.type == 'text':
                            adapted_content.append({
                                "type": "text",
                                "text": item.text
                            })
                        # Future: handle IMAGE, AUDIO, VIDEO types
                    # Handle dict format
                    elif isinstance(item, dict):
                        item_type = item.get('type', 'TEXT').upper()
                        if item_type == 'TEXT':
                            adapted_content.append({
                                "type": "text",
                                "text": item.get('text', '')
                            })
                        # Future: handle other types
                    else:
                        # Fallback: convert to text
                        adapted_content.append({
                            "type": "text",
                            "text": str(item)
                        })

                # Simplify to string if only one text element (backward compatibility)
                if len(adapted_content) == 1 and adapted_content[0].get('type') == 'text':
                    content = adapted_content[0]['text']
                else:
                    content = adapted_content
            elif isinstance(raw_content, str):
                # Try to parse as JSON if it's a string (OCI format)
                try:
                    import json
                    parsed = json.loads(raw_content)
                    if isinstance(parsed, dict) and 'text' in parsed:
                        content = parsed['text']
                    else:
                        content = raw_content
                except (json.JSONDecodeError, ValueError):
                    # Not JSON, use as-is
                    content = raw_content
            else:
                content = raw_content
        else:
            content = str(choice)
        finish_reason = choice.finish_reason if hasattr(choice, 'finish_reason') else "stop"
    else:
        content = str(chat_response)
        finish_reason = "stop"

    # Create message
    message = ChatMessage(
        role="assistant",
        content=content
    )

    # Create choice
    choice = ChatCompletionChoice(
        index=0,
        message=message,
        finish_reason=finish_reason
    )

    # Extract usage information
    usage = None
    if hasattr(oci_response.data, 'usage'):
        oci_usage = oci_response.data.usage
        usage = ChatCompletionUsage(
            prompt_tokens=getattr(oci_usage, 'prompt_tokens', 0) or 0,
            completion_tokens=getattr(oci_usage, 'completion_tokens', 0) or 0,
            total_tokens=getattr(oci_usage, 'total_tokens', 0) or 0
        )

    return ChatCompletionResponse(
        id=response_id,
        object="chat.completion",
        created=created_at,
        model=model_id,
        choices=[choice],
        usage=usage
    )


def adapt_streaming_chunk(
    chunk_data: str,
    model_id: str,
    request_id: str,
    index: int = 0,
    is_first: bool = False
) -> str:
    """
    Adapt OCI GenAI streaming chunk to OpenAI SSE format.

    Args:
        chunk_data: Chunk text from OCI GenAI
        model_id: Model identifier
        request_id: Request ID
        index: Chunk index
        is_first: Whether this is the first chunk (should include role with empty content)

    Returns:
        OpenAI-compatible SSE formatted string
    """
    created_at = int(time.time())

    # Build delta - first chunk should include role with empty content
    delta = {}
    if is_first:
        delta["role"] = "assistant"
        delta["content"] = ""  # First chunk has empty content like OpenAI
    elif chunk_data:
        delta["content"] = chunk_data

    chunk = {
        "id": request_id,
        "object": "chat.completion.chunk",
        "created": created_at,
        "model": model_id,
        "system_fingerprint": None,
        "choices": [
            {
                "index": index,
                "delta": delta,
                "logprobs": None,
                "finish_reason": None
            }
        ],
        "usage": None
    }

    import json
    return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"


def adapt_streaming_done(
    model_id: str,
    request_id: str,
    usage: Optional[Dict[str, int]] = None
) -> str:
    """
    Create final SSE chunks for streaming completion (OpenAI format).

    Returns two chunks:
    1. Finish chunk with finish_reason="stop"
    2. Usage chunk with empty choices and usage stats

    Args:
        model_id: Model identifier
        request_id: Request ID
        usage: Optional usage statistics

    Returns:
        Final SSE formatted strings (finish chunk + usage chunk + [DONE])
    """
    import json
    created_at = int(time.time())

    result = ""

    # First chunk: finish_reason with empty delta
    finish_chunk = {
        "id": request_id,
        "object": "chat.completion.chunk",
        "created": created_at,
        "model": model_id,
        "system_fingerprint": None,
        "choices": [
            {
                "index": 0,
                "delta": {},
                "logprobs": None,
                "finish_reason": "stop"
            }
        ],
        "usage": None
    }
    result += f"data: {json.dumps(finish_chunk, ensure_ascii=False)}\n\n"

    # Second chunk: usage stats with empty choices (like OpenAI)
    if usage:
        usage_chunk = {
            "id": request_id,
            "object": "chat.completion.chunk",
            "created": created_at,
            "model": model_id,
            "system_fingerprint": "",
            "choices": [],  # Empty choices array for usage chunk
            "usage": usage
        }
        result += f"data: {json.dumps(usage_chunk, ensure_ascii=False)}\n\n"

    # Final [DONE] marker
    result += "data: [DONE]\n\n"

    return result


def adapt_embedding_response(
    oci_response: Any,
    model_id: str,
    input_count: int
) -> EmbeddingResponse:
    """
    Adapt OCI GenAI embedding response to OpenAI format.

    Args:
        oci_response: OCI GenAI embedding response
        model_id: Model identifier
        input_count: Number of input texts

    Returns:
        OpenAI-compatible embedding response
    """
    embeddings_data = []

    # Extract embeddings
    if hasattr(oci_response.data, 'embeddings'):
        embeddings = oci_response.data.embeddings
        for idx, embedding in enumerate(embeddings):
            embeddings_data.append(
                EmbeddingData(
                    object="embedding",
                    embedding=embedding,
                    index=idx
                )
            )

    # Calculate usage (approximate)
    # OCI doesn't always provide token counts, so we estimate
    prompt_tokens = input_count * 10  # Rough estimate

    usage = EmbeddingUsage(
        prompt_tokens=prompt_tokens,
        total_tokens=prompt_tokens
    )

    return EmbeddingResponse(
        object="list",
        data=embeddings_data,
        model=model_id,
        usage=usage
    )
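For reference, a sketch of the SSE strings these helpers emit (the request ID and token counts are made-up values; the `created` timestamp varies):

```python
from api.adapters.response_adapter import adapt_streaming_chunk, adapt_streaming_done

first = adapt_streaming_chunk("", "google.gemini-2.5-pro", "chatcmpl-abc123", is_first=True)
delta = adapt_streaming_chunk("Hello", "google.gemini-2.5-pro", "chatcmpl-abc123")
done = adapt_streaming_done(
    "google.gemini-2.5-pro", "chatcmpl-abc123",
    usage={"prompt_tokens": 5, "completion_tokens": 1, "total_tokens": 6},
)

# first -> data: {... "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}, ...}] ...}
# delta -> data: {... "choices": [{"index": 0, "delta": {"content": "Hello"}, ...}] ...}
# done  -> a finish chunk with "finish_reason": "stop", a usage chunk, then "data: [DONE]"
```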
57
src/api/auth.py
Normal file
@@ -0,0 +1,57 @@
"""
API authentication module.
"""
import logging
from fastapi import HTTPException, Security, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from typing import List

from core.config import get_settings

logger = logging.getLogger(__name__)
security = HTTPBearer()


async def verify_api_key(
    credentials: HTTPAuthorizationCredentials = Security(security)
) -> str:
    """
    Verify API key from Authorization header.

    Args:
        credentials: HTTP authorization credentials

    Returns:
        Validated API key

    Raises:
        HTTPException: If API key is invalid
    """
    api_key = credentials.credentials
    settings = get_settings()

    if api_key in settings.api_keys:
        logger.debug("API key validated successfully")
        return api_key

    logger.warning(f"Invalid API key attempted: {api_key[:10]}...")
    raise HTTPException(
        status_code=status.HTTP_401_UNAUTHORIZED,
        detail="Invalid API key",
        headers={"WWW-Authenticate": "Bearer"},
    )


async def get_api_key(
    credentials: HTTPAuthorizationCredentials = Security(security)
) -> str:
    """
    Async wrapper for API key verification.

    Args:
        credentials: HTTP authorization credentials

    Returns:
        Validated API key
    """
    return await verify_api_key(credentials)
161
src/api/error_handler.py
Normal file
@@ -0,0 +1,161 @@
"""
OCI error handler - filters sensitive information and produces user-friendly error responses.

This module is responsible for:
1. Intercepting OCI SDK ServiceError exceptions
2. Filtering sensitive information (OCIDs, request IDs, endpoint URLs)
3. Mapping OCI error codes to the OpenAI-compatible format
4. Generating user-friendly error messages
"""
import re
import logging
from typing import Dict

from oci.exceptions import ServiceError

from api.schemas import ErrorDetail, ErrorResponse

logger = logging.getLogger(__name__)


class OCIErrorHandler:
    """OCI error handler: filters sensitive information and converts the error format."""

    # Pre-compiled regex patterns (performance optimization)
    SENSITIVE_PATTERNS: Dict[str, re.Pattern] = {
        'tenancy_ocid': re.compile(r'ocid1\.tenancy\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
        'compartment_ocid': re.compile(r'ocid1\.compartment\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
        'user_ocid': re.compile(r'ocid1\.user\.oc1\.\.[a-z0-9]+', re.IGNORECASE),
        'endpoint_ocid': re.compile(r'ocid1\.generativeaiendpoint\.[a-z0-9\.\-]+', re.IGNORECASE),
        'request_id': re.compile(r'[A-F0-9]{32}(/[A-F0-9]{32})*'),
        'endpoint_url': re.compile(r'https://[a-z0-9\.\-]+\.oci(\.oraclecloud)?\.com[^\s\)]*', re.IGNORECASE),
    }

    # Mapping from OCI status codes to OpenAI error types
    OCI_TO_OPENAI_ERROR_TYPE: Dict[int, str] = {
        400: "invalid_request_error",
        401: "authentication_error",
        403: "permission_error",
        404: "invalid_request_error",
        409: "invalid_request_error",
        429: "rate_limit_error",
        500: "server_error",
        502: "server_error",
        503: "server_error",
        504: "server_error",
    }

    # User-friendly error message templates
    USER_FRIENDLY_MESSAGES: Dict[int, str] = {
        400: "Invalid request parameters. Please check your input.",
        401: "Authentication failed. Please verify your API credentials.",
        403: "Access denied. You don't have permission to access this resource.",
        404: "The requested resource was not found.",
        409: "Request conflict. The resource may have been modified.",
        429: "Request rate limit exceeded. Please retry after a short delay.",
        500: "Internal server error. Please try again later.",
        502: "Bad gateway. The upstream service is unavailable.",
        503: "Service temporarily unavailable. Please try again later.",
        504: "Gateway timeout. The request took too long to process.",
    }

    @classmethod
    def sanitize_oci_error(cls, exc: ServiceError) -> ErrorResponse:
        """
        Handle an OCI ServiceError: filter sensitive information and return a user-friendly error response.

        Args:
            exc: OCI ServiceError exception object

        Returns:
            ErrorResponse: Filtered error response
        """
        # Log the full error (for debugging)
        logger.error(
            f"OCI ServiceError: status={exc.status}, code={exc.code}, "
            f"request_id={exc.request_id}, message={exc.message}"
        )

        # Filter sensitive information
        filtered_message = cls.filter_sensitive_info(str(exc.message))

        # Generate a user-friendly message
        user_message = cls.create_user_friendly_message(exc.status, filtered_message)

        # Map the error type
        error_type = cls.map_oci_status_to_openai(exc.status)

        # Build the ErrorResponse
        error_detail = ErrorDetail(
            message=user_message,
            type=error_type,
            code=f"oci_{exc.code.lower()}" if exc.code else "oci_error"
        )

        return ErrorResponse(error=error_detail)

    @classmethod
    def filter_sensitive_info(cls, text: str) -> str:
        """
        Filter sensitive information out of the given text.

        Args:
            text: Original text

        Returns:
            str: Filtered text
        """
        filtered = text

        # Apply each regex pattern and replace sensitive matches
        for pattern_name, regex_pattern in cls.SENSITIVE_PATTERNS.items():
            if pattern_name == 'tenancy_ocid':
                filtered = regex_pattern.sub('tenancy:***', filtered)
            elif pattern_name == 'endpoint_url':
                filtered = regex_pattern.sub('https://***', filtered)
            elif pattern_name == 'request_id':
                filtered = regex_pattern.sub('request-id:***', filtered)
            else:
                filtered = regex_pattern.sub('***', filtered)

        return filtered

    @classmethod
    def map_oci_status_to_openai(cls, status_code: int) -> str:
        """
        Map an OCI status code to an OpenAI error type.

        Args:
            status_code: HTTP status code

        Returns:
            str: OpenAI error type
        """
        # Use the mapping table; unknown status codes default to server_error
        return cls.OCI_TO_OPENAI_ERROR_TYPE.get(status_code, "server_error")

    @classmethod
    def create_user_friendly_message(cls, status_code: int, filtered_message: str) -> str:
        """
        Generate a user-friendly error message.

        Args:
            status_code: HTTP status code
            filtered_message: Already-filtered original error message

        Returns:
            str: User-friendly error message
        """
        # Prefer the predefined friendly message
        base_message = cls.USER_FRIENDLY_MESSAGES.get(
            status_code,
            "An unexpected error occurred. Please try again."
        )

        # If the filtered message still carries useful information, append it
        if filtered_message and filtered_message != str(status_code):
            # Truncate to the first 200 characters to avoid an overly long message
            truncated = filtered_message[:200]
            return f"{base_message} Details: {truncated}"

        return base_message
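A sketch of what the sensitive-information filter does to a typical upstream message (the OCID and hostname in the input are made up):

```python
from api.error_handler import OCIErrorHandler

raw = ("Chat failed for ocid1.tenancy.oc1..aaaabbbb1234 via "
       "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/v1/chat")

print(OCIErrorHandler.filter_sensitive_info(raw))
# Chat failed for tenancy:*** via https://***
```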
60
src/api/exceptions.py
Normal file
@@ -0,0 +1,60 @@
"""
Custom exceptions for the API.
"""
from fastapi import HTTPException


class ModelNotFoundException(HTTPException):
    """
    Exception raised when a requested model is not found.

    This exception is OpenAI API compatible and returns:
    - HTTP Status: 404
    - Error type: "invalid_request_error"
    - Error code: "model_not_found"
    """

    def __init__(self, model_id: str):
        """
        Initialize ModelNotFoundException.

        Args:
            model_id: The model ID that was not found
        """
        self.model_id = model_id
        self.error_code = "model_not_found"
        self.error_type = "invalid_request_error"

        # HTTPException detail will be the message
        super().__init__(
            status_code=404,
            detail=f"The model '{model_id}' does not exist or is not supported"
        )


class InvalidModelTypeException(HTTPException):
    """
    Exception raised when a model exists but is not the correct type.

    For example, using an embedding model for chat or vice versa.
    """

    def __init__(self, model_id: str, expected_type: str, actual_type: str):
        """
        Initialize InvalidModelTypeException.

        Args:
            model_id: The model ID
            expected_type: Expected model type (e.g., "chat", "embedding")
            actual_type: Actual model type
        """
        self.model_id = model_id
        self.expected_type = expected_type
        self.actual_type = actual_type
        self.error_code = "invalid_model_type"
        self.error_type = "invalid_request_error"

        super().__init__(
            status_code=400,
            detail=f"Model '{model_id}' is a {actual_type} model, not a {expected_type} model"
        )
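Without a custom exception handler (none is included in this excerpt), FastAPI serializes these as its default HTTPException body; a sketch with a made-up route and model ID:

```python
# Hypothetical route for illustration; "foo" is a made-up model ID.
from fastapi import FastAPI

from api.exceptions import ModelNotFoundException

app = FastAPI()


@app.get("/demo")
async def demo():
    raise ModelNotFoundException("foo")

# FastAPI's default handling returns HTTP 404 with:
#   {"detail": "The model 'foo' does not exist or is not supported"}
# A custom exception handler can reshape this into the OpenAI-style error object
# described in the class docstring (type/code fields are stored on the exception).
```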
1
src/api/routers/__init__.py
Normal file
@@ -0,0 +1 @@
"""API routers module."""
417
src/api/routers/chat.py
Normal file
@@ -0,0 +1,417 @@
"""
Chat completions API router - OpenAI compatible chat endpoint.
"""
import asyncio
import logging
import os
import uuid
from typing import AsyncIterator, Union
from fastapi import APIRouter, Depends, HTTPException
from fastapi.responses import StreamingResponse

from oci.exceptions import ServiceError

from api.auth import get_api_key
from api.schemas import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ErrorDetail
from api.error_handler import OCIErrorHandler
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
from api.adapters.request_adapter import adapt_chat_messages, extract_chat_params
from api.adapters.response_adapter import (
    adapt_chat_response,
    adapt_streaming_chunk,
    adapt_streaming_done,
)
from core.config import get_settings
from core.client_manager import get_client_manager
from core.models import get_model_config

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/chat",
    tags=["chat"],
    dependencies=[Depends(get_api_key)]
)


def extract_delta_from_chunk(chunk) -> str:
    """
    Extract delta text content from OCI streaming chunk.

    Args:
        chunk: OCI streaming response chunk (can be SSE Event, parsed object, etc.)

    Returns:
        Delta text content or empty string
    """
    try:
        # Handle SSE Event objects (from SSEClient)
        if hasattr(chunk, 'data'):
            import json
            # Parse JSON data from SSE event
            try:
                parsed = json.loads(chunk.data)

                # Recursively extract from parsed object
                if isinstance(parsed, dict):
                    # OCI Streaming format: message.content[].text
                    if 'message' in parsed and 'content' in parsed['message']:
                        content_array = parsed['message']['content']
                        if isinstance(content_array, list) and len(content_array) > 0:
                            # Extract text from all TEXT type content items
                            text_parts = []
                            for item in content_array:
                                if isinstance(item, dict) and item.get('type') == 'TEXT' and 'text' in item:
                                    text_parts.append(item['text'])
                            if text_parts:
                                return ''.join(text_parts)

                    # Try to get text from various possible locations
                    if 'text' in parsed:
                        return parsed['text']
                    if 'chatResponse' in parsed and 'text' in parsed['chatResponse']:
                        return parsed['chatResponse']['text']
                    if 'choices' in parsed and len(parsed['choices']) > 0:
                        choice = parsed['choices'][0]
                        if 'delta' in choice and 'content' in choice['delta']:
                            return choice['delta']['content']

            except (json.JSONDecodeError, KeyError, TypeError):
                # Return raw data if not JSON
                return str(chunk.data) if chunk.data else ""

        # Try to extract from chat_response.text (Cohere format)
        if hasattr(chunk, 'chat_response') and hasattr(chunk.chat_response, 'text'):
            return chunk.chat_response.text

        # Try to extract from choices[0].delta.content (Generic format)
        if hasattr(chunk, 'chat_response') and hasattr(chunk.chat_response, 'choices'):
            if len(chunk.chat_response.choices) > 0:
                choice = chunk.chat_response.choices[0]
                if hasattr(choice, 'delta') and hasattr(choice.delta, 'content'):
                    content = choice.delta.content
                    if isinstance(content, str):
                        return content
                    elif isinstance(content, list):
                        # Handle TextContent list
                        text_parts = []
                        for item in content:
                            if isinstance(item, dict) and 'text' in item:
                                text_parts.append(item['text'])
                            elif hasattr(item, 'text'):
                                text_parts.append(item.text)
                        return "".join(text_parts)

        # Try direct text attribute
        if hasattr(chunk, 'text'):
            return chunk.text

    except Exception as e:
        logger.warning(f"Failed to extract delta from chunk: {e}")

    return ""


def extract_content_from_response(chat_response) -> str:
    """
    Extract full content from non-streaming OCI response.

    Args:
        chat_response: OCI chat response object

    Returns:
        Full text content
    """
    if hasattr(chat_response, 'text'):
        raw_text = chat_response.text
        # Try to parse as JSON if it's a string (OCI format)
        try:
            import json
            parsed = json.loads(raw_text)
            if isinstance(parsed, dict) and 'text' in parsed:
                return parsed['text']
            return raw_text
        except (json.JSONDecodeError, ValueError, TypeError):
            return raw_text

    elif hasattr(chat_response, 'choices') and len(chat_response.choices) > 0:
        choice = chat_response.choices[0]
        if hasattr(choice, 'message'):
            raw_content = choice.message.content
            # Handle list format
            if isinstance(raw_content, list):
                text_parts = []
                for item in raw_content:
                    if isinstance(item, dict):
                        text_parts.append(item.get('text', ''))
                    elif hasattr(item, 'text'):
                        text_parts.append(item.text)
                    else:
                        text_parts.append(str(item))
                return "".join(text_parts)
            elif isinstance(raw_content, str):
                try:
                    import json
                    parsed = json.loads(raw_content)
                    if isinstance(parsed, dict) and 'text' in parsed:
                        return parsed['text']
                    return raw_content
                except (json.JSONDecodeError, ValueError):
                    return raw_content
            else:
                return str(raw_content)
        return str(choice)

    return str(chat_response)


@router.post("/completions", response_model=ChatCompletionResponse)
async def create_chat_completion(request: ChatCompletionRequest):
    """
    Create a chat completion using OCI Generative AI.

    Args:
        request: Chat completion request

    Returns:
        Chat completion response
    """
    logger.info(f"Chat completion request for model: {request.model}")

    settings = get_settings()

    # Validate model exists
    model_config = get_model_config(request.model)
    if not model_config:
        raise ModelNotFoundException(request.model)

    # Validate model type is chat (ondemand or dedicated)
    if model_config.type not in ("ondemand", "dedicated"):
        raise InvalidModelTypeException(
            model_id=request.model,
            expected_type="chat",
            actual_type=model_config.type
        )

    # Note: Multimodal capability validation is handled by the model itself
    # If a model doesn't support certain content types, it will raise an error
    # For example, Cohere models will raise ValueError for non-text content

    # Get OCI client from manager (round-robin load balancing)
    client_manager = get_client_manager()
    oci_client = client_manager.get_client()

    # Adapt messages
    messages = adapt_chat_messages([msg.dict() for msg in request.messages])

    # Extract parameters
    params = extract_chat_params(request)

    # Check global streaming setting
    # If streaming is globally disabled, override client request
    enable_stream = request.stream and settings.enable_streaming

    if not settings.enable_streaming and request.stream:
        logger.info("Streaming requested but globally disabled via ENABLE_STREAMING=false")

    # Handle streaming
    if enable_stream:
        request_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"

        async def generate_stream() -> AsyncIterator[str]:
            """Generate streaming response with true non-blocking streaming."""
            try:
                # Run OCI SDK call in executor to prevent blocking
                # This is critical for achieving true streaming (msToFirstChunk < 1s)
                loop = asyncio.get_event_loop()
                response = await loop.run_in_executor(
                    None,
                    lambda: oci_client.chat(
                        model_id=request.model,
                        messages=messages,
                        temperature=params["temperature"],
                        max_tokens=params["max_tokens"],
                        top_p=params["top_p"],
                        stream=True,  # Enable real streaming
                        tools=params.get("tools"),
                    )
                )

                # Process real streaming response
                accumulated_usage = None

                # Check if response.data is an SSE stream (iterable)
                # When stream=True, OCI SDK returns response.data as SSEClient
                try:
                    # Try to iterate over the stream
                    stream_data = response.data if hasattr(response, 'data') else response

                    # Check if it's SSEClient or any iterable type
                    stream_type_name = type(stream_data).__name__
                    is_sse_client = 'SSEClient' in stream_type_name
                    is_iterable = hasattr(stream_data, '__iter__') or hasattr(stream_data, '__next__')

                    # SSEClient is always treated as streaming, even if hasattr check fails
                    if is_sse_client or is_iterable:
                        # Real streaming: iterate over chunks
                        # SSEClient requires calling .events() method to iterate
                        if is_sse_client and hasattr(stream_data, 'events'):
                            iterator = stream_data.events()
                        else:
                            iterator = stream_data

                        # Send first chunk with role and empty content (OpenAI format)
                        yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)

                        # Use queue for thread-safe chunk forwarding
                        import queue
                        import threading
                        chunk_queue = queue.Queue()

                        def read_chunks():
                            """Read chunks in background thread and put in queue."""
                            try:
                                for chunk in iterator:
                                    chunk_queue.put(("chunk", chunk))
                                chunk_queue.put(("done", None))
                            except Exception as e:
                                chunk_queue.put(("error", e))

                        # Start background thread to read chunks
                        reader_thread = threading.Thread(target=read_chunks, daemon=True)
                        reader_thread.start()

                        # Yield chunks as they arrive from queue
                        while True:
                            # Non-blocking queue get with timeout
                            try:
                                msg_type, data = await loop.run_in_executor(
                                    None,
                                    lambda: chunk_queue.get(timeout=0.01)
                                )
                            except queue.Empty:
                                # Allow other async tasks to run
                                await asyncio.sleep(0)
                                continue

                            if msg_type == "done":
                                break
                            elif msg_type == "error":
                                raise data
                            elif msg_type == "chunk":
                                chunk = data
                                # Extract delta content from chunk
                                delta_text = extract_delta_from_chunk(chunk)

                                if delta_text:
                                    yield adapt_streaming_chunk(delta_text, request.model, request_id, 0, is_first=False)

                                # Try to extract usage from chunk (typically in final chunk)
                                # Handle both SSE Event format and object format
                                if hasattr(chunk, 'data'):
                                    # SSE Event - parse JSON to extract usage
                                    try:
                                        import json
                                        parsed = json.loads(chunk.data)
                                        if isinstance(parsed, dict) and 'usage' in parsed:
                                            usage_data = parsed['usage']
                                            accumulated_usage = {
                                                "prompt_tokens": usage_data.get('promptTokens', 0) or 0,
                                                "completion_tokens": usage_data.get('completionTokens', 0) or 0,
                                                "total_tokens": usage_data.get('totalTokens', 0) or 0
                                            }
                                    except (json.JSONDecodeError, KeyError, TypeError):
                                        pass
                                elif hasattr(chunk, 'usage') and chunk.usage:
                                    # Object format
                                    accumulated_usage = {
                                        "prompt_tokens": getattr(chunk.usage, 'prompt_tokens', 0) or 0,
                                        "completion_tokens": getattr(chunk.usage, 'completion_tokens', 0) or 0,
                                        "total_tokens": getattr(chunk.usage, 'total_tokens', 0) or 0
                                    }

                        # Send done message with usage
                        yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)

                    else:
                        # Fallback: non-streaming response, simulate streaming
                        logger.warning(f"OCI SDK returned non-iterable response (type: {type(stream_data).__name__}), falling back to simulated streaming")

                        # Extract text from non-streaming response
                        chat_response = stream_data.chat_response if hasattr(stream_data, 'chat_response') else stream_data
                        content = extract_content_from_response(chat_response)

                        # Extract usage information
                        if hasattr(stream_data, 'usage'):
                            oci_usage = stream_data.usage
                            accumulated_usage = {
                                "prompt_tokens": getattr(oci_usage, 'prompt_tokens', 0) or 0,
                                "completion_tokens": getattr(oci_usage, 'completion_tokens', 0) or 0,
                                "total_tokens": getattr(oci_usage, 'total_tokens', 0) or 0
                            }

                        # Simulate streaming by chunking
                        # First send empty chunk with role (OpenAI format)
                        yield adapt_streaming_chunk("", request.model, request_id, 0, is_first=True)

                        chunk_size = settings.stream_chunk_size
                        for i in range(0, len(content), chunk_size):
                            chunk = content[i:i + chunk_size]
                            yield adapt_streaming_chunk(chunk, request.model, request_id, 0, is_first=False)

                        yield adapt_streaming_done(request.model, request_id, usage=accumulated_usage)

                except TypeError as te:
                    # Handle case where response is not iterable at all
|
||||||
|
logger.error(f"Response is not iterable: {te}", exc_info=True)
|
||||||
|
raise
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in streaming: {str(e)}", exc_info=True)
|
||||||
|
import json
|
||||||
|
|
||||||
|
# 根据异常类型处理并过滤敏感信息
|
||||||
|
if isinstance(e, ServiceError):
|
||||||
|
error_response = OCIErrorHandler.sanitize_oci_error(e)
|
||||||
|
else:
|
||||||
|
# 通用错误也要过滤可能包含的敏感信息
|
||||||
|
filtered_msg = OCIErrorHandler.filter_sensitive_info(str(e))
|
||||||
|
error_response = ErrorResponse(
|
||||||
|
error=ErrorDetail(
|
||||||
|
message="An error occurred during streaming",
|
||||||
|
type="server_error",
|
||||||
|
code="streaming_error"
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
yield f"data: {json.dumps(error_response.dict(), ensure_ascii=False)}\n\n"
|
||||||
|
|
||||||
|
return StreamingResponse(
|
||||||
|
generate_stream(),
|
||||||
|
media_type="text/event-stream"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Non-streaming response
|
||||||
|
try:
|
||||||
|
response = oci_client.chat(
|
||||||
|
model_id=request.model,
|
||||||
|
messages=messages,
|
||||||
|
temperature=params["temperature"],
|
||||||
|
max_tokens=params["max_tokens"],
|
||||||
|
top_p=params["top_p"],
|
||||||
|
stream=False,
|
||||||
|
tools=params.get("tools"),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Adapt response to OpenAI format
|
||||||
|
openai_response = adapt_chat_response(response, request.model)
|
||||||
|
|
||||||
|
if settings.log_responses:
|
||||||
|
logger.debug(f"Response: {openai_response}")
|
||||||
|
|
||||||
|
return openai_response
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in chat completion: {str(e)}", exc_info=True)
|
||||||
|
# 直接 raise,让全局异常处理器统一过滤敏感信息
|
||||||
|
raise
|
||||||
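Note: because this router keeps the OpenAI wire format, any OpenAI-compatible client can exercise both the streaming and non-streaming paths above. A minimal sketch, not part of the commit, assuming the gateway runs locally on its configured port, GATEWAY_KEY is one of the keys configured in API_KEYS, and the model id is one returned by GET /v1/models:

# Hypothetical smoke test for the chat completions endpoint (illustrative only).
from openai import OpenAI  # assumes the official openai>=1.x client is installed

client = OpenAI(base_url="http://localhost:8000/v1", api_key="GATEWAY_KEY")  # placeholder values

# Streaming path: each chunk is a "chat.completion.chunk" object produced by adapt_streaming_chunk.
stream = client.chat.completions.create(
    model="meta.llama-3.3-70b-instruct",  # placeholder; use an id listed by GET /v1/models
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)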
85
src/api/routers/embeddings.py
Normal file
@@ -0,0 +1,85 @@
"""
Embeddings API router - OpenAI compatible embeddings endpoint.
"""
import logging
from fastapi import APIRouter, Depends, HTTPException

from api.auth import get_api_key
from api.schemas import EmbeddingRequest, EmbeddingResponse
from api.adapters.request_adapter import adapt_embedding_input
from api.adapters.response_adapter import adapt_embedding_response
from api.exceptions import ModelNotFoundException, InvalidModelTypeException
from core.config import get_settings
from core.client_manager import get_client_manager
from core.models import get_model_config

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/embeddings",
    tags=["embeddings"],
    dependencies=[Depends(get_api_key)]
)


@router.post("", response_model=EmbeddingResponse)
@router.post("/", response_model=EmbeddingResponse)
async def create_embeddings(request: EmbeddingRequest):
    """
    Create embeddings using OCI Generative AI.

    Args:
        request: Embedding request

    Returns:
        Embedding response
    """
    logger.info(f"Embedding request for model: {request.model}")

    settings = get_settings()

    # Validate model exists
    model_config = get_model_config(request.model)
    if not model_config:
        raise ModelNotFoundException(request.model)

    # Validate model type is embedding
    if model_config.type != "embedding":
        raise InvalidModelTypeException(
            model_id=request.model,
            expected_type="embedding",
            actual_type=model_config.type
        )

    # Get OCI client from manager (round-robin load balancing)
    client_manager = get_client_manager()
    oci_client = client_manager.get_client()

    # Adapt input
    texts = adapt_embedding_input(request)
    input_count = len(texts)

    try:
        # Generate embeddings
        response = oci_client.embed(
            model_id=request.model,
            texts=texts,
            truncate=settings.embed_truncate,
        )

        # Adapt response to OpenAI format
        openai_response = adapt_embedding_response(
            response,
            request.model,
            input_count
        )

        if settings.log_responses:
            logger.debug(f"Embeddings generated: {len(openai_response.data)} vectors")

        return openai_response

    except Exception as e:
        logger.error(f"Error in embedding generation: {str(e)}", exc_info=True)
        # Re-raise and let the global exception handlers filter sensitive information
        raise
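Note: the embeddings route accepts the same payload shape as the OpenAI embeddings API. A minimal sketch, not part of the commit, assuming a locally running gateway, a placeholder API key, and an embedding model id that model discovery actually returned:

# Hypothetical call against the embeddings route (illustrative only).
import requests  # assumes requests is available in the client environment

resp = requests.post(
    "http://localhost:8000/v1/embeddings",            # placeholder host/port
    headers={"Authorization": "Bearer GATEWAY_KEY"},   # placeholder key from API_KEYS
    json={
        "model": "cohere.embed-multilingual-v3.0",     # placeholder embedding model id
        "input": ["first text", "second text"],
    },
)
resp.raise_for_status()
body = resp.json()
# body["data"] is a list of EmbeddingData objects: {"object": "embedding", "embedding": [...], "index": n}
print(len(body["data"]), "vectors,", body["usage"])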
78
src/api/routers/models.py
Normal file
@@ -0,0 +1,78 @@
"""
Models API router - OpenAI compatible model listing.
"""
import logging
from fastapi import APIRouter, Depends

from api.auth import get_api_key
from api.schemas import ModelListResponse, ModelInfo
from core.models import get_all_models

logger = logging.getLogger(__name__)

router = APIRouter(
    prefix="/models",
    tags=["models"],
    dependencies=[Depends(get_api_key)]
)


@router.get("", response_model=ModelListResponse)
@router.get("/", response_model=ModelListResponse)
async def list_models():
    """
    List available models in OpenAI format.

    Returns:
        ModelListResponse: List of available models
    """
    logger.info("Listing available models")

    models = get_all_models()

    model_list = [
        ModelInfo(
            id=model.id,
            object="model",
            created=0,
            owned_by="oracle"
        )
        for model in models
    ]

    return ModelListResponse(
        object="list",
        data=model_list
    )


@router.get("/{model_id}", response_model=ModelInfo)
async def get_model(model_id: str):
    """
    Get information about a specific model.

    Args:
        model_id: Model identifier

    Returns:
        ModelInfo: Model information
    """
    logger.info(f"Getting model info: {model_id}")

    from core.models import get_model_config

    model_config = get_model_config(model_id)

    if not model_config:
        from fastapi import HTTPException
        raise HTTPException(
            status_code=404,
            detail=f"Model {model_id} not found"
        )

    return ModelInfo(
        id=model_config.id,
        object="model",
        created=0,
        owned_by="oracle"
    )
139
src/api/schemas.py
Normal file
@@ -0,0 +1,139 @@
"""
OpenAI-compatible API schemas.
"""
from typing import List, Optional, Union, Dict, Any, Literal
from pydantic import BaseModel, Field


# ============= Chat Completion Schemas =============

class ChatMessage(BaseModel):
    """A chat message."""
    role: Literal["system", "user", "assistant", "tool"]
    content: Union[str, List[Dict[str, Any]]]
    name: Optional[str] = None
    tool_calls: Optional[List[Dict[str, Any]]] = None
    tool_call_id: Optional[str] = None


class ChatCompletionRequest(BaseModel):
    """OpenAI chat completion request."""
    model: str
    messages: List[ChatMessage]
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = True  # Default to streaming
    stop: Optional[Union[str, List[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None
    tools: Optional[List[Dict[str, Any]]] = None
    tool_choice: Optional[Union[str, Dict[str, Any]]] = None


class ChatCompletionChoice(BaseModel):
    """A chat completion choice."""
    index: int
    message: ChatMessage
    finish_reason: Optional[str] = None
    logprobs: Optional[Dict[str, Any]] = None


class ChatCompletionUsage(BaseModel):
    """Token usage information."""
    prompt_tokens: int
    completion_tokens: int
    total_tokens: int


class ChatCompletionResponse(BaseModel):
    """OpenAI chat completion response."""
    id: str
    object: str = "chat.completion"
    created: int
    model: str
    choices: List[ChatCompletionChoice]
    usage: Optional[ChatCompletionUsage] = None
    system_fingerprint: Optional[str] = None


class ChatCompletionStreamChoice(BaseModel):
    """A streaming chat completion choice."""
    index: int
    delta: Dict[str, Any]
    finish_reason: Optional[str] = None


class ChatCompletionStreamResponse(BaseModel):
    """OpenAI streaming chat completion response."""
    id: str
    object: str = "chat.completion.chunk"
    created: int
    model: str
    choices: List[ChatCompletionStreamChoice]
    system_fingerprint: Optional[str] = None


# ============= Embedding Schemas =============

class EmbeddingRequest(BaseModel):
    """OpenAI embedding request."""
    model: str
    input: Union[str, List[str]]
    encoding_format: Optional[str] = "float"
    user: Optional[str] = None


class EmbeddingData(BaseModel):
    """Embedding data."""
    object: str = "embedding"
    embedding: List[float]
    index: int


class EmbeddingUsage(BaseModel):
    """Embedding usage information."""
    prompt_tokens: int
    total_tokens: int


class EmbeddingResponse(BaseModel):
    """OpenAI embedding response."""
    object: str = "list"
    data: List[EmbeddingData]
    model: str
    usage: EmbeddingUsage


# ============= Model Schemas =============

class ModelInfo(BaseModel):
    """Model information."""
    id: str
    object: str = "model"
    created: int = 0
    owned_by: str = "oracle"


class ModelListResponse(BaseModel):
    """Model list response."""
    object: str = "list"
    data: List[ModelInfo]


# ============= Error Schemas =============

class ErrorDetail(BaseModel):
    """Error detail."""
    message: str
    type: str
    param: Optional[str] = None
    code: Optional[str] = None


class ErrorResponse(BaseModel):
    """Error response."""
    error: ErrorDetail
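Note: these schemas mirror the OpenAI JSON shapes, so request validation is just Pydantic construction. A minimal sketch, not part of the commit, using placeholder values:

# Illustrative validation of an OpenAI-style payload against the gateway schemas.
from api.schemas import ChatCompletionRequest

payload = {
    "model": "meta.llama-3.3-70b-instruct",  # placeholder model id
    "messages": [
        {"role": "system", "content": "You are concise."},
        {"role": "user", "content": "Ping?"},
    ],
    "max_tokens": 64,
}

req = ChatCompletionRequest(**payload)
assert req.stream is True                  # streaming is the default unless the client sends stream=false
assert req.messages[1].role == "user"      # dicts are coerced into ChatMessage instances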
1
src/core/__init__.py
Normal file
@@ -0,0 +1 @@
"""Core module for OCI GenAI Gateway."""
70
src/core/client_manager.py
Normal file
@@ -0,0 +1,70 @@
"""
Simple OCI client manager with multi-profile round-robin load balancing.
"""
import logging
from typing import List, Dict
from threading import Lock

from .config import Settings, get_settings
from .oci_client import OCIGenAIClient

logger = logging.getLogger(__name__)


class OCIClientManager:
    """OCI client manager supporting round-robin load balancing and a client connection pool."""

    def __init__(self, settings: Settings = None):
        self.settings = settings or get_settings()
        self.profiles = self.settings.get_profiles()
        self.current_index = 0
        self.lock = Lock()

        # Pre-create the client connection pool
        self._clients: Dict[str, OCIGenAIClient] = {}
        logger.info(f"Initializing OCI client manager with {len(self.profiles)} profiles: {self.profiles}")

        for profile in self.profiles:
            try:
                self._clients[profile] = OCIGenAIClient(self.settings, profile)
                logger.info(f"✓ Created client instance: {profile}")
            except Exception as e:
                logger.error(f"✗ Failed to create client instance [{profile}]: {e}")
                raise

    def get_client(self) -> OCIGenAIClient:
        """
        Get the next client (round-robin strategy).

        Selects a client instance from the pre-created connection pool using a
        round-robin algorithm. This method is thread-safe.

        Returns:
            OCIGenAIClient: A pre-created OCI client instance

        Note:
            Client instances are created when the manager is initialized; this
            method never creates new instances.
        """
        with self.lock:
            # If there is only one profile, return it directly
            if len(self.profiles) == 1:
                return self._clients[self.profiles[0]]

            # Round-robin profile selection
            profile = self.profiles[self.current_index]
            self.current_index = (self.current_index + 1) % len(self.profiles)

            logger.debug(f"Selected profile: {profile} (round-robin)")
            return self._clients[profile]


# Global client manager instance
_client_manager = None


def get_client_manager() -> OCIClientManager:
    """Get the global client manager instance."""
    global _client_manager
    if _client_manager is None:
        _client_manager = OCIClientManager()
    return _client_manager
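Note: the round-robin behaviour is easiest to see by calling get_client() repeatedly. A minimal sketch, not part of the commit, assuming three profiles are configured in OCI_CONFIG_PROFILE and each exists in the OCI config file:

# Illustrative round-robin walkthrough (placeholder profile names).
from core.client_manager import get_client_manager

manager = get_client_manager()
# With OCI_CONFIG_PROFILE=DEFAULT,CHICAGO,ASHBURN the selected profiles cycle:
for _ in range(4):
    client = manager.get_client()
    print(client.profile)   # DEFAULT, CHICAGO, ASHBURN, DEFAULT, ...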
100
src/core/config.py
Normal file
@@ -0,0 +1,100 @@
"""
Configuration module for OCI Generative AI to OpenAI API Gateway.
"""
import os
import logging
from pathlib import Path
from typing import Optional, List
from pydantic_settings import BaseSettings

logger = logging.getLogger(__name__)


# Find project root directory (where .env should be)
def find_project_root() -> Path:
    """Find the project root directory by looking for .env or requirements.txt."""
    current = Path(__file__).resolve().parent  # Start from src/core/
    # Go up until we find project root markers
    while current != current.parent:
        if (current / ".env").exists() or (current / "requirements.txt").exists():
            return current
        current = current.parent
    return Path.cwd()  # Fallback to current directory


PROJECT_ROOT = find_project_root()


class Settings(BaseSettings):
    """Application settings with environment variable support."""

    # API Settings
    api_title: str = "OCI GenAI to OpenAI API Gateway"
    api_version: str = "1.0.0"
    api_prefix: str = "/v1"
    api_port: int = 8000
    api_host: str = "0.0.0.0"
    debug: bool = False

    # Authentication
    api_keys: List[str] = ["sk-oci-genai-default-key"]

    # OCI Settings
    oci_config_file: str = "~/.oci/config"
    oci_config_profile: str = "DEFAULT"  # Multiple profiles supported, comma-separated, e.g. DEFAULT,CHICAGO,ASHBURN
    oci_auth_type: str = "api_key"  # api_key or instance_principal

    # GenAI Service Settings
    genai_endpoint: Optional[str] = None
    max_tokens: int = 4096
    temperature: float = 0.7

    # Embedding Settings
    embed_truncate: str = "END"  # END or START

    # Streaming Settings
    enable_streaming: bool = True
    stream_chunk_size: int = 1024

    # Logging
    log_level: str = "INFO"
    log_requests: bool = False
    log_responses: bool = False
    log_file: Optional[str] = None
    log_file_max_size: int = 10  # MB
    log_file_backup_count: int = 5

    class Config:
        # Use absolute path to .env file in project root
        env_file = str(PROJECT_ROOT / ".env")
        env_file_encoding = "utf-8"
        case_sensitive = False

        # Allow reading from environment variables
        env_prefix = ""

    def model_post_init(self, __context) -> None:
        """Expand OCI config file path."""
        # Expand OCI config file path
        config_path = os.path.expanduser(self.oci_config_file)

        # If it's a relative path (starts with ./ or doesn't start with /), resolve it from project root
        if not config_path.startswith('/') and not config_path.startswith('~'):
            # Remove leading ./ if present
            if config_path.startswith('./'):
                config_path = config_path[2:]
            config_path = str(PROJECT_ROOT / config_path)

        # Update the config path
        self.oci_config_file = config_path

    def get_profiles(self) -> List[str]:
        """Return the list of configured OCI profiles."""
        return [p.strip() for p in self.oci_config_profile.split(',') if p.strip()]


# Global settings instance
settings = Settings()


def get_settings() -> Settings:
    """Get the global settings instance."""
    return settings
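Note: get_profiles() is what turns the comma-separated OCI_CONFIG_PROFILE value into the list the client manager iterates over. A minimal sketch, not part of the commit, assuming the environment variable shown below:

# Illustrative profile parsing (placeholder profile names).
import os
os.environ["OCI_CONFIG_PROFILE"] = "DEFAULT, CHICAGO ,ASHBURN"

from core.config import Settings

s = Settings()
print(s.get_profiles())   # ['DEFAULT', 'CHICAGO', 'ASHBURN'] - whitespace stripped, empty entries dropped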
260
src/core/models.py
Normal file
@@ -0,0 +1,260 @@
"""
Model definitions and configurations for OCI Generative AI models.
"""
import logging
import os
from typing import Dict, List, Optional
from pydantic import BaseModel

logger = logging.getLogger(__name__)


class ModelConfig(BaseModel):
    """Configuration for a single model."""
    id: str
    name: str
    type: str  # ondemand, dedicated, embedding
    provider: str  # cohere, meta, openai, etc.
    region: Optional[str] = None
    compartment_id: Optional[str] = None
    endpoint: Optional[str] = None
    supports_streaming: bool = True
    supports_tools: bool = False
    supports_multimodal: bool = False
    multimodal_types: List[str] = []
    max_tokens: int = 4096
    context_window: int = 128000


# OCI Generative AI models (dynamically loaded from OCI at startup)
OCI_CHAT_MODELS: Dict[str, ModelConfig] = {}

OCI_EMBED_MODELS: Dict[str, ModelConfig] = {}


def get_all_models() -> List[ModelConfig]:
    """Get all available models."""
    return list(OCI_CHAT_MODELS.values()) + list(OCI_EMBED_MODELS.values())


def get_chat_models() -> List[ModelConfig]:
    """Get all chat models."""
    return list(OCI_CHAT_MODELS.values())


def get_embed_models() -> List[ModelConfig]:
    """Get all embedding models."""
    return list(OCI_EMBED_MODELS.values())


def get_model_config(model_id: str) -> Optional[ModelConfig]:
    """Get configuration for a specific model."""
    if model_id in OCI_CHAT_MODELS:
        return OCI_CHAT_MODELS[model_id]
    if model_id in OCI_EMBED_MODELS:
        return OCI_EMBED_MODELS[model_id]
    return None


def fetch_models_from_oci(compartment_id: Optional[str] = None, region: Optional[str] = None,
                          config_path: str = "./.oci/config",
                          profile: str = "DEFAULT") -> Dict[str, Dict[str, ModelConfig]]:
    """
    Dynamically fetch available models from OCI Generative AI service.

    If compartment_id or region are not provided, they will be read from the OCI config file.
    - compartment_id defaults to 'tenancy' from config
    - region defaults to 'region' from config

    Args:
        compartment_id: OCI compartment ID (optional, defaults to tenancy from config)
        region: OCI region (optional, defaults to region from config)
        config_path: Path to OCI config file
        profile: OCI config profile name

    Returns:
        Dictionary with 'chat' and 'embed' keys containing model configs
    """
    try:
        import oci
        from oci.generative_ai import GenerativeAiClient

        # Load OCI configuration
        config = oci.config.from_file(
            file_location=os.path.expanduser(config_path),
            profile_name=profile
        )

        # Use values from config if not provided
        if not region:
            region = config.get("region")
            logger.info(f"📍 Using region from OCI config: {region}")

        if not compartment_id:
            compartment_id = config.get("tenancy")
            logger.info(f"📦 Using tenancy as compartment_id: {compartment_id}")

        if not region or not compartment_id:
            logger.error("❌ Missing region or compartment_id in OCI config")
            return {"chat": {}, "embed": {}}

        # Create GenerativeAiClient (not GenerativeAiInferenceClient)
        service_endpoint = f"https://generativeai.{region}.oci.oraclecloud.com"
        logger.info(f"🔗 Connecting to OCI GenerativeAI endpoint: {service_endpoint}")
        client = GenerativeAiClient(config, service_endpoint=service_endpoint)

        chat_models = {}
        embed_models = {}

        # Fetch all models (without capability filter to work with tenancy compartment)
        try:
            logger.info("🔍 Fetching all models from OCI...")
            logger.debug(f"  Compartment ID: {compartment_id}")
            logger.debug(f"  Method: Fetching all models, will filter by capabilities in Python")

            response = client.list_models(
                compartment_id=compartment_id
            )

            logger.info(f"✅ Successfully fetched {len(response.data.items)} models from OCI")

            # Filter models by capabilities in Python
            for model in response.data.items:
                model_id = model.display_name
                provider = model_id.split(".")[0] if "." in model_id else "unknown"
                capabilities = model.capabilities if hasattr(model, 'capabilities') else []

                logger.debug(f"  Processing: {model_id} (capabilities: {capabilities})")

                # Chat models: have CHAT or TEXT_GENERATION capability
                if 'CHAT' in capabilities or 'TEXT_GENERATION' in capabilities:
                    supports_streaming = True  # Most models support streaming
                    supports_tools = provider in ["cohere", "meta"]  # These providers support tools

                    # Detect multimodal support from capabilities
                    supports_multimodal = False
                    multimodal_types = []
                    if 'IMAGE' in capabilities or 'VISION' in capabilities:
                        supports_multimodal = True
                        multimodal_types.append("image")

                    chat_models[model_id] = ModelConfig(
                        id=model_id,
                        name=model.display_name,
                        type="ondemand",
                        provider=provider,
                        region=region,
                        compartment_id=compartment_id,
                        supports_streaming=supports_streaming,
                        supports_tools=supports_tools,
                        supports_multimodal=supports_multimodal,
                        multimodal_types=multimodal_types,
                        max_tokens=4096,
                        context_window=128000
                    )

                # Embedding models: have TEXT_EMBEDDINGS capability
                elif 'TEXT_EMBEDDINGS' in capabilities:
                    embed_models[model_id] = ModelConfig(
                        id=model_id,
                        name=model.display_name,
                        type="embedding",
                        provider=provider,
                        region=region,
                        compartment_id=compartment_id,
                        supports_streaming=False,
                        supports_tools=False,
                        max_tokens=512,
                        context_window=512
                    )

            logger.info(f"✅ Filtered {len(chat_models)} chat models")
            if chat_models:
                logger.debug(f"  Chat models: {', '.join(list(chat_models.keys())[:5])}{'...' if len(chat_models) > 5 else ''}")

            logger.info(f"✅ Filtered {len(embed_models)} embedding models")
            if embed_models:
                logger.debug(f"  Embed models: {', '.join(embed_models.keys())}")

        except Exception as e:
            logger.warning(f"⚠️ Failed to fetch models from OCI")
            logger.warning(f"  Error: {e}")
            if hasattr(e, 'status'):
                logger.warning(f"  HTTP Status: {e.status}")
            if hasattr(e, 'code'):
                logger.warning(f"  Error Code: {e.code}")
            logger.info(f"💡 Tip: Check your OCI credentials and permissions")

        return {"chat": chat_models, "embed": embed_models}

    except Exception as e:
        logger.error(f"❌ Failed to initialize OCI client for model discovery")
        logger.error(f"  Error: {e}")
        logger.info("💡 Tip: Check your OCI credentials and permissions")
        return {"chat": {}, "embed": {}}


def update_models_from_oci(compartment_id: Optional[str] = None,
                           region: Optional[str] = None,
                           config_path: str = "./.oci/config",
                           profile: str = "DEFAULT") -> None:
    """
    Update global model dictionaries with models from OCI.
    Raises RuntimeError if model fetching fails.

    Priority for configuration values:
    1. Explicitly provided parameters
    2. Environment variables (OCI_COMPARTMENT_ID, OCI_REGION)
    3. Values from .oci/config file (tenancy, region)

    Raises:
        RuntimeError: If no models can be fetched from OCI
    """
    global OCI_CHAT_MODELS, OCI_EMBED_MODELS

    # Priority: explicit params > environment > config file
    if not compartment_id:
        compartment_id = os.getenv("OCI_COMPARTMENT_ID")
    if not region:
        region = os.getenv("OCI_REGION")

    # Note: If still not set, fetch_models_from_oci will try to read from config file
    logger.info("🚀 Attempting to fetch models from OCI...")
    fetched = fetch_models_from_oci(compartment_id, region, config_path, profile)

    # Fail-fast: Require successful model fetching
    if not fetched["chat"] and not fetched["embed"]:
        error_msg = (
            "❌ Failed to fetch any models from OCI.\n\n"
            "Troubleshooting steps:\n"
            "1. Verify your OCI credentials are configured correctly:\n"
            f"   - Config file: {config_path}\n"
            f"   - Profile: {profile}\n"
            "   - Run: oci iam region list (to test authentication)\n\n"
            "2. Check your OCI permissions:\n"
            "   - Ensure you have access to Generative AI service\n"
            "   - Verify compartment_id/tenancy has available models\n\n"
            "3. Check network connectivity:\n"
            "   - Ensure you can reach OCI endpoints\n"
            f"   - Test region: {region or 'from config file'}\n\n"
            "4. Review logs above for detailed error messages"
        )
        logger.error(error_msg)
        raise RuntimeError(
            "Failed to fetch models from OCI. "
            "The service cannot start without available models. "
            "Check the logs above for troubleshooting guidance."
        )

    # Update global model registries
    if fetched["chat"]:
        OCI_CHAT_MODELS.clear()
        OCI_CHAT_MODELS.update(fetched["chat"])
        logger.info(f"✅ Loaded {len(OCI_CHAT_MODELS)} chat models from OCI")

    if fetched["embed"]:
        OCI_EMBED_MODELS.clear()
        OCI_EMBED_MODELS.update(fetched["embed"])
        logger.info(f"✅ Loaded {len(OCI_EMBED_MODELS)} embedding models from OCI")

    logger.info(f"✅ Model discovery completed successfully")
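Note: the typical flow is to populate the registries once at startup and then resolve model ids through get_model_config(). A minimal sketch, not part of the commit, requiring working OCI credentials and using placeholder ids:

# Illustrative model-discovery flow (placeholder model id).
from core.models import update_models_from_oci, get_model_config, get_all_models

# Populate the global registries once (raises RuntimeError if nothing could be fetched).
update_models_from_oci(config_path="~/.oci/config", profile="DEFAULT")

print(len(get_all_models()), "models discovered")
cfg = get_model_config("cohere.command-r-plus")   # placeholder; use an id that was actually listed
if cfg:
    print(cfg.provider, cfg.type, cfg.supports_tools)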
361
src/core/oci_client.py
Normal file
@@ -0,0 +1,361 @@
"""
OCI Generative AI client wrapper.
"""
import os
import logging
from typing import Optional, AsyncIterator
import oci
from oci.generative_ai_inference import GenerativeAiInferenceClient
from oci.generative_ai_inference.models import (
    ChatDetails,
    CohereChatRequest,
    GenericChatRequest,
    OnDemandServingMode,
    DedicatedServingMode,
    CohereMessage,
    Message,
    TextContent,
    EmbedTextDetails,
)

# Try to import multimodal content types
try:
    from oci.generative_ai_inference.models import (
        ImageContent,
        ImageUrl,
        AudioContent,
        AudioUrl,
        VideoContent,
        VideoUrl,
    )
    MULTIMODAL_SUPPORTED = True
    logger_init = logging.getLogger(__name__)
    logger_init.info("OCI SDK multimodal content types available")
except ImportError:
    MULTIMODAL_SUPPORTED = False
    logger_init = logging.getLogger(__name__)
    logger_init.warning("OCI SDK does not support multimodal content types, using dict format as fallback")

from .config import Settings
from .models import get_model_config, ModelConfig

logger = logging.getLogger(__name__)


def build_multimodal_content(content_list: list) -> list:
    """
    Build OCI ChatContent object array from adapted content list.

    Supports both HTTP URLs and Base64 data URIs (data:image/jpeg;base64,...).

    Args:
        content_list: List of content items from request adapter

    Returns:
        List of OCI ChatContent objects or dicts (fallback)
    """
    if not MULTIMODAL_SUPPORTED:
        # Fallback: return dict format, OCI SDK might auto-convert
        return content_list

    oci_contents = []
    for item in content_list:
        if not isinstance(item, dict):
            continue

        item_type = item.get("type")

        if item_type == "text":
            oci_contents.append(TextContent(text=item.get("text", "")))

        elif item_type == "image_url":
            image_data = item.get("image_url", {})
            if "url" in image_data:
                # ImageUrl accepts both HTTP URLs and data URIs (data:image/jpeg;base64,...)
                img_url = ImageUrl(url=image_data["url"])
                # Optional: support 'detail' parameter if provided
                if "detail" in image_data:
                    img_url.detail = image_data["detail"]
                oci_contents.append(ImageContent(image_url=img_url, type="IMAGE"))

        elif item_type == "audio":
            audio_data = item.get("audio_url", {})
            if "url" in audio_data:
                # AudioUrl accepts both HTTP URLs and data URIs (data:audio/wav;base64,...)
                audio_url = AudioUrl(url=audio_data["url"])
                oci_contents.append(AudioContent(audio_url=audio_url, type="AUDIO"))

        elif item_type == "video":
            video_data = item.get("video_url", {})
            if "url" in video_data:
                # VideoUrl accepts both HTTP URLs and data URIs (data:video/mp4;base64,...)
                video_url = VideoUrl(url=video_data["url"])
                oci_contents.append(VideoContent(video_url=video_url, type="VIDEO"))

    return oci_contents if oci_contents else [TextContent(text="")]


class OCIGenAIClient:
    """Wrapper for OCI Generative AI client."""

    def __init__(self, settings: Settings, profile: Optional[str] = None):
        """
        Initialize the OCI GenAI client.

        Args:
            settings: Application settings
            profile: Optional OCI config profile name. If not provided, the first profile from settings is used.
        """
        self.settings = settings
        self.profile = profile or settings.get_profiles()[0]
        self._client: Optional[GenerativeAiInferenceClient] = None
        self._config: Optional[oci.config.Config] = None
        self._region: Optional[str] = None
        self._compartment_id: Optional[str] = None

    def _get_config(self) -> dict:
        """Get OCI configuration."""
        if self._config is None:
            if self.settings.oci_auth_type == "instance_principal":
                signer = oci.auth.signers.InstancePrincipalsSecurityTokenSigner()
                self._config = {"signer": signer}
            else:
                config_path = os.path.expanduser(self.settings.oci_config_file)
                self._config = oci.config.from_file(
                    file_location=config_path,
                    profile_name=self.profile
                )

                # Read region and compartment_id from the config
                if self._region is None:
                    self._region = self._config.get("region")
                if self._compartment_id is None:
                    self._compartment_id = self._config.get("tenancy")

        return self._config

    @property
    def region(self) -> Optional[str]:
        """Get the configured region."""
        if self._region is None and self._config is None:
            self._get_config()
        return self._region

    @property
    def compartment_id(self) -> Optional[str]:
        """Get the configured compartment ID."""
        if self._compartment_id is None and self._config is None:
            self._get_config()
        return self._compartment_id

    def _get_client(self) -> GenerativeAiInferenceClient:
        """Get or create OCI Generative AI Inference client with correct endpoint."""
        config = self._get_config()

        # Use INFERENCE endpoint (not management endpoint)
        # Official format: https://inference.generativeai.{region}.oci.oraclecloud.com
        inference_endpoint = f"https://inference.generativeai.{self.region}.oci.oraclecloud.com"

        if isinstance(config, dict) and "signer" in config:
            # For instance principal
            client = GenerativeAiInferenceClient(
                config={},
                service_endpoint=inference_endpoint,
                **config
            )
            return client

        # For API key authentication
        client = GenerativeAiInferenceClient(
            config=config,
            service_endpoint=inference_endpoint,
            retry_strategy=oci.retry.NoneRetryStrategy(),
            timeout=(10, 240)
        )

        return client

    def chat(
        self,
        model_id: str,
        messages: list,
        temperature: float = 0.7,
        max_tokens: int = 1024,
        top_p: float = 1.0,
        stream: bool = False,
        tools: Optional[list] = None,
    ):
        """Send a chat completion request to OCI GenAI."""
        model_config = get_model_config(model_id)
        if not model_config:
            raise ValueError(f"Unsupported model: {model_id}")

        if not self.compartment_id:
            raise ValueError("Compartment ID is required")

        client = self._get_client()

        # Prepare serving mode
        if model_config.type == "dedicated" and model_config.endpoint:
            serving_mode = DedicatedServingMode(endpoint_id=model_config.endpoint)
        else:
            serving_mode = OnDemandServingMode(model_id=model_id)

        # Convert messages based on provider
        if model_config.provider == "cohere":
            chat_request = self._build_cohere_request(
                messages, temperature, max_tokens, top_p, tools, stream
            )
        elif model_config.provider in ["meta", "xai", "google", "openai"]:
            chat_request = self._build_generic_request(
                messages, temperature, max_tokens, top_p, tools, model_config.provider, stream
            )
        else:
            raise ValueError(f"Unsupported provider: {model_config.provider}")

        chat_details = ChatDetails(
            serving_mode=serving_mode,
            compartment_id=self.compartment_id,
            chat_request=chat_request,
        )

        logger.debug(f"Sending chat request to OCI GenAI: {model_id}")
        response = client.chat(chat_details)
        return response

    def _build_cohere_request(
        self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], stream: bool = False
    ) -> CohereChatRequest:
        """Build Cohere chat request.

        Note: Cohere models only support text content, not multimodal.
        """
        # Convert messages to Cohere format
        chat_history = []
        message = None

        for msg in messages:
            role = msg["role"]
            content = msg["content"]

            # Extract text from multimodal content
            if isinstance(content, list):
                # Extract text parts only
                text_parts = []
                for item in content:
                    if isinstance(item, dict) and item.get("type") == "text":
                        text_parts.append(item.get("text", ""))
                content = " ".join(text_parts) if text_parts else ""

            if role == "system":
                # Cohere uses preamble for system messages
                continue
            elif role == "user":
                message = content
            elif role == "assistant":
                chat_history.append(
                    CohereMessage(role="CHATBOT", message=content)
                )
            elif role == "tool":
                # Handle tool responses if needed
                pass

        # Get preamble from system messages
        preamble_override = None
        for msg in messages:
            if msg["role"] == "system":
                preamble_override = msg["content"]
                break

        return CohereChatRequest(
            message=message,
            chat_history=chat_history if chat_history else None,
            preamble_override=preamble_override,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            is_stream=stream,
        )

    def _build_generic_request(
        self, messages: list, temperature: float, max_tokens: int, top_p: float, tools: Optional[list], provider: str, stream: bool = False
    ) -> GenericChatRequest:
        """Build Generic chat request for Llama and other models."""
        # Convert messages to Generic format
        generic_messages = []
        for msg in messages:
            role = msg["role"]
            content = msg["content"]

            # Handle multimodal content
            if isinstance(content, list):
                # Build OCI ChatContent objects from multimodal content
                oci_contents = build_multimodal_content(content)
            else:
                # Simple text content
                if MULTIMODAL_SUPPORTED:
                    oci_contents = [TextContent(text=content)]
                else:
                    # Fallback: use dict format
                    oci_contents = [{"type": "text", "text": content}]

            if role == "user":
                oci_role = "USER"
            elif role in ["assistant", "model"]:
                oci_role = "ASSISTANT"
            elif role == "system":
                oci_role = "SYSTEM"
            else:
                oci_role = role.upper()

            # Create Message with role and content objects
            logger.debug(f"Creating message with role: {oci_role}, provider: {provider}, original role: {role}")

            generic_messages.append(
                Message(
                    role=oci_role,
                    content=oci_contents
                )
            )

        return GenericChatRequest(
            messages=generic_messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            is_stream=stream,
        )

    def embed(
        self,
        model_id: str,
        texts: list,
        truncate: str = "END",
    ):
        """Generate embeddings using OCI GenAI."""
        model_config = get_model_config(model_id)
        if not model_config or model_config.type != "embedding":
            raise ValueError(f"Invalid embedding model: {model_id}")

        if not self.compartment_id:
            raise ValueError("Compartment ID is required")

        client = self._get_client()

        serving_mode = OnDemandServingMode(
            serving_type="ON_DEMAND",
            model_id=model_id
        )

        embed_details = EmbedTextDetails(
            serving_mode=serving_mode,
            compartment_id=self.compartment_id,
            inputs=texts,
            truncate=truncate,
            is_echo=False,
            input_type="SEARCH_QUERY",
        )

        logger.debug(f"Sending embed request to OCI GenAI: {model_id}")
        response = client.embed_text(embed_details)
        return response
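Note: the mapping from OpenAI-style multimodal content to OCI content objects is handled entirely by build_multimodal_content. A minimal sketch, not part of the commit, with a placeholder (elided) data URI:

# Illustrative multimodal content conversion (no OCI call is made here).
from core.oci_client import build_multimodal_content

openai_content = [
    {"type": "text", "text": "What is in this picture?"},
    {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,...", "detail": "low"}},  # base64 payload elided
]

oci_content = build_multimodal_content(openai_content)
# With a multimodal-capable SDK this yields [TextContent, ImageContent]; otherwise the dicts pass through unchanged.
print([type(c).__name__ for c in oci_content])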
274
src/main.py
Normal file
@@ -0,0 +1,274 @@
"""
Main FastAPI application for OCI Generative AI to OpenAI API Gateway.
"""
import logging
import sys
import os
from contextlib import asynccontextmanager
from logging.handlers import RotatingFileHandler

from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError

from oci.exceptions import ServiceError

from core.config import get_settings
from core.models import update_models_from_oci
from api.routers import models, chat, embeddings
from api.schemas import ErrorResponse, ErrorDetail
from api.error_handler import OCIErrorHandler
from api.exceptions import ModelNotFoundException, InvalidModelTypeException


# Configure logging
def setup_logging():
    """Setup logging configuration."""
    settings = get_settings()

    # Create handlers list
    handlers = [
        logging.StreamHandler(sys.stdout)
    ]

    # Add file handler if log_file is configured
    if settings.log_file:
        log_dir = os.path.dirname(settings.log_file)
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir, exist_ok=True)

        file_handler = RotatingFileHandler(
            settings.log_file,
            maxBytes=settings.log_file_max_size * 1024 * 1024,  # Convert MB to bytes
            backupCount=settings.log_file_backup_count,
            encoding='utf-8'
        )
        handlers.append(file_handler)

    logging.basicConfig(
        level=getattr(logging, settings.log_level.upper()),
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        handlers=handlers
    )


setup_logging()
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler."""
    logger.info("=" * 60)
    logger.info("Starting OCI GenAI to OpenAI API Gateway")
    logger.info("=" * 60)

    settings = get_settings()
    logger.info(f"API Version: {settings.api_version}")
    logger.info(f"API Prefix: {settings.api_prefix}")
    logger.info(f"Debug Mode: {settings.debug}")
    logger.info(f"OCI Config: {settings.oci_config_file}")

    profiles = settings.get_profiles()
    logger.info(f"OCI Profiles: {', '.join(profiles)}")

    try:
        # Fetch models from OCI (fails fast if unable to fetch)
        # Use the first profile for model discovery
        update_models_from_oci(
            config_path=settings.oci_config_file,
            profile=profiles[0] if profiles else "DEFAULT"
        )

        logger.info("=" * 60)
        logger.info("✅ Startup completed successfully")
        logger.info(f"Server listening on {settings.api_host}:{settings.api_port}")
        logger.info("=" * 60)

    except RuntimeError as e:
        logger.error("=" * 60)
        logger.error("❌ STARTUP FAILED")
        logger.error("=" * 60)
        logger.error(f"Reason: {str(e)}")
        logger.error("")
        logger.error("The service cannot start without available models from OCI.")
        logger.error("Please review the troubleshooting steps above and fix the issue.")
        logger.error("=" * 60)
        raise
    except Exception as e:
        logger.error("=" * 60)
        logger.error("❌ UNEXPECTED STARTUP ERROR")
        logger.error("=" * 60)
        logger.error(f"Error type: {type(e).__name__}")
        logger.error(f"Error message: {str(e)}")
        logger.error("=" * 60)
        raise

    yield

    logger.info("=" * 60)
    logger.info("Shutting down OCI GenAI to OpenAI API Gateway")
    logger.info("=" * 60)


# Create FastAPI app
settings = get_settings()

app = FastAPI(
    title=settings.api_title,
    version=settings.api_version,
    description="OpenAI-compatible REST API for Oracle Cloud Infrastructure Generative AI Service",
    lifespan=lifespan,
    docs_url="/docs" if settings.debug else None,
    redoc_url="/redoc" if settings.debug else None,
)

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# Exception handlers
@app.exception_handler(ModelNotFoundException)
async def model_not_found_handler(request: Request, exc: ModelNotFoundException):
    """Handle model not found exceptions with OpenAI-compatible format."""
    error = ErrorDetail(
        message=exc.detail,
        type=exc.error_type,
        code=exc.error_code
    )

    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(InvalidModelTypeException)
async def invalid_model_type_handler(request: Request, exc: InvalidModelTypeException):
    """Handle invalid model type exceptions with OpenAI-compatible format."""
    error = ErrorDetail(
        message=exc.detail,
        type=exc.error_type,
        code=exc.error_code
    )

    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(ServiceError)
async def oci_service_error_handler(request: Request, exc: ServiceError):
    """Handle OCI SDK ServiceError exceptions."""
    # Use OCIErrorHandler to sanitize and filter sensitive information
    error_response = OCIErrorHandler.sanitize_oci_error(exc)

    # Determine the HTTP status code (use the one returned by OCI)
    status_code = exc.status if 400 <= exc.status < 600 else 500

    return JSONResponse(
        status_code=status_code,
        content=error_response.dict()
    )


@app.exception_handler(HTTPException)
async def http_exception_handler(request: Request, exc: HTTPException):
    """Handle HTTP exceptions with sensitive information filtering."""
    # Filter sensitive information possibly contained in the HTTPException detail
    filtered_detail = OCIErrorHandler.filter_sensitive_info(str(exc.detail))

    error = ErrorDetail(
        message=filtered_detail,
        type="invalid_request_error",
        code=f"http_{exc.status_code}"
    )

    return JSONResponse(
        status_code=exc.status_code,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """Handle request validation errors."""
    logger.error(f"Validation error: {exc}")
    error = ErrorDetail(
        message=str(exc),
        type="invalid_request_error",
        code="validation_error"
    )
    return JSONResponse(
        status_code=400,
        content=ErrorResponse(error=error).dict()
    )


@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Handle general exceptions with sensitive information filtering."""
    logger.error(f"Unexpected error: {exc}", exc_info=True)

    # Also filter sensitive information from generic errors (the full error is already logged)
    filtered_message = OCIErrorHandler.filter_sensitive_info(str(exc))

    error = ErrorDetail(
        message="An unexpected error occurred",  # Do not expose the specific error
        type="server_error",
        code="internal_error"
    )
    return JSONResponse(
        status_code=500,
        content=ErrorResponse(error=error).dict()
    )


# Include routers
app.include_router(models.router, prefix=settings.api_prefix)
app.include_router(chat.router, prefix=settings.api_prefix)
app.include_router(embeddings.router, prefix=settings.api_prefix)


@app.get("/")
async def root():
    """Root endpoint."""
    return {
        "name": settings.api_title,
        "version": settings.api_version,
        "description": "OpenAI-compatible REST API for OCI Generative AI",
        "endpoints": {
            "models": f"{settings.api_prefix}/models",
            "chat": f"{settings.api_prefix}/chat/completions",
            "embeddings": f"{settings.api_prefix}/embeddings"
        }
    }


@app.get("/health")
async def health():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "service": "oci-genai-gateway"
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "main:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=settings.debug,
        log_level=settings.log_level.lower()
    )
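Note: the root and /health endpoints are unauthenticated, so they make a quick liveness probe once the container is up. A minimal sketch, not part of the commit, assuming the default host/port:

# Illustrative liveness check against the running gateway.
import requests

print(requests.get("http://localhost:8000/health").json())        # {'status': 'healthy', 'service': 'oci-genai-gateway'}
print(requests.get("http://localhost:8000/").json()["endpoints"])  # routes exposed under the configured API prefix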