背景:
vLLM 推理框架启动模型时默认不做 API Key 验证,可以借助 FastAPI 在其前面加一层代理来实现该功能。
代码实现:
import logging
import os
import secrets

import httpx
from fastapi import FastAPI, Header, HTTPException, Request, Response
# Create the FastAPI application that sits in front of the vLLM server.
app = FastAPI()

# DEBUG level so that the logging.debug(...) calls in the handlers are emitted.
logging.basicConfig(level=logging.DEBUG)

# Base URL of the vLLM server that authenticated requests are forwarded to.
# Can be overridden with the VLLM_BASE_URL environment variable; a trailing
# slash is trimmed because request paths are appended as "/{path}".
VLLM_BASE_URL = os.environ.get("VLLM_BASE_URL", "http://localhost:25010").rstrip("/")

# Set of accepted API keys. A comma-separated VLLM_PROXY_API_KEYS environment
# variable takes precedence so real keys do not have to live in source control;
# the built-in defaults are used when it is unset or empty. This could be
# extended further to load keys from a database or a configuration file.
_DEFAULT_API_KEYS = {"zml_123456789", "zml_1234567890"}
_ENV_API_KEYS = {
    key.strip()
    for key in os.environ.get("VLLM_PROXY_API_KEYS", "").split(",")
    if key.strip()
}
VALID_API_KEYS = _ENV_API_KEYS or _DEFAULT_API_KEYS
# API key verification helper (called explicitly by the proxy route).
async def verify_api_key(authorization: str = Header(None)):
    """Validate a ``Bearer`` Authorization header against ``VALID_API_KEYS``.

    Args:
        authorization: Raw value of the ``Authorization`` request header, or
            ``None`` when the header is absent.

    Raises:
        HTTPException: 403 when the header is missing or is not a string
            starting with ``"Bearer "``, when no key follows the scheme, or
            when the key is not one of ``VALID_API_KEYS``.
    """
    # Record only whether a header was supplied; the credential itself must
    # never be written to the log.
    logging.debug(
        "Received Authorization header: %s",
        "[redacted]" if authorization else "[missing]",
    )

    # The isinstance check also rejects the Header(None) default object that
    # is passed when this function is called directly without an argument.
    if not isinstance(authorization, str) or not authorization.startswith("Bearer "):
        raise HTTPException(status_code=403, detail="Invalid Authorization Header")

    # Everything after the scheme is the key. Taking the whole remainder
    # (instead of only the first space-separated token) means trailing junk
    # is rejected rather than silently ignored.
    api_key = authorization[len("Bearer "):].strip()
    if not api_key:
        raise HTTPException(status_code=403, detail="Malformed Authorization Header")

    # Compare in constant time, and against every configured key without
    # short-circuiting, so response timing does not reveal key contents.
    # Encoding to bytes lets compare_digest accept non-ASCII input.
    candidate = api_key.encode("utf-8")
    matched = False
    for valid_key in VALID_API_KEYS:
        if secrets.compare_digest(candidate, valid_key.encode("utf-8")):
            matched = True
    if not matched:
        raise HTTPException(status_code=403, detail="Invalid API Key")
# Hop-by-hop headers apply to a single connection and must not be relayed by
# a proxy.
_HOP_BY_HOP_HEADERS = frozenset({
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailer",
    "transfer-encoding",
    "upgrade",
})

# Request headers that httpx must generate itself for the upstream call.
# "accept-encoding" is dropped so the upstream only uses encodings that httpx
# is able to decode.
_DROPPED_REQUEST_HEADERS = _HOP_BY_HOP_HEADERS | {
    "host",
    "content-length",
    "accept-encoding",
}

# Response headers that no longer describe the body being returned: httpx has
# already decoded and fully buffered it, so the framework must recompute them.
_DROPPED_RESPONSE_HEADERS = _HOP_BY_HOP_HEADERS | {
    "content-encoding",
    "content-length",
}

# httpx defaults to a 5 second timeout, which is too short for LLM generation.
_UPSTREAM_TIMEOUT = httpx.Timeout(600.0, connect=10.0)


# Proxy route: authenticate the caller, then forward the request to vLLM.
@app.api_route("/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
async def proxy(request: Request, path: str, authorization: str = Header(None)):
    """Forward an authenticated request to the vLLM server and relay its reply.

    Args:
        request: Incoming request; its method, query string, headers and body
            are forwarded upstream.
        path: Path captured by the catch-all route, appended to
            ``VLLM_BASE_URL``.
        authorization: Value of the ``Authorization`` header, checked by
            ``verify_api_key`` before anything is forwarded.

    Returns:
        A ``Response`` carrying the upstream status code, body and
        end-to-end headers.

    Raises:
        HTTPException: 403 from ``verify_api_key`` on bad credentials, 504 if
            the upstream times out, 502 if it cannot be reached.

    Note:
        The upstream body is read completely before being returned, so a
        streamed upstream reply reaches the client only once it has finished.
    """
    logging.debug("Received request with path: %s", path)
    # Mask the credential so API keys do not end up in log files.
    loggable_headers = {
        name: "[redacted]" if name.lower() == "authorization" else value
        for name, value in request.headers.items()
    }
    logging.debug("Received headers: %s", loggable_headers)

    await verify_api_key(authorization)

    target_url = f"{VLLM_BASE_URL}/{path}"
    body = await request.body()
    forward_headers = {
        name: value
        for name, value in request.headers.items()
        if name.lower() not in _DROPPED_REQUEST_HEADERS
    }

    try:
        async with httpx.AsyncClient(timeout=_UPSTREAM_TIMEOUT) as client:
            upstream = await client.request(
                method=request.method,
                url=target_url,
                # multi_items() keeps repeated query parameters (?a=1&a=2).
                params=request.query_params.multi_items(),
                # Raw bytes belong in content=; data= is meant for form fields.
                content=body,
                headers=forward_headers,
            )
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="vLLM upstream timed out") from exc
    except httpx.RequestError as exc:
        raise HTTPException(status_code=502, detail="vLLM upstream unreachable") from exc

    response_headers = {
        name: value
        for name, value in upstream.headers.items()
        if name.lower() not in _DROPPED_RESPONSE_HEADERS
    }
    return Response(
        content=upstream.content,
        status_code=upstream.status_code,
        headers=response_headers,
    )
启动:
uvicorn my_fastapi:app --host=0.0.0.0 --port=12345
# my_fastapi 为脚本名称
通过访问 FastAPI 提供的 12345 端口即可实现该功能。