server.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. # Copyright 2026 The HuggingFace Team. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. FastAPI app factory.
  16. """
  17. import uuid
  18. from contextlib import asynccontextmanager
  19. from ...utils import logging
  20. from ...utils.import_utils import is_serve_available
  21. if is_serve_available():
  22. from fastapi import FastAPI, Request
  23. from fastapi.middleware.cors import CORSMiddleware
  24. from fastapi.responses import JSONResponse, StreamingResponse
  25. from .chat_completion import ChatCompletionHandler
  26. from .model_manager import ModelManager
  27. from .response import ResponseHandler
  28. from .transcription import TranscriptionHandler
  29. from .utils import X_REQUEST_ID
  30. logger = logging.get_logger(__name__)
  31. def build_server(
  32. model_manager: ModelManager,
  33. chat_handler: ChatCompletionHandler,
  34. response_handler: ResponseHandler,
  35. transcription_handler: TranscriptionHandler,
  36. enable_cors: bool = False,
  37. ) -> FastAPI:
  38. """Build and return a configured FastAPI application.
  39. Args:
  40. model_manager: Handles model loading, caching, and cleanup.
  41. chat_handler: Handles `/v1/chat/completions` requests.
  42. response_handler: Handles `/v1/responses` requests.
  43. enable_cors: If `True`, adds permissive CORS middleware (allow all origins).
  44. Returns:
  45. A FastAPI app ready to be passed to uvicorn.
  46. """
  47. @asynccontextmanager
  48. async def lifespan(app: FastAPI):
  49. yield
  50. model_manager.shutdown()
  51. app = FastAPI(lifespan=lifespan)
  52. if enable_cors:
  53. app.add_middleware(
  54. CORSMiddleware,
  55. allow_origins=["*"],
  56. allow_credentials=True,
  57. allow_methods=["*"],
  58. allow_headers=["*"],
  59. )
  60. logger.warning_once("CORS allow origin is set to `*`. Not recommended for production.")
  61. # ---- Middleware ----
  62. @app.middleware("http")
  63. async def request_id_middleware(request: Request, call_next):
  64. """Get or set the request ID in the header."""
  65. request_id = request.headers.get(X_REQUEST_ID) or str(uuid.uuid4())
  66. request.state.request_id = request_id
  67. response = await call_next(request)
  68. response.headers[X_REQUEST_ID] = request_id
  69. return response
  70. # ---- Routes ----
  71. @app.post("/v1/chat/completions")
  72. async def chat_completions(request: Request, body: dict):
  73. return await chat_handler.handle_request(body, request.state.request_id)
  74. @app.post("/v1/responses")
  75. async def responses(request: Request, body: dict):
  76. return await response_handler.handle_request(body, request.state.request_id)
  77. @app.post("/v1/audio/transcriptions")
  78. async def audio_transcriptions(request: Request):
  79. return await transcription_handler.handle_request(request)
  80. @app.post("/load_model")
  81. async def load_model(body: dict):
  82. from fastapi import HTTPException
  83. model = body.get("model")
  84. if model is None:
  85. raise HTTPException(status_code=422, detail="Missing `model` field in the request body.")
  86. model_id_and_revision = model_manager.process_model_name(model)
  87. return StreamingResponse(
  88. model_manager.load_model_streaming(model_id_and_revision), media_type="text/event-stream"
  89. )
  90. @app.post("/reset")
  91. def reset():
  92. model_manager.shutdown()
  93. return JSONResponse({"status": "ok"})
  94. @app.get("/v1/models")
  95. @app.options("/v1/models")
  96. def list_models():
  97. return JSONResponse({"object": "list", "data": model_manager.get_gen_models()})
  98. @app.get("/health")
  99. def health():
  100. return JSONResponse({"status": "ok"})
  101. return app