From 73ccef95a1db37932ed9dbce83d4189e7909419c Mon Sep 17 00:00:00 2001
From: RossAlRed
Date: Fri, 3 Nov 2023 12:07:25 +0000
Subject: [PATCH 1/2] limit_concurrency

---
 llama_cpp/server/app.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index bec956147..031f0560a 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -157,6 +157,10 @@ class Settings(BaseSettings):
     )
     # Server Params
     host: str = Field(default="localhost", description="Listen address")
+    limit_concurrency: int = Field(
+        default=2,
+        description="Maximum number of concurrent connections or tasks to allow before issuing HTTP 503 responses"
+    )
     port: int = Field(default=8000, description="Listen port")
     interrupt_requests: bool = Field(
         default=True,

From 79e7a7c07229f5d4bc302cb9a7ad2c7ae131e508 Mon Sep 17 00:00:00 2001
From: RossAlRed
Date: Fri, 3 Nov 2023 12:08:03 +0000
Subject: [PATCH 2/2] limit_concurrency

---
 llama_cpp/server/__main__.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index a294ebf8a..df54e0326 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -96,5 +96,8 @@ def parse_bool_arg(arg):
 
     app = create_app(settings=settings)
     uvicorn.run(
-        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port))
+        app,
+        host=os.getenv("HOST", settings.host),
+        port=int(os.getenv("PORT", settings.port)),
+        limit_concurrency=int(os.getenv("LIMIT_CONCURRENCY", settings.limit_concurrency))
     )