Spaces:
Running
Running
[llm] Use model secrets for vllm api key
Browse files
- llm/app.py +4 -1
- llm/configs.py +4 -7
llm/app.py
CHANGED
|
@@ -14,6 +14,7 @@ from configs import (
|
|
| 14 |
|
| 15 |
app = modal.App("vibe-shopping-llm")
|
| 16 |
|
|
|
|
| 17 |
@app.function(
|
| 18 |
image=vllm_image,
|
| 19 |
gpu=f"H100:{N_GPU}",
|
|
@@ -26,6 +27,7 @@ app = modal.App("vibe-shopping-llm")
|
|
| 26 |
"/root/.cache/huggingface": hf_cache_vol,
|
| 27 |
"/root/.cache/vllm": vllm_cache_vol,
|
| 28 |
},
|
|
|
|
| 29 |
)
|
| 30 |
@modal.concurrent(
|
| 31 |
max_inputs=50 # maximum number of concurrent requests per auto-scaling replica
|
|
@@ -33,6 +35,7 @@ app = modal.App("vibe-shopping-llm")
|
|
| 33 |
@modal.web_server(port=VLLM_PORT, startup_timeout=5 * MINUTE)
|
| 34 |
def serve():
|
| 35 |
import subprocess
|
|
|
|
| 36 |
|
| 37 |
cmd = [
|
| 38 |
"vllm",
|
|
@@ -46,7 +49,7 @@ def serve():
|
|
| 46 |
"--port",
|
| 47 |
str(VLLM_PORT),
|
| 48 |
"--api-key",
|
| 49 |
-
API_KEY,
|
| 50 |
]
|
| 51 |
|
| 52 |
subprocess.Popen(" ".join(cmd), shell=True)
|
|
|
|
| 14 |
|
| 15 |
app = modal.App("vibe-shopping-llm")
|
| 16 |
|
| 17 |
+
|
| 18 |
@app.function(
|
| 19 |
image=vllm_image,
|
| 20 |
gpu=f"H100:{N_GPU}",
|
|
|
|
| 27 |
"/root/.cache/huggingface": hf_cache_vol,
|
| 28 |
"/root/.cache/vllm": vllm_cache_vol,
|
| 29 |
},
|
| 30 |
+
secrets=[API_KEY],
|
| 31 |
)
|
| 32 |
@modal.concurrent(
|
| 33 |
max_inputs=50 # maximum number of concurrent requests per auto-scaling replica
|
|
|
|
| 35 |
@modal.web_server(port=VLLM_PORT, startup_timeout=5 * MINUTE)
|
| 36 |
def serve():
|
| 37 |
import subprocess
|
| 38 |
+
import os
|
| 39 |
|
| 40 |
cmd = [
|
| 41 |
"vllm",
|
|
|
|
| 49 |
"--port",
|
| 50 |
str(VLLM_PORT),
|
| 51 |
"--api-key",
|
| 52 |
+
os.environ["API_KEY"],
|
| 53 |
]
|
| 54 |
|
| 55 |
subprocess.Popen(" ".join(cmd), shell=True)
|
llm/configs.py
CHANGED
|
@@ -10,7 +10,7 @@ vllm_image = (
|
|
| 10 |
.env(
|
| 11 |
{
|
| 12 |
"HF_HUB_ENABLE_HF_TRANSFER": "1",
|
| 13 |
-
"VLLM_USE_V1": "1",
|
| 14 |
}
|
| 15 |
)
|
| 16 |
)
|
|
@@ -22,10 +22,7 @@ MODEL_REVISION = "3f96d104cdf17d4697995d2848efe6d313494ce5"
|
|
| 22 |
hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
|
| 23 |
vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
|
| 24 |
|
| 25 |
-
|
| 26 |
N_GPU = 1
|
| 27 |
-
API_KEY = modal.
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
VLLM_PORT = 8000
|
|
|
|
| 10 |
.env(
|
| 11 |
{
|
| 12 |
"HF_HUB_ENABLE_HF_TRANSFER": "1",
|
| 13 |
+
"VLLM_USE_V1": "1",
|
| 14 |
}
|
| 15 |
)
|
| 16 |
)
|
|
|
|
| 22 |
hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
|
| 23 |
vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
|
| 24 |
|
|
|
|
| 25 |
N_GPU = 1
|
| 26 |
+
API_KEY = modal.Secret.from_name("vibe-shopping-secrets", required_keys=["API_KEY"])
|
| 27 |
+
MINUTE = 60
|
| 28 |
+
VLLM_PORT = 8000
|
|
|
|
|
|