sitatech committed on
Commit
23eef01
·
1 Parent(s): ad248cf

[llm] Use Modal secrets for vllm api key

Browse files
Files changed (2) hide show
  1. llm/app.py +4 -1
  2. llm/configs.py +4 -7
llm/app.py CHANGED
@@ -14,6 +14,7 @@ from configs import (
14
 
15
  app = modal.App("vibe-shopping-llm")
16
 
 
17
  @app.function(
18
  image=vllm_image,
19
  gpu=f"H100:{N_GPU}",
@@ -26,6 +27,7 @@ app = modal.App("vibe-shopping-llm")
26
  "/root/.cache/huggingface": hf_cache_vol,
27
  "/root/.cache/vllm": vllm_cache_vol,
28
  },
 
29
  )
30
  @modal.concurrent(
31
  max_inputs=50 # maximum number of concurrent requests per auto-scaling replica
@@ -33,6 +35,7 @@ app = modal.App("vibe-shopping-llm")
33
  @modal.web_server(port=VLLM_PORT, startup_timeout=5 * MINUTE)
34
  def serve():
35
  import subprocess
 
36
 
37
  cmd = [
38
  "vllm",
@@ -46,7 +49,7 @@ def serve():
46
  "--port",
47
  str(VLLM_PORT),
48
  "--api-key",
49
- API_KEY,
50
  ]
51
 
52
  subprocess.Popen(" ".join(cmd), shell=True)
 
14
 
15
  app = modal.App("vibe-shopping-llm")
16
 
17
+
18
  @app.function(
19
  image=vllm_image,
20
  gpu=f"H100:{N_GPU}",
 
27
  "/root/.cache/huggingface": hf_cache_vol,
28
  "/root/.cache/vllm": vllm_cache_vol,
29
  },
30
+ secrets=[API_KEY],
31
  )
32
  @modal.concurrent(
33
  max_inputs=50 # maximum number of concurrent requests per auto-scaling replica
 
35
  @modal.web_server(port=VLLM_PORT, startup_timeout=5 * MINUTE)
36
  def serve():
37
  import subprocess
38
+ import os
39
 
40
  cmd = [
41
  "vllm",
 
49
  "--port",
50
  str(VLLM_PORT),
51
  "--api-key",
52
+ os.environ["API_KEY"],
53
  ]
54
 
55
  subprocess.Popen(" ".join(cmd), shell=True)
llm/configs.py CHANGED
@@ -10,7 +10,7 @@ vllm_image = (
10
  .env(
11
  {
12
  "HF_HUB_ENABLE_HF_TRANSFER": "1",
13
- "VLLM_USE_V1": "1",
14
  }
15
  )
16
  )
@@ -22,10 +22,7 @@ MODEL_REVISION = "3f96d104cdf17d4697995d2848efe6d313494ce5"
22
  hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
23
  vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
24
 
25
-
26
  N_GPU = 1
27
- API_KEY = modal.secret.Secret("vllm_api_key")
28
-
29
- MINUTE = 60
30
-
31
- VLLM_PORT = 8000
 
10
  .env(
11
  {
12
  "HF_HUB_ENABLE_HF_TRANSFER": "1",
13
+ "VLLM_USE_V1": "1",
14
  }
15
  )
16
  )
 
22
  hf_cache_vol = modal.Volume.from_name("huggingface-cache", create_if_missing=True)
23
  vllm_cache_vol = modal.Volume.from_name("vllm-cache", create_if_missing=True)
24
 
 
25
  N_GPU = 1
26
+ API_KEY = modal.Secret.from_name("vibe-shopping-secrets", required_keys=["API_KEY"])
27
+ MINUTE = 60
28
+ VLLM_PORT = 8000