[llm] Update vLLM configs and Qwen chat template to enable custom tool parsing
- llm/app.py +9 -15
- llm/configs.py +2 -0
llm/app.py
CHANGED
@@ -10,6 +10,7 @@ from configs import (
     API_KEY,
     VLLM_PORT,
     flashinfer_cache_vol,
+    CHAT_TEMPLATE,
 )
 
 
@@ -40,15 +41,6 @@ def serve_llm():
     import os
     import torch
 
-    # chat_template_path = "/root/chat_template.jinja"
-    # if not os.path.exists(chat_template_path):
-    #     print("Downloading chat template...")
-    #     url = "https://raw.githubusercontent.com/edwardzjl/chat-templates/refs/heads/main/qwen2_5/chat_template.jinja"
-    #     response = requests.get(url)
-    #     response.raise_for_status()
-    #     with open(chat_template_path, "w") as f:
-    #         f.write(response.text)
-
     min_pixels = 128 * 28 * 28  # min 128 tokens
     max_pixels = 340 * 28 * 28  # max 340 tokens (~512x512 image)
 
@@ -67,14 +59,10 @@ def serve_llm():
         "--enable-auto-tool-choice",
         "--limit-mm-per-prompt",
         "image=100",
+        "--chat-template",
+        CHAT_TEMPLATE,
         "--tensor-parallel-size",
         str(N_GPU),
-        "--host",
-        "0.0.0.0",
-        "--port",
-        str(VLLM_PORT),
-        "--api-key",
-        os.environ["API_KEY"],
         "--enforce-eager",
         # Minimize token usage
         "--mm-processor-kwargs",
@@ -84,6 +72,12 @@ def serve_llm():
         # '{"rope_type":"yarn","factor":2.0,"original_max_position_embeddings":32768}',
         "--max-model-len",
         "32768",
+        "--host",
+        "0.0.0.0",
+        "--port",
+        str(VLLM_PORT),
+        "--api-key",
+        os.environ["API_KEY"],
     ]
 
     subprocess.Popen(cmd)
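With these flags in place, the relaunched OpenAI-compatible server can be exercised end to end with a tool definition. A minimal client sketch, not part of this commit: the base URL assumes the server is reachable locally on VLLM_PORT (8000), and the model id and get_weather tool are made-up placeholders.

import os

from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # assumption: local access; matches --host/--port above
    api_key=os.environ["API_KEY"],        # same env var the server passes to --api-key
)

# Hypothetical tool definition, for illustration only.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="Qwen/Qwen2.5-VL-7B-Instruct",  # placeholder; use the model id the server actually serves
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
)
# With --enable-auto-tool-choice and the custom template, the parsed call
# should land here rather than in message.content.
print(response.choices[0].message.tool_calls)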
llm/configs.py
CHANGED
@@ -39,3 +39,5 @@ API_KEY = modal.Secret.from_name(
 )
 MINUTE = 60
 VLLM_PORT = 8000
+
+CHAT_TEMPLATE = "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments based on the provided signatures within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n"
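The template can be sanity-checked locally before deploying. A minimal sketch, not part of this commit, assuming jinja2 is installed; the tool schema and messages below are invented for illustration.

from jinja2 import Environment

from configs import CHAT_TEMPLATE

template = Environment().from_string(CHAT_TEMPLATE)

# Hypothetical tool and conversation, covering the tool-call and
# tool-response branches of the template.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
            },
        },
    }
]
messages = [
    {"role": "user", "content": "What's the weather in Paris?"},
    {
        "role": "assistant",
        "content": "",
        "tool_calls": [
            {"function": {"name": "get_weather", "arguments": {"city": "Paris"}}}
        ],
    },
    {"role": "tool", "content": '{"temp_c": 18}'},
]

prompt = template.render(messages=messages, tools=tools, add_generation_prompt=True)
print(prompt)

The assistant turn in the rendered prompt should wrap the call in <tool_call>...</tool_call>, the format Hermes-style tool parsers look for (the parser flag itself is not shown in this diff).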