fix backend

This commit is contained in:
YingboHAO
2026-02-08 09:58:19 +00:00
parent 0508c3e86f
commit a4add8e52f
2 changed files with 82 additions and 17 deletions
+2 -2
View File
@@ -90,9 +90,9 @@ def start_vllm_server(model_path: str, port: int) -> None:
 "--dtype", "bfloat16",
 "--max-num-seqs", "64",
 "--max-model-len", "65536",
-"--max-num-batched-tokens", "32768",
+# "--max-num-batched-tokens", "32768",
 "--gpu-memory-utilization", "0.8",
-"--enforce-eager",
+# "--enforce-eager",
 "--no-enable-prefix-caching",
 "--enable-chunked-prefill",
 "--chat-template-content-format", "openai",