Fix backend: disable --max-num-batched-tokens and --enforce-eager in vLLM server launch

2026-02-08 09:58:19 +00:00
parent 0508c3e86f
commit a4add8e52f
2 changed files with 82 additions and 17 deletions
@@ -90,9 +90,9 @@ def start_vllm_server(model_path: str, port: int) -> None:
        "--dtype", "bfloat16",
        "--max-num-seqs", "64",
        "--max-model-len", "65536",
-        "--max-num-batched-tokens", "32768",
+        # "--max-num-batched-tokens", "32768",
        "--gpu-memory-utilization", "0.8",
-        "--enforce-eager",
+        # "--enforce-eager",
        "--no-enable-prefix-caching",
        "--enable-chunked-prefill",
        "--chat-template-content-format", "openai",