fix backend
This commit is contained in:
@@ -90,9 +90,9 @@ def start_vllm_server(model_path: str, port: int) -> None:
|
||||
"--dtype", "bfloat16",
|
||||
"--max-num-seqs", "64",
|
||||
"--max-model-len", "65536",
|
||||
"--max-num-batched-tokens", "32768",
|
||||
# "--max-num-batched-tokens", "32768",
|
||||
"--gpu-memory-utilization", "0.8",
|
||||
"--enforce-eager",
|
||||
# "--enforce-eager",
|
||||
"--no-enable-prefix-caching",
|
||||
"--enable-chunked-prefill",
|
||||
"--chat-template-content-format", "openai",
|
||||
|
||||
Reference in New Issue
Block a user