diff --git a/docs/vibevoice-vllm-asr.md b/docs/vibevoice-vllm-asr.md
index bde87c8..9ed738b 100644
--- a/docs/vibevoice-vllm-asr.md
+++ b/docs/vibevoice-vllm-asr.md
@@ -47,9 +47,9 @@ The launcher supports two types of GPU parallelism via `--tp` and `--dp` flags:
 
 ### Data Parallel (Recommended for scaling throughput)
 
-Run 4 independent replicas on 4 GPUs with automatic load balancing behind a single port.
+Run N independent replicas on N GPUs with automatic load balancing behind a single port.
 When `--dp N` is specified (N > 1), the launcher automatically starts N independent vLLM
-processes behind an nginx reverse proxy for optimal throughput:
+processes behind an nginx reverse proxy (configured with 2×N nginx worker processes) for optimal throughput:
 
 ```bash
 docker run -d --gpus '"device=0,1,2,3"' --name vibevoice-vllm \
diff --git a/vllm_plugin/scripts/start_server.py b/vllm_plugin/scripts/start_server.py
index 19e70d0..6c50377 100644
--- a/vllm_plugin/scripts/start_server.py
+++ b/vllm_plugin/scripts/start_server.py
@@ -146,11 +146,18 @@ def _install_nginx() -> None:
         )
 
 
-def _write_nginx_config(frontend_port: int, backend_ports: list[int]) -> str:
-    """Write nginx config for round-robin load balancing."""
+def _write_nginx_config(frontend_port: int, backend_ports: list[int],
+                        num_workers: int = 0) -> str:
+    """Write nginx config for round-robin load balancing.
+    
+    Args:
+        num_workers: Number of nginx worker processes. 0 = auto (2 × num backends).
+    """
+    if num_workers <= 0:
+        num_workers = len(backend_ports) * 2
     backends = "\n".join(f"        server 127.0.0.1:{p};" for p in backend_ports)
     config = textwrap.dedent(f"""\
-        worker_processes auto;
+        worker_processes {num_workers};
         worker_rlimit_nofile 65536;
         error_log /dev/stderr warn;
         pid /tmp/nginx.pid;