170 lines
4.5 KiB
Python
170 lines
4.5 KiB
Python
#!/usr/bin/env python3
|
|
"""
VibeVoice vLLM ASR Server Launcher

One-click deployment script that handles:
1. Installing system dependencies (FFmpeg, etc.)
2. Installing the VibeVoice Python package
3. Downloading the model from HuggingFace
4. Generating tokenizer files
5. Starting the vLLM server

Usage:
    python3 start_server.py [--model MODEL_ID] [--port PORT] [--skip-deps] [--skip-tokenizer]
"""
|
|
|
|
import argparse
|
|
import os
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
def run_command(cmd: list[str], description: str, shell: bool = False) -> None:
    """Print a banner for *description*, then run *cmd* and wait for it.

    Args:
        cmd: Command and its arguments as an argv-style list. A single
            command string is also accepted when ``shell`` is true.
        description: Human-readable step name shown in the banner.
        shell: Run the command through the system shell.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status (``check=True``).
    """
    import shlex

    banner = "=" * 60
    # Flush so the banner is written before the child's own output even when
    # stdout is block-buffered (a pipe or log file instead of a terminal).
    print(f"\n{banner}\n {description}\n{banner}\n", flush=True)

    if shell and os.name == "posix" and not isinstance(cmd, str):
        # With shell=True on POSIX, subprocess uses only the first list item as
        # the command text; the remaining items become the shell's positional
        # parameters. Join the list into one properly quoted command line so
        # every argument actually reaches the command.
        cmd = shlex.join(cmd)

    subprocess.run(cmd, shell=shell, check=True)
|
|
|
|
|
|
def install_system_deps() -> None:
    """Refresh the apt package index, then install FFmpeg and libsndfile.

    Each step goes through ``run_command``; the steps run in order and the
    second is only reached if the first returns normally.
    """
    apt_steps = (
        (["apt-get", "update"], "Updating package list"),
        (
            ["apt-get", "install", "-y", "ffmpeg", "libsndfile1"],
            "Installing FFmpeg and audio libraries",
        ),
    )
    for argv, label in apt_steps:
        run_command(argv, label)
|
|
|
|
|
|
def install_vibevoice() -> None:
    """Install the project at ``/app`` in editable mode with its ``vllm`` extra.

    pip is invoked through the running interpreter (``sys.executable -m pip``)
    so the package lands in the same environment as this script.
    """
    pip_install = [sys.executable, "-m", "pip", "install", "-e", "/app[vllm]"]
    run_command(pip_install, "Installing VibeVoice with vLLM support")
|
|
|
|
|
|
def download_model(model_id: str) -> str:
    """Fetch a model snapshot from HuggingFace and return its local path.

    No cache directory is passed to ``snapshot_download``, so the download
    location is whatever ``huggingface_hub`` uses by default.

    Args:
        model_id: HuggingFace repository ID of the model.

    Returns:
        The path returned by ``snapshot_download``.
    """
    # Imported here rather than at module level: huggingface_hub may only
    # become importable after the package installation step has run.
    import warnings

    from huggingface_hub import snapshot_download

    rule = "=" * 60

    print(f"\n{rule}")
    print(f" Downloading model: {model_id}")
    print(f"{rule}\n")

    # Silence warnings (e.g. deprecation notices from huggingface_hub) for the
    # duration of the download only; the previous filters are restored on exit.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        local_snapshot = snapshot_download(model_id)

    print(f"\n{rule}")
    print(" ✅ Model downloaded successfully!")
    print(f" 📁 Path: {local_snapshot}")
    print(f"{rule}\n")

    return local_snapshot
|
|
|
|
|
|
def generate_tokenizer(model_path: str) -> None:
    """Run the ``vllm_plugin`` tokenizer generator with *model_path* as output.

    Args:
        model_path: Directory passed to the generator's ``--output`` option.
    """
    generator_cmd = [
        sys.executable,
        "-m",
        "vllm_plugin.tools.generate_tokenizer_files",
    ]
    generator_cmd += ["--output", model_path]
    run_command(generator_cmd, "Generating tokenizer files")
|
|
|
|
|
|
def start_vllm_server(model_path: str, port: int) -> None:
    """Replace the current process with ``vllm serve`` for *model_path*.

    On success this function never returns: ``os.execvp`` swaps the running
    Python process for the ``vllm`` executable found on ``PATH``.

    Args:
        model_path: Local path of the model to serve.
        port: TCP port passed to ``vllm serve --port``.

    Raises:
        OSError: If the ``vllm`` executable cannot be found or executed.
    """
    banner = "=" * 60
    print(f"\n{banner}")
    print(f" Starting vLLM server on port {port}")
    print(f"{banner}\n")

    vllm_cmd = [
        "vllm", "serve", model_path,
        "--served-model-name", "vibevoice",
        "--trust-remote-code",
        "--dtype", "bfloat16",
        "--max-num-seqs", "64",
        "--max-model-len", "65536",
        # "--max-num-batched-tokens", "32768",
        "--gpu-memory-utilization", "0.8",
        # "--enforce-eager",
        "--no-enable-prefix-caching",
        "--enable-chunked-prefill",
        "--chat-template-content-format", "openai",
        "--tensor-parallel-size", "1",
        "--allowed-local-media-path", "/app",
        "--port", str(port),
    ]

    # exec*() replaces the process image immediately and does not flush
    # Python's buffered file objects. Without an explicit flush, everything
    # still sitting in the stdout/stderr buffers (including the banner above)
    # is lost when output goes to a pipe or file instead of a terminal.
    sys.stdout.flush()
    sys.stderr.flush()

    os.execvp("vllm", vllm_cmd)
|
|
|
|
|
|
def _build_arg_parser():
    """Create the command-line parser for the launcher."""
    usage_examples = """
Examples:
# Start with default settings
python3 start_server.py

# Use custom port
python3 start_server.py --port 8080

# Skip dependency installation (if already installed)
python3 start_server.py --skip-deps
"""
    cli = argparse.ArgumentParser(
        description="VibeVoice vLLM ASR Server - One-Click Deployment",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        "--model", "-m",
        default="microsoft/VibeVoice-ASR",
        help="HuggingFace model ID (default: microsoft/VibeVoice-ASR)",
    )
    cli.add_argument(
        "--port", "-p",
        type=int,
        default=8000,
        help="Server port (default: 8000)",
    )
    cli.add_argument(
        "--skip-deps",
        action="store_true",
        help="Skip installing system dependencies",
    )
    cli.add_argument(
        "--skip-tokenizer",
        action="store_true",
        help="Skip generating tokenizer files",
    )
    return cli


def main():
    """Parse the command line and run the deployment steps in order.

    Steps: optional system dependency install, VibeVoice package install,
    model download, optional tokenizer generation, then the vLLM server start.
    """
    options = _build_arg_parser().parse_args()

    rule = "=" * 60
    print(f"\n{rule}")
    print(" VibeVoice vLLM ASR Server - One-Click Deployment")
    print(rule)

    # Step 1: system packages (the only step controlled by --skip-deps).
    if not options.skip_deps:
        install_system_deps()

    # Step 2: the VibeVoice Python package is always installed.
    install_vibevoice()

    # Step 3: fetch the model and remember where it was stored.
    local_model = download_model(options.model)

    # Step 4: tokenizer files are generated into the model path unless skipped.
    if not options.skip_tokenizer:
        generate_tokenizer(local_model)

    # Step 5: hand over to the vLLM server.
    start_vllm_server(local_model, options.port)
|
|
|
|
|
|
# Run the deployment only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|