Replace install_deps.sh with start_server.py one-click deployment

2026-01-26 07:26:29 +00:00
parent d11d756b61
commit 1eb04f53a2
3 changed files with 192 additions and 62 deletions
@@ -1,23 +0,0 @@
-#!/bin/bash
-# Install system dependencies for VibeVoice vLLM plugin
-# Run this script inside the vLLM container before using the plugin
-
-set -e
-
-echo "Installing system dependencies for VibeVoice vLLM plugin..."
-
-# Update package list
-apt-get update
-
-# Install FFmpeg and audio processing libraries
-apt-get install -y \
-    ffmpeg \
-    libsndfile1 \
-    git
-
-echo "✅ System dependencies installed successfully!"
-echo ""
-echo "Next steps:"
-echo "  1. Install VibeVoice: pip install -e .[vllm]"
-echo "  2. Generate tokenizer files (if needed): python -m vllm_plugin.tools.generate_tokenizer_files -o /path/to/model"
-echo "  3. Start vLLM server: vllm serve <model_path> --trust-remote-code --enforce-eager --no-enable-prefix-caching"
@@ -0,0 +1,169 @@
+#!/usr/bin/env python3
+"""
+VibeVoice vLLM ASR Server Launcher
+
+One-click deployment script that handles:
+1. Installing system dependencies (FFmpeg, etc.)
+2. Installing VibeVoice Python package
+3. Downloading model from HuggingFace
+4. Generating tokenizer files
+5. Starting vLLM server
+
+Usage:
+    python3 start_server.py [--model MODEL_ID] [--port PORT]
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+
+def run_command(cmd: list[str], description: str, shell: bool = False) -> None:
+    """Run a command with logging."""
+    print(f"\n{'='*60}")
+    print(f"  {description}")
+    print(f"{'='*60}\n")
+    if shell:
+        subprocess.run(cmd, shell=True, check=True)
+    else:
+        subprocess.run(cmd, check=True)
+
+
+def install_system_deps() -> None:
+    """Install system dependencies (FFmpeg, etc.)."""
+    run_command(["apt-get", "update"], "Updating package list")
+    run_command(
+        ["apt-get", "install", "-y", "ffmpeg", "libsndfile1"],
+        "Installing FFmpeg and audio libraries"
+    )
+
+
+def install_vibevoice() -> None:
+    """Install VibeVoice Python package."""
+    run_command(
+        [sys.executable, "-m", "pip", "install", "-e", "/app[vllm]"],
+        "Installing VibeVoice with vLLM support"
+    )
+
+
+def download_model(model_id: str) -> str:
+    """Download model from HuggingFace using default cache."""
+    print(f"\n{'='*60}")
+    print(f"  Downloading model: {model_id}")
+    print(f"{'='*60}\n")
+    
+    import warnings
+    from huggingface_hub import snapshot_download
+    
+    # Suppress deprecation warnings from huggingface_hub
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        model_path = snapshot_download(model_id)
+    
+    print(f"\n{'='*60}")
+    print(f"  ✅ Model downloaded successfully!")
+    print(f"  📁 Path: {model_path}")
+    print(f"{'='*60}\n")
+    return model_path
+
+
+def generate_tokenizer(model_path: str) -> None:
+    """Generate tokenizer files for the model."""
+    run_command(
+        [sys.executable, "-m", "vllm_plugin.tools.generate_tokenizer_files", 
+         "--output", model_path],
+        "Generating tokenizer files"
+    )
+
+
+def start_vllm_server(model_path: str, port: int) -> None:
+    """Start vLLM server (replaces current process)."""
+    print(f"\n{'='*60}")
+    print(f"  Starting vLLM server on port {port}")
+    print(f"{'='*60}\n")
+    
+    vllm_cmd = [
+        "vllm", "serve", model_path,
+        "--served-model-name", "vibevoice",
+        "--trust-remote-code",
+        "--dtype", "bfloat16",
+        "--max-num-seqs", "64",
+        "--max-model-len", "65536",
+        "--max-num-batched-tokens", "32768",
+        "--gpu-memory-utilization", "0.8",
+        "--enforce-eager",
+        "--no-enable-prefix-caching",
+        "--enable-chunked-prefill",
+        "--chat-template-content-format", "openai",
+        "--tensor-parallel-size", "1",
+        "--allowed-local-media-path", "/app",
+        "--port", str(port),
+    ]
+    
+    os.execvp("vllm", vllm_cmd)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="VibeVoice vLLM ASR Server - One-Click Deployment",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Start with default settings
+    python3 start_server.py
+
+    # Use custom port
+    python3 start_server.py --port 8080
+
+    # Skip dependency installation (if already installed)
+    python3 start_server.py --skip-deps
+        """
+    )
+    parser.add_argument(
+        "--model", "-m",
+        default="microsoft/VibeVoice-ASR",
+        help="HuggingFace model ID (default: microsoft/VibeVoice-ASR)"
+    )
+    parser.add_argument(
+        "--port", "-p",
+        type=int,
+        default=8000,
+        help="Server port (default: 8000)"
+    )
+    parser.add_argument(
+        "--skip-deps",
+        action="store_true",
+        help="Skip installing system dependencies"
+    )
+    parser.add_argument(
+        "--skip-tokenizer",
+        action="store_true",
+        help="Skip generating tokenizer files"
+    )
+    args = parser.parse_args()
+
+    print("\n" + "="*60)
+    print("  VibeVoice vLLM ASR Server - One-Click Deployment")
+    print("="*60)
+
+    # Step 1: Install system dependencies
+    if not args.skip_deps:
+        install_system_deps()
+
+    # Step 2: Install VibeVoice
+    install_vibevoice()
+
+    # Step 3: Download model
+    model_path = download_model(args.model)
+
+    # Step 4: Generate tokenizer files
+    if not args.skip_tokenizer:
+        generate_tokenizer(model_path)
+
+    # Step 5: Start vLLM server
+    start_vllm_server(model_path, args.port)
+
+
+if __name__ == "__main__":
+    main()