Files
VibeVoice/demo/vibevoice_realtime_colab.ipynb
T
YaoyaoChang ce90a49960 fix env bug
2026-01-21 22:03:52 -08:00

217 lines
6.7 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"id": "d1785adb",
"metadata": {
"colab_type": "text",
"id": "view-in-github"
},
"source": [
"<a href=\"https://colab.research.google.com/github/microsoft/VibeVoice/blob/main/demo/vibevoice_realtime_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"id": "WvIaUJD2y0yU",
"metadata": {
"id": "WvIaUJD2y0yU"
},
"source": [
"# VibeVoice-Realtime Colab — T4 Quickstart\n",
"\n"
]
},
{
"cell_type": "markdown",
"id": "e8fTKYGx7DZk",
"metadata": {
"id": "e8fTKYGx7DZk"
},
"source": [
"## Step 1: Setup Environment"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4wxJ6QHM-ZOb",
"metadata": {
"id": "4wxJ6QHM-ZOb"
},
"outputs": [],
"source": [
"# Check for T4 GPU\n",
"import os\n",
"import subprocess\n",
"\n",
"import torch\n",
"\n",
"if torch.cuda.is_available() and \"T4\" in torch.cuda.get_device_name(0):\n",
"    print(\"✅ T4 GPU detected\")\n",
"else:\n",
"    print(\"\"\"\n",
" ⚠️ WARNING: T4 GPU not detected\n",
"\n",
" The recommended runtime for this Colab notebook is \"T4 GPU\".\n",
"\n",
" To change the runtime type:\n",
"\n",
" 1. Click on \"Runtime\" in the top navigation menu\n",
" 2. Click on \"Change runtime type\"\n",
" 3. Select \"T4 GPU\"\n",
" 4. Click \"OK\" if a \"Disconnect and delete runtime\" window appears\n",
" 5. Click on \"Save\"\n",
"\n",
" \"\"\")\n",
"\n",
"\n",
"def run_setup_step(cmd):\n",
"    \"\"\"Run one setup command, echo its output, and raise CalledProcessError on failure.\n",
"\n",
"    Args:\n",
"        cmd: The command as a list of arguments (executed without a shell).\n",
"    \"\"\"\n",
"    result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)\n",
"    if result.stdout:\n",
"        print(result.stdout, end=\"\")\n",
"    # Stop the cell here instead of printing a success message for a failed step.\n",
"    result.check_returncode()\n",
"\n",
"\n",
"# Clone the VibeVoice repository (skipped when the directory already exists)\n",
"if not os.path.isdir(\"/content/VibeVoice\"):\n",
"    run_setup_step([\n",
"        \"git\", \"clone\", \"--quiet\", \"--branch\", \"main\", \"--depth\", \"1\",\n",
"        \"https://github.com/microsoft/VibeVoice.git\", \"/content/VibeVoice\",\n",
"    ])\n",
"print(\"✅ Cloned VibeVoice repository\")\n",
"\n",
"# Install project dependencies\n",
"# Passing the extras spec as a list element keeps the shell from treating [tts] as a glob.\n",
"run_setup_step([\"uv\", \"pip\", \"--quiet\", \"install\", \"--system\", \"-e\", \"/content/VibeVoice[tts]\"])\n",
"run_setup_step([\n",
"    \"wget\", \"-q\",\n",
"    \"https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64\",\n",
"    \"-O\", \"cloudflared\",\n",
"])\n",
"run_setup_step([\"chmod\", \"+x\", \"cloudflared\"])\n",
"print(\"✅ Installed dependencies\")\n",
"\n",
"# Download model\n",
"from huggingface_hub import snapshot_download\n",
"snapshot_download(\"microsoft/VibeVoice-Realtime-0.5B\", local_dir=\"/content/models/VibeVoice-Realtime-0.5B\")\n",
"print(\"✅ Downloaded model: microsoft/VibeVoice-Realtime-0.5B\")\n"
]
},
{
"cell_type": "markdown",
"id": "88c727ab",
"metadata": {},
"source": [
"[Optional] If the download takes longer than 1 minute, it is probably stuck. In that case: (1) interrupt the execution, (2) log in to Hugging Face, and (3) try the download again."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dec6b870",
"metadata": {},
"outputs": [],
"source": [
"# Authenticate with Hugging Face before retrying the model download.\n",
"from huggingface_hub import login\n",
"from huggingface_hub import snapshot_download  # re-imported for the retry cell that follows\n",
"\n",
"login()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c579654b",
"metadata": {},
"outputs": [],
"source": [
"# Retry the model download into the same local directory used in Step 1.\n",
"snapshot_download(\n",
"    repo_id=\"microsoft/VibeVoice-Realtime-0.5B\",\n",
"    local_dir=\"/content/models/VibeVoice-Realtime-0.5B\",\n",
")\n",
"print(\"✅ Downloaded model: microsoft/VibeVoice-Realtime-0.5B\")"
]
},
{
"cell_type": "markdown",
"id": "dfe30d6f",
"metadata": {},
"source": [
"[Optional] Download more experimental voices."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bb33c9ce",
"metadata": {},
"outputs": [],
"source": [
"!bash /content/VibeVoice/demo/download_experimental_voices.sh"
]
},
{
"cell_type": "markdown",
"id": "pgKlV7153Ifi",
"metadata": {
"id": "pgKlV7153Ifi"
},
"source": [
"## Step 2: Launch VibeVoice-Realtime Demo"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "Yc1N9EHswFxA",
"metadata": {
"id": "Yc1N9EHswFxA"
},
"outputs": [],
"source": [
"import re\n",
"import subprocess\n",
"import threading\n",
"import time\n",
"\n",
"# Start the demo server and the Cloudflare tunnel from argument lists (no shell),\n",
"# so terminate() at the end of this cell signals the real processes rather than a wrapper shell.\n",
"srv = subprocess.Popen(\n",
"    [\n",
"        \"python\", \"/content/VibeVoice/demo/vibevoice_realtime_demo.py\",\n",
"        \"--model_path\", \"/content/models/VibeVoice-Realtime-0.5B\",\n",
"        \"--port\", \"8000\",\n",
"    ],\n",
"    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,\n",
")\n",
"cf = subprocess.Popen(\n",
"    [\"./cloudflared\", \"tunnel\", \"--url\", \"http://localhost:8000\", \"--no-autoupdate\"],\n",
"    stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,\n",
")\n",
"\n",
"public_url = None\n",
"server_ready = False\n",
"url_pattern = re.compile(r\"(https://[a-z0-9-]+\\.trycloudflare\\.com)\")\n",
"\n",
"def read_srv():\n",
"    \"\"\"Echo the server's output and set server_ready once Uvicorn reports it is running.\"\"\"\n",
"    global server_ready\n",
"    for ln in srv.stdout:\n",
"        print(ln.strip())\n",
"        if \"Uvicorn running on\" in ln:\n",
"            server_ready = True\n",
"\n",
"def read_cf():\n",
"    \"\"\"Store the first trycloudflare.com URL in public_url, then keep draining the pipe.\"\"\"\n",
"    global public_url\n",
"    url_found = False\n",
"    for ln in cf.stdout:\n",
"        if url_found:\n",
"            # Keep consuming output: an unread pipe eventually fills and blocks cloudflared.\n",
"            continue\n",
"        m = url_pattern.search(ln)\n",
"        if m:\n",
"            public_url = m.group(1)\n",
"            url_found = True\n",
"\n",
"srv_reader = threading.Thread(target=read_srv, daemon=True)\n",
"cf_reader = threading.Thread(target=read_cf, daemon=True)\n",
"srv_reader.start()\n",
"cf_reader.start()\n",
"\n",
"# Keep the cell alive while the demo runs; interrupt the cell to stop it.\n",
"url_announced = False\n",
"try:\n",
"    while True:\n",
"        # Stop waiting if either child process has died instead of spinning forever.\n",
"        if srv.poll() is not None:\n",
"            print(f\"❌ Demo server exited with code {srv.returncode}\")\n",
"            break\n",
"        if cf.poll() is not None:\n",
"            print(f\"❌ cloudflared exited with code {cf.returncode}\")\n",
"            break\n",
"        if not url_announced and server_ready and public_url:\n",
"            print(f\"✅ Public URL: {public_url}\\n\")\n",
"            url_announced = True\n",
"        time.sleep(0.25)\n",
"except KeyboardInterrupt:\n",
"    print(\"Stopping demo...\")\n",
"finally:\n",
"    # Shut both processes down so port 8000 is free when the cell is re-run.\n",
"    for proc in (cf, srv):\n",
"        if proc.poll() is None:\n",
"            proc.terminate()\n",
"    for proc in (cf, srv):\n",
"        try:\n",
"            proc.wait(timeout=10)\n",
"        except subprocess.TimeoutExpired:\n",
"            proc.kill()\n",
"    # Give the reader threads a moment to flush any remaining output.\n",
"    srv_reader.join(timeout=2)\n",
"    cf_reader.join(timeout=2)"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"include_colab_link": true,
"machine_shape": "hm",
"name": "VibeVoice_Colab.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}