217 lines
6.7 KiB
Plaintext
217 lines
6.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "d1785adb",
|
|
"metadata": {
|
|
"colab_type": "text",
|
|
"id": "view-in-github"
|
|
},
|
|
"source": [
|
|
"<a href=\"https://colab.research.google.com/github/microsoft/VibeVoice/blob/main/demo/vibevoice_realtime_colab.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "WvIaUJD2y0yU",
|
|
"metadata": {
|
|
"id": "WvIaUJD2y0yU"
|
|
},
|
|
"source": [
|
|
"# VibeVoice-Realtime Colab — T4 Quickstart\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e8fTKYGx7DZk",
|
|
"metadata": {
|
|
"id": "e8fTKYGx7DZk"
|
|
},
|
|
"source": [
|
|
"## Step 1: Setup Environment"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "4wxJ6QHM-ZOb",
|
|
"metadata": {
|
|
"id": "4wxJ6QHM-ZOb"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Confirm that the attached accelerator is a T4 GPU\n",
"import torch\n",
"if not (torch.cuda.is_available() and \"T4\" in torch.cuda.get_device_name(0)):\n",
"    print(\"\"\"\n",
" ⚠️ WARNING: T4 GPU not detected\n",
"\n",
" The recommended runtime for this Colab notebook is \"T4 GPU\".\n",
"\n",
" To change the runtime type:\n",
"\n",
" 1. Click on \"Runtime\" in the top navigation menu\n",
" 2. Click on \"Change runtime type\"\n",
" 3. Select \"T4 GPU\"\n",
" 4. Click \"OK\" if a \"Disconnect and delete runtime\" window appears\n",
" 5. Click on \"Save\"\n",
"\n",
" \"\"\")\n",
"else:\n",
"    print(\"✅ T4 GPU detected\")\n",
"\n",
"# Fetch the VibeVoice sources (the clone is skipped when /content/VibeVoice already exists)\n",
"![ -d /content/VibeVoice ] || git clone --quiet --branch main --depth 1 https://github.com/microsoft/VibeVoice.git /content/VibeVoice\n",
"print(\"✅ Cloned VibeVoice repository\")\n",
"\n",
"# Install the package in editable mode and fetch the cloudflared binary into the working directory\n",
"!uv pip --quiet install --system -e /content/VibeVoice\n",
"!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O cloudflared && chmod +x cloudflared\n",
"print(\"✅ Installed dependencies\")\n",
"\n",
"# Fetch the model files from the Hugging Face Hub\n",
"from huggingface_hub import snapshot_download\n",
"snapshot_download(\n",
"    repo_id=\"microsoft/VibeVoice-Realtime-0.5B\",\n",
"    local_dir=\"/content/models/VibeVoice-Realtime-0.5B\",\n",
")\n",
"print(\"✅ Downloaded model: microsoft/VibeVoice-Realtime-0.5B\")\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "88c727ab",
|
|
"metadata": {},
|
|
"source": [
|
|
"[Optional] If the download takes longer than 1 minute, it is probably stuck. In that case: (1) interrupt the execution, (2) log in to Hugging Face using the next cell, and (3) run the download again using the cell after it."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "dec6b870",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from huggingface_hub import login\n",
|
|
"login()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c579654b",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Retry the model download. The import is repeated here so that this cell\n",
"# also works on a fresh kernel (for example after a runtime restart).\n",
"from huggingface_hub import snapshot_download\n",
"\n",
"snapshot_download(\"microsoft/VibeVoice-Realtime-0.5B\", local_dir=\"/content/models/VibeVoice-Realtime-0.5B\")\n",
"print(\"✅ Downloaded model: microsoft/VibeVoice-Realtime-0.5B\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "dfe30d6f",
|
|
"metadata": {},
|
|
"source": [
|
|
"[Optional] Download more experimental voices."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "bb33c9ce",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!bash /content/VibeVoice/demo/download_experimental_voices.sh"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "pgKlV7153Ifi",
|
|
"metadata": {
|
|
"id": "pgKlV7153Ifi"
|
|
},
|
|
"source": [
|
|
"## Step 2: Launch VibeVoice-Realtime Demo"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "Yc1N9EHswFxA",
|
|
"metadata": {
|
|
"id": "Yc1N9EHswFxA"
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import subprocess, re, time, threading\n",
"\n",
"# `exec` makes the shell replace itself with the command, so the terminate()\n",
"# call in the cleanup below signals the actual process and not a wrapper shell.\n",
"srv = subprocess.Popen(\n",
"    \"exec python /content/VibeVoice/demo/vibevoice_realtime_demo.py --model_path /content/models/VibeVoice-Realtime-0.5B --port 8000\",\n",
"    shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,\n",
")\n",
"cf = subprocess.Popen(\n",
"    \"exec ./cloudflared tunnel --url http://localhost:8000 --no-autoupdate\",\n",
"    shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1,\n",
")\n",
"\n",
"public_url = None\n",
"server_ready = False\n",
"url_pattern = re.compile(r\"(https://[a-z0-9-]+\\.trycloudflare\\.com)\")\n",
"\n",
"def read_srv():\n",
"    \"\"\"Echo the demo server's output and set `server_ready` once Uvicorn reports it is running.\"\"\"\n",
"    global server_ready\n",
"    for ln in srv.stdout:\n",
"        print(ln.strip())\n",
"        if \"Uvicorn running on\" in ln:\n",
"            server_ready = True\n",
"\n",
"def read_cf():\n",
"    \"\"\"Store the first trycloudflare.com URL printed by cloudflared in `public_url`.\"\"\"\n",
"    global public_url\n",
"    # Keep consuming output after the URL is found: a pipe that is no longer\n",
"    # read eventually fills up and blocks the writing process.\n",
"    for ln in cf.stdout:\n",
"        m = url_pattern.search(ln)\n",
"        if m and public_url is None:\n",
"            public_url = m.group(1)\n",
"\n",
"threading.Thread(target=read_srv, daemon=True).start()\n",
"threading.Thread(target=read_cf, daemon=True).start()\n",
"\n",
"url_announced = False\n",
"try:\n",
"    # Keep the cell running while the demo is up; stop once either process exits.\n",
"    while srv.poll() is None and cf.poll() is None:\n",
"        if server_ready and public_url and not url_announced:\n",
"            print(f\"✅ Public URL: {public_url}\\n\")\n",
"            url_announced = True\n",
"        time.sleep(0.25)\n",
"    print(f\"Demo stopped (server exit code: {srv.poll()}, tunnel exit code: {cf.poll()})\")\n",
"finally:\n",
"    # Also runs when the cell is interrupted: stop whatever is still running so\n",
"    # that port 8000 is free for the next launch.\n",
"    for proc in (srv, cf):\n",
"        if proc.poll() is None:\n",
"            proc.terminate()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"accelerator": "GPU",
|
|
"colab": {
|
|
"gpuType": "T4",
|
|
"include_colab_link": true,
|
|
"machine_shape": "hm",
|
|
"name": "VibeVoice_Colab.ipynb",
|
|
"provenance": []
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.11"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|