This commit is contained in:
YaoyaoChang
2025-12-04 22:33:57 -08:00
parent adc08b1575
commit 7ea24a4fb9
3 changed files with 6 additions and 6 deletions
+1 -1
View File
@@ -23,7 +23,7 @@
<img src="https://img.shields.io/badge/Status-New-brightgreen?style=flat" alt="New" />
<img src="https://img.shields.io/badge/Feature-Realtime_TTS-blue?style=flat&logo=soundcharts" alt="Realtime TTS" />
<strong>2025-12-03: 📣 We open-sourced <a href="docs/vibevoice-realtime-0.5b.md"><strong>VibeVoice-Realtime-0.5B</strong></a>, a real-time text-to-speech model that supports streaming text input and robust long-form speech generation.</strong>
<strong>2025-12-03: 📣 We open-sourced <a href="docs/vibevoice-realtime-0.5b.md"><strong>VibeVoice-Realtime-0.5B</strong></a>, a real-time text-to-speech model that supports streaming text input and robust long-form speech generation. Try it on <a href="https://colab.research.google.com/github/microsoft/VibeVoice/blob/main/demo/vibevoice_realtime_colab.ipynb">Colab</a>.</strong>
<br>
https://github.com/user-attachments/assets/0901d274-f6ae-46ef-a0fd-3c4fba4f76dc
+3 -3
View File
@@ -351,12 +351,12 @@
<div class="control-row">
<label class="range-control">
<span>CFG</span>
<input id="cfgScale" type="range" min="1" max="3" step="0.05" value="1.5" />
<input id="cfgScale" type="range" min="1.3" max="3" step="0.05" value="1.5" />
<span class="range-value" id="cfgValue">1.5</span>
</label>
<label class="range-control">
<span>Inference Steps</span>
<input id="inferenceSteps" type="range" min="1" max="20" step="1" value="5" />
<input id="inferenceSteps" type="range" min="5" max="20" step="1" value="5" />
<span class="range-value" id="stepsValue">5</span>
</label>
<button id="resetControls" type="button" class="secondary-btn">Reset Controls</button>
@@ -482,7 +482,7 @@
};
const updateCfgDisplay = () => {
cfgValueLabel.textContent = Number(cfgSelect.value).toFixed(3);
cfgValueLabel.textContent = Number(cfgSelect.value).toFixed(2);
};
const updateStepsDisplay = () => {
+2 -2
View File
@@ -111,11 +111,11 @@ Due to network latency, the time when audio playback is heard may exceed the ~30
python demo/vibevoice_realtime_demo.py --model_path microsoft/VibeVoice-Realtime-0.5B
```
Tip: You can also deploy and run the real-time demo on [Colab](https://colab.research.google.com/github/microsoft/VibeVoice/blob/main/demo/vibevoice_realtime_colab.ipynb).
Tip: Just try it on [Colab](https://colab.research.google.com/github/microsoft/VibeVoice/blob/main/demo/vibevoice_realtime_colab.ipynb).
### Usage 2: Inference from files directly
```bash
# We provide some LLM-generated example scripts under demo/text_examples/ for demo
# We provide some example scripts under demo/text_examples/ for demo
python demo/realtime_model_inference_from_file.py --model_path microsoft/VibeVoice-Realtime-0.5B --txt_path demo/text_examples/1p_vibevoice.txt --speaker_name Carter
```