1. unify env for TTS and ASR; 2. avoid transformers 5.0.0 temporarily

This commit is contained in:
YaoyaoChang
2026-01-26 03:26:52 -08:00
parent a00f431e14
commit a69e77c036
4 changed files with 13 additions and 25 deletions
+2 -1
View File
@@ -65,7 +65,8 @@ sudo docker run --privileged --net=host --ipc=host --ulimit memlock=-1:-1 --ulim
```bash
git clone https://github.com/microsoft/VibeVoice.git
cd VibeVoice
pip install -e .[asr]
pip install -e .
```
## Usages
+2 -1
View File
@@ -97,9 +97,10 @@ sudo docker run --privileged --net=host --ipc=host --ulimit memlock=-1:-1 --ulim
git clone https://github.com/microsoft/VibeVoice.git
cd VibeVoice/
pip install -e .[tts]
pip install -e .
```
## Usages
+2 -2
View File
@@ -5,8 +5,8 @@ This directory contains scripts for LoRA (Low-Rank Adaptation) fine-tuning of th
## Requirements
```bash
# you need to install vibevoice first
# pip install -e .[asr]
# Install vibevoice first
pip install -e .
pip install peft
```
+2 -16
View File
@@ -18,6 +18,7 @@ classifiers = [
]
dependencies = [
"torch",
"transformers>=4.51.3,<5.0.0",
"accelerate",
"llvmlite>=0.40.0",
"numba>=0.57.0",
@@ -29,26 +30,11 @@ dependencies = [
"ml-collections",
"absl-py",
"gradio",
]
[project.optional-dependencies]
tts = [
"transformers==4.51.3", # this project is developed against transformers==4.51.3; later versions may not be compatible
"av",
"aiortc",
"uvicorn[standard]",
"fastapi"
]
asr = [
"transformers>=4.51.3", # all versions from 4.51.3 onward are supported
"pydub" # for visualization
]
vllm = [
"transformers>=4.51.3",
"fastapi",
"uvicorn[standard]",
"pydub",
"requests",
]