1. unify env for TTS and ASR; 2. avoid transformers 5.0.0 temporarily

This commit is contained in:
YaoyaoChang
2026-01-26 03:26:52 -08:00
parent a00f431e14
commit a69e77c036
4 changed files with 13 additions and 25 deletions
+2 -1
View File
@@ -65,7 +65,8 @@ sudo docker run --privileged --net=host --ipc=host --ulimit memlock=-1:-1 --ulim
```bash ```bash
git clone https://github.com/microsoft/VibeVoice.git git clone https://github.com/microsoft/VibeVoice.git
cd VibeVoice cd VibeVoice
pip install -e .[asr]
pip install -e .
``` ```
## Usages ## Usages
+2 -1
View File
@@ -97,9 +97,10 @@ sudo docker run --privileged --net=host --ipc=host --ulimit memlock=-1:-1 --ulim
git clone https://github.com/microsoft/VibeVoice.git git clone https://github.com/microsoft/VibeVoice.git
cd VibeVoice/ cd VibeVoice/
pip install -e .[tts] pip install -e .
``` ```
## Usages ## Usages
+2 -2
View File
@@ -5,8 +5,8 @@ This directory contains scripts for LoRA (Low-Rank Adaptation) fine-tuning of th
## Requirements ## Requirements
```bash ```bash
# you need to install vibevoice first # Install vibevoice first
# pip install -e .[asr] pip install -e .
pip install peft pip install peft
``` ```
+2 -16
View File
@@ -18,6 +18,7 @@ classifiers = [
] ]
dependencies = [ dependencies = [
"torch", "torch",
"transformers>=4.51.3,<5.0.0",
"accelerate", "accelerate",
"llvmlite>=0.40.0", "llvmlite>=0.40.0",
"numba>=0.57.0", "numba>=0.57.0",
@@ -29,26 +30,11 @@ dependencies = [
"ml-collections", "ml-collections",
"absl-py", "absl-py",
"gradio", "gradio",
]
[project.optional-dependencies]
tts = [
"transformers==4.51.3", # this project is developed on transformers==4.51.3; later versions may not be compatible
"av", "av",
"aiortc", "aiortc",
"uvicorn[standard]", "uvicorn[standard]",
"fastapi"
]
asr = [
"transformers>=4.51.3", # 4.51.3 and all later versions are supported
"pydub" # for visualization
]
vllm = [
"transformers>=4.51.3",
"fastapi", "fastapi",
"uvicorn[standard]", "pydub",
"requests", "requests",
] ]