1. Unify the install environment for TTS and ASR; 2. Temporarily avoid transformers 5.0.0

2026-01-26 03:26:52 -08:00
parent a00f431e14
commit a69e77c036
4 changed files with 13 additions and 25 deletions
@@ -65,7 +65,8 @@ sudo docker run --privileged --net=host --ipc=host --ulimit memlock=-1:-1 --ulim
 ```bash
 git clone https://github.com/microsoft/VibeVoice.git
 cd VibeVoice
-pip install -e .[asr]
+
+pip install -e .
 ```

 ## Usages
@@ -97,9 +97,10 @@ sudo docker run --privileged --net=host --ipc=host --ulimit memlock=-1:-1 --ulim
 git clone https://github.com/microsoft/VibeVoice.git
 cd VibeVoice/

-pip install -e .[tts]
+pip install -e .
 ```

+
 ## Usages


@@ -5,8 +5,8 @@ This directory contains scripts for LoRA (Low-Rank Adaptation) fine-tuning of th
 ## Requirements

 ```bash
-# you need to install vibevoice first
-# pip install -e .[asr]
+# Install vibevoice first
+pip install -e .

 pip install peft
 ```
@@ -18,6 +18,7 @@ classifiers = [
 ]
 dependencies = [
    "torch",
+    "transformers>=4.51.3,<5.0.0",
    "accelerate",
    "llvmlite>=0.40.0",
    "numba>=0.57.0",
@@ -29,26 +30,11 @@ dependencies = [
    "ml-collections",
    "absl-py",
    "gradio",
-]
-
-[project.optional-dependencies]
-tts = [
-  "transformers==4.51.3", # we develop this project on transformers==4.51.3, later version may not be compatible
    "av",
    "aiortc",
    "uvicorn[standard]",
-  "fastapi"
-]
-
-asr = [
-  "transformers>=4.51.3", # the versions after 4.51.3 are all support
-  "pydub" # for visualization
-]
-
-vllm = [
-  "transformers>=4.51.3",
    "fastapi",
-  "uvicorn[standard]",
+    "pydub",
    "requests",
 ]