From c0c2af984eec4faef2b72a4f714538f0819c118d Mon Sep 17 00:00:00 2001 From: YaoyaoChang Date: Thu, 22 Jan 2026 06:20:11 -0800 Subject: [PATCH] update README for finetuning-asr --- README.md | 2 +- docs/vibevoice-asr.md | 4 ++++ {finetuning => finetuning-asr}/README.md | 0 {finetuning => finetuning-asr}/inference_lora.py | 0 {finetuning => finetuning-asr}/lora_finetune.py | 0 {finetuning => finetuning-asr}/toy_dataset/0.json | 0 {finetuning => finetuning-asr}/toy_dataset/0.mp3 | Bin {finetuning => finetuning-asr}/toy_dataset/1.json | 0 {finetuning => finetuning-asr}/toy_dataset/1.mp3 | Bin 9 files changed, 5 insertions(+), 1 deletion(-) rename {finetuning => finetuning-asr}/README.md (100%) rename {finetuning => finetuning-asr}/inference_lora.py (100%) rename {finetuning => finetuning-asr}/lora_finetune.py (100%) rename {finetuning => finetuning-asr}/toy_dataset/0.json (100%) rename {finetuning => finetuning-asr}/toy_dataset/0.mp3 (100%) rename {finetuning => finetuning-asr}/toy_dataset/1.json (100%) rename {finetuning => finetuning-asr}/toy_dataset/1.mp3 (100%) diff --git a/README.md b/README.md index be150f0..fe63f7c 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ For more information, demos, and examples, please visit our [Project Page](https - **📝 Rich Transcription (Who, When, What)**: The model jointly performs ASR, diarization, and timestamping, producing a structured output that indicates *who* said *what* and *when*. -[📖 Documentation](docs/vibevoice-asr.md) | [🤗 Hugging Face](https://huggingface.co/microsoft/VibeVoice-ASR) | [🎮 Playground](https://aka.ms/vibevoice-asr) +[📖 Documentation](docs/vibevoice-asr.md) | [🤗 Hugging Face](https://huggingface.co/microsoft/VibeVoice-ASR) | [🎮 Playground](https://aka.ms/vibevoice-asr) | [🛠️ Finetuning](finetuning-asr/README.md)

diff --git a/docs/vibevoice-asr.md b/docs/vibevoice-asr.md index 268ca8d..9d2bb4e 100644 --- a/docs/vibevoice-asr.md +++ b/docs/vibevoice-asr.md @@ -7,6 +7,7 @@ **Model:** [VibeVoice-ASR-7B](https://huggingface.co/microsoft/VibeVoice-ASR)
**Demo:** [VibeVoice-ASR-Demo](https://aka.ms/vibevoice-asr)
+**Finetuning:** [finetune-guide](../finetuning-asr/README.md) ## 🔥 Key Features @@ -78,6 +79,9 @@ python demo/vibevoice_asr_gradio_demo.py --model_path microsoft/VibeVoice-ASR -- python demo/vibevoice_asr_inference_from_file.py --model_path microsoft/VibeVoice-ASR --audio_files [add a audio path here] ``` +## Finetuning +LoRA (Low-Rank Adaptation) fine-tuning is supported. See [Finetuning](../finetuning-asr/README.md) for a detailed guide. + ## 📄 License diff --git a/finetuning/README.md b/finetuning-asr/README.md similarity index 100% rename from finetuning/README.md rename to finetuning-asr/README.md diff --git a/finetuning/inference_lora.py b/finetuning-asr/inference_lora.py similarity index 100% rename from finetuning/inference_lora.py rename to finetuning-asr/inference_lora.py diff --git a/finetuning/lora_finetune.py b/finetuning-asr/lora_finetune.py similarity index 100% rename from finetuning/lora_finetune.py rename to finetuning-asr/lora_finetune.py diff --git a/finetuning/toy_dataset/0.json b/finetuning-asr/toy_dataset/0.json similarity index 100% rename from finetuning/toy_dataset/0.json rename to finetuning-asr/toy_dataset/0.json diff --git a/finetuning/toy_dataset/0.mp3 b/finetuning-asr/toy_dataset/0.mp3 similarity index 100% rename from finetuning/toy_dataset/0.mp3 rename to finetuning-asr/toy_dataset/0.mp3 diff --git a/finetuning/toy_dataset/1.json b/finetuning-asr/toy_dataset/1.json similarity index 100% rename from finetuning/toy_dataset/1.json rename to finetuning-asr/toy_dataset/1.json diff --git a/finetuning/toy_dataset/1.mp3 b/finetuning-asr/toy_dataset/1.mp3 similarity index 100% rename from finetuning/toy_dataset/1.mp3 rename to finetuning-asr/toy_dataset/1.mp3