Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 61ecb098d6 | |||
| b4cd7c479f | |||
| 11dd7420ec |
@@ -73,11 +73,21 @@ def _ffmpeg_load_file(filepath) -> tuple[np.ndarray, int]:
|
|||||||
|
|
||||||
# Register FFmpeg-based audio loader
|
# Register FFmpeg-based audio loader
|
||||||
import vllm.multimodal.audio as _vllm_audio_module
|
import vllm.multimodal.audio as _vllm_audio_module
|
||||||
|
import warnings
|
||||||
|
|
||||||
|
# Handle both old and new vLLM versions
|
||||||
|
# In newer versions, AudioMediaIO may not exist or may have been moved
|
||||||
|
try:
|
||||||
_OriginalAudioMediaIO = _vllm_audio_module.AudioMediaIO
|
_OriginalAudioMediaIO = _vllm_audio_module.AudioMediaIO
|
||||||
|
|
||||||
class _PatchedAudioMediaIO(_OriginalAudioMediaIO):
|
class _PatchedAudioMediaIO(_OriginalAudioMediaIO):
|
||||||
"""AudioMediaIO implementation using FFmpeg for audio decoding."""
|
"""AudioMediaIO implementation using FFmpeg for audio decoding."""
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# Call parent constructor if it exists
|
||||||
|
if hasattr(super(), '__init__'):
|
||||||
|
super().__init__(**kwargs)
|
||||||
|
|
||||||
def load_bytes(self, data: bytes) -> tuple[np.ndarray, int]:
|
def load_bytes(self, data: bytes) -> tuple[np.ndarray, int]:
|
||||||
return _ffmpeg_load_bytes(data, media_type=None)
|
return _ffmpeg_load_bytes(data, media_type=None)
|
||||||
|
|
||||||
@@ -91,8 +101,48 @@ class _PatchedAudioMediaIO(_OriginalAudioMediaIO):
|
|||||||
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO
|
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO
|
||||||
|
|
||||||
# Also patch in utils module where it's imported
|
# Also patch in utils module where it's imported
|
||||||
|
try:
|
||||||
import vllm.multimodal.utils as _vllm_utils_module
|
import vllm.multimodal.utils as _vllm_utils_module
|
||||||
_vllm_utils_module.AudioMediaIO = _PatchedAudioMediaIO
|
_vllm_utils_module.AudioMediaIO = _PatchedAudioMediaIO
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
warnings.warn(f"Could not patch AudioMediaIO in vllm.multimodal.utils: {e}", UserWarning)
|
||||||
|
|
||||||
|
except (AttributeError, ImportError) as e:
|
||||||
|
# AudioMediaIO doesn't exist in this vLLM version
|
||||||
|
# Define our own standalone implementation
|
||||||
|
warnings.warn(
|
||||||
|
f"AudioMediaIO not found in vllm.multimodal.audio ({e}). "
|
||||||
|
"Using standalone FFmpeg-based implementation for compatibility.",
|
||||||
|
UserWarning
|
||||||
|
)
|
||||||
|
|
||||||
|
class _PatchedAudioMediaIO:
|
||||||
|
"""Standalone AudioMediaIO implementation using FFmpeg for audio decoding.
|
||||||
|
|
||||||
|
This is used when vLLM doesn't provide AudioMediaIO or it's been moved/removed.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
pass
|
||||||
|
|
||||||
|
def load_bytes(self, data: bytes) -> tuple[np.ndarray, int]:
|
||||||
|
return _ffmpeg_load_bytes(data, media_type=None)
|
||||||
|
|
||||||
|
def load_base64(self, media_type: str, data: str) -> tuple[np.ndarray, int]:
|
||||||
|
return _ffmpeg_load_bytes(base64.b64decode(data), media_type=media_type)
|
||||||
|
|
||||||
|
def load_file(self, filepath) -> tuple[np.ndarray, int]:
|
||||||
|
return _ffmpeg_load_file(filepath)
|
||||||
|
|
||||||
|
# Try to register it in the module if possible
|
||||||
|
try:
|
||||||
|
_vllm_audio_module.AudioMediaIO = _PatchedAudioMediaIO
|
||||||
|
except (AttributeError, TypeError) as e:
|
||||||
|
warnings.warn(
|
||||||
|
f"Could not register AudioMediaIO in vllm.multimodal.audio: {e}. "
|
||||||
|
"Audio loading will use FFmpeg functions directly.",
|
||||||
|
UserWarning
|
||||||
|
)
|
||||||
|
|
||||||
# ============================================================================
|
# ============================================================================
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user