Skip to content

AudioTensor

docarray.typing.tensor.audio.audio_ndarray

AudioNdArray

Bases: AbstractAudioTensor, NdArray

Subclass of NdArray, to represent an audio tensor. Adds audio-specific features to the tensor.


from typing import Optional

from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioNdArray, AudioUrl
import numpy as np


class MyAudioDoc(BaseDoc):
    # Example schema: a title plus three optional views of the same audio
    # (in-memory tensor, remote URL, raw bytes).
    title: str
    audio_tensor: Optional[AudioNdArray]
    url: Optional[AudioUrl]
    bytes_: Optional[AudioBytes]


# from tensor
doc_1 = MyAudioDoc(
    title='my_first_audio_doc',
    audio_tensor=np.random.rand(1000, 2),
)

# doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

# from url
doc_2 = MyAudioDoc(
    title='my_second_audio_doc',
    url='https://www.kozco.com/tech/piano2.wav',
)

# load() is unpacked into two values; only the first (the tensor) is kept.
doc_2.audio_tensor, _ = doc_2.url.load()
# doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
# Serialize the tensor that was just loaded into doc_2 (not doc_1's tensor).
doc_2.bytes_ = doc_2.audio_tensor.to_bytes()

Source code in docarray/typing/tensor/audio/audio_ndarray.py
@_register_proto(proto_type_name='audio_ndarray')
class AudioNdArray(AbstractAudioTensor, NdArray):
    """
    Subclass of [`NdArray`][docarray.typing.NdArray], to represent an audio tensor.
    Adds audio-specific features to the tensor.

    The class body is empty: the audio helpers (`to_bytes`, `save`, `display`)
    are inherited from `AbstractAudioTensor`.

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioNdArray, AudioUrl
    import numpy as np


    class MyAudioDoc(BaseDoc):
        title: str
        audio_tensor: Optional[AudioNdArray]
        url: Optional[AudioUrl]
        bytes_: Optional[AudioBytes]


    # from tensor
    doc_1 = MyAudioDoc(
        title='my_first_audio_doc',
        audio_tensor=np.random.rand(1000, 2),
    )

    # doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
    doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

    # from url
    doc_2 = MyAudioDoc(
        title='my_second_audio_doc',
        url='https://www.kozco.com/tech/piano2.wav',
    )

    doc_2.audio_tensor, _ = doc_2.url.load()
    # doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
    doc_2.bytes_ = doc_2.audio_tensor.to_bytes()
    ```

    ---
    """

    ...

docarray.typing.tensor.audio.abstract_audio_tensor

AbstractAudioTensor

Bases: AbstractTensor, ABC

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
class AbstractAudioTensor(AbstractTensor, ABC):
    """
    Abstract base for audio tensors.

    Adds three helpers on top of `AbstractTensor`: conversion to raw bytes
    (`to_bytes`), export to an audio file (`save`) and playback inside a
    notebook (`display`).
    """

    def to_bytes(self) -> 'AudioBytes':
        """
        Convert audio tensor to [`AudioBytes`][docarray.typing.AudioBytes].

        The samples are multiplied by `MAX_INT_16`, limited to the signed
        16-bit range and serialized as little-endian 16-bit integers.

        :return: the serialized samples wrapped in `AudioBytes`
        """
        import numpy as np

        from docarray.typing.bytes.audio_bytes import AudioBytes

        int16_range = np.iinfo(np.int16)
        samples = self.get_comp_backend().to_numpy(self)
        # Clip before casting: a scaled sample outside the int16 range would
        # otherwise overflow in the cast and produce an arbitrary value.
        scaled = np.clip(samples * MAX_INT_16, int16_range.min, int16_range.max)
        return AudioBytes(scaled.astype('<h').tobytes())

    def save(
        self: 'T',
        file_path: Union[str, BinaryIO],
        format: str = 'wav',
        frame_rate: int = 44100,
        sample_width: int = 2,
        pydub_args: Dict[str, Any] = {},
    ) -> None:
        """
        Save audio tensor to an audio file. Mono/stereo is preserved.

        :param file_path: path to an audio file. If `file_path` is a string, open the
            file by that name, otherwise treat it as a file-like object.
        :param format: format for the audio file ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files)
        :param frame_rate: sampling frequency
        :param sample_width: sample width in bytes
        :param pydub_args: dictionary of additional arguments for pydub.AudioSegment.export function
        """
        # Called with raise_error=True so a missing pydub is reported up front;
        # the returned module object itself is not used.
        pydub = import_library('pydub', raise_error=True)  # noqa: F841
        from pydub import AudioSegment

        comp_backend = self.get_comp_backend()
        # Any tensor with more than one dimension is declared as 2-channel.
        # NOTE(review): assumes multi-dimensional tensors are always stereo — confirm.
        channels = 2 if comp_backend.n_dim(array=self) > 1 else 1  # type: ignore

        # to_bytes() always emits 16-bit ('<h') samples.
        # NOTE(review): a sample_width other than 2 presumably mismatches that
        # data — confirm against callers.
        segment = AudioSegment(
            self.to_bytes(),
            frame_rate=frame_rate,
            sample_width=sample_width,
            channels=channels,
        )
        # pydub_args is only unpacked here, never mutated, so the shared `{}`
        # default is not modified between calls.
        segment.export(file_path, format=format, **pydub_args)

    def display(self, rate=44100):
        """
        Play audio data from tensor in notebook.

        Outside of a notebook nothing is played; a warning is emitted instead.

        :param rate: sampling rate passed on to `IPython.display.Audio`
        """
        if is_notebook():
            # Imported lazily so IPython is only needed when actually in a notebook.
            from IPython.display import Audio, display

            audio_np = self.get_comp_backend().to_numpy(self)
            display(Audio(audio_np, rate=rate))
        else:
            warnings.warn('Display of audio is only possible in a notebook.')

display(rate=44100)

Play audio data from tensor in notebook.

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
def display(self, rate=44100):
    """
    Render an audio player for this tensor inside a notebook.

    When the code is not running in a notebook, a warning is issued and
    nothing is rendered.

    :param rate: sampling rate handed to `IPython.display.Audio`
    """
    if not is_notebook():
        warnings.warn('Display of audio is only possible in a notebook.')
        return

    # Lazy import: IPython is only required on the notebook path.
    from IPython.display import Audio, display

    waveform = self.get_comp_backend().to_numpy(self)
    player = Audio(waveform, rate=rate)
    display(player)

save(file_path, format='wav', frame_rate=44100, sample_width=2, pydub_args={})

Save audio tensor to an audio file. Mono/stereo is preserved.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| file_path | Union[str, BinaryIO] | path to an audio file. If `file_path` is a string, open the file by that name, otherwise treat it as a file-like object. | required |
| format | str | format for the audio file ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files) | 'wav' |
| frame_rate | int | sampling frequency | 44100 |
| sample_width | int | sample width in bytes | 2 |
| pydub_args | Dict[str, Any] | dictionary of additional arguments for pydub.AudioSegment.export function | {} |

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
def save(
    self: 'T',
    file_path: Union[str, BinaryIO],
    format: str = 'wav',
    frame_rate: int = 44100,
    sample_width: int = 2,
    pydub_args: Dict[str, Any] = {},
) -> None:
    """
    Export the audio tensor to an audio file, keeping it mono or stereo.

    :param file_path: destination of the audio. If `file_path` is a string, the
        file with that name is opened; otherwise it is treated as a file-like object.
    :param format: format for the audio file ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files)
    :param frame_rate: sampling frequency
    :param sample_width: sample width in bytes
    :param pydub_args: dictionary of additional arguments for pydub.AudioSegment.export function
    """
    # Only called for its check (raise_error=True); the module object is unused.
    import_library('pydub', raise_error=True)
    from pydub import AudioSegment

    # One-dimensional tensors are exported as mono, everything else as 2-channel.
    n_channels = 1
    if self.get_comp_backend().n_dim(array=self) > 1:  # type: ignore
        n_channels = 2

    raw_audio = self.to_bytes()
    # pydub_args is only unpacked, never mutated, so the `{}` default stays empty.
    AudioSegment(
        raw_audio,
        frame_rate=frame_rate,
        sample_width=sample_width,
        channels=n_channels,
    ).export(file_path, format=format, **pydub_args)

to_bytes()

Convert audio tensor to AudioBytes.

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
def to_bytes(self) -> 'AudioBytes':
    """
    Convert audio tensor to [`AudioBytes`][docarray.typing.AudioBytes].

    The samples are multiplied by `MAX_INT_16`, limited to the signed 16-bit
    range and serialized as little-endian 16-bit integers.

    :return: the serialized samples wrapped in `AudioBytes`
    """
    import numpy as np

    from docarray.typing.bytes.audio_bytes import AudioBytes

    int16_range = np.iinfo(np.int16)
    samples = self.get_comp_backend().to_numpy(self)
    # Clip before casting: a scaled sample outside the int16 range would
    # otherwise overflow in the cast and produce an arbitrary value.
    scaled = np.clip(samples * MAX_INT_16, int16_range.min, int16_range.max)
    return AudioBytes(scaled.astype('<h').tobytes())

docarray.typing.tensor.audio.audio_tensorflow_tensor

AudioTensorFlowTensor

Bases: AbstractAudioTensor, TensorFlowTensor

Subclass of TensorFlowTensor, to represent an audio tensor. Adds audio-specific features to the tensor.


from typing import Optional

import tensorflow as tf

from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioTensorFlowTensor, AudioUrl


class MyAudioDoc(BaseDoc):
    # Example schema: a title plus three optional views of the same audio
    # (in-memory tensor, remote URL, raw bytes).
    title: str
    audio_tensor: Optional[AudioTensorFlowTensor]
    url: Optional[AudioUrl]
    bytes_: Optional[AudioBytes]


# from tensor
doc_1 = MyAudioDoc(
    title='my_first_audio_doc',
    audio_tensor=tf.random.normal((1000, 2)),
)

# doc_1.audio_tensor.save(file_path='file_1.wav')
doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

# from url
doc_2 = MyAudioDoc(
    title='my_second_audio_doc',
    url='https://www.kozco.com/tech/piano2.wav',
)

# load() is unpacked into two values; only the first (the tensor) is kept.
doc_2.audio_tensor, _ = doc_2.url.load()
# Serialize the tensor that was just loaded into doc_2 (not doc_1's tensor).
doc_2.bytes_ = doc_2.audio_tensor.to_bytes()

Source code in docarray/typing/tensor/audio/audio_tensorflow_tensor.py
@_register_proto(proto_type_name='audio_tensorflow_tensor')
class AudioTensorFlowTensor(
    AbstractAudioTensor, TensorFlowTensor, metaclass=metaTensorFlow
):
    """
    Subclass of [`TensorFlowTensor`][docarray.typing.TensorFlowTensor],
    to represent an audio tensor. Adds audio-specific features to the tensor.

    The class body is empty: the audio helpers (`to_bytes`, `save`, `display`)
    are inherited from `AbstractAudioTensor`.

    ---

    ```python
    from typing import Optional

    import tensorflow as tf

    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioTensorFlowTensor, AudioUrl


    class MyAudioDoc(BaseDoc):
        title: str
        audio_tensor: Optional[AudioTensorFlowTensor]
        url: Optional[AudioUrl]
        bytes_: Optional[AudioBytes]


    doc_1 = MyAudioDoc(
        title='my_first_audio_doc',
        audio_tensor=tf.random.normal((1000, 2)),
    )

    # doc_1.audio_tensor.save(file_path='file_1.wav')
    doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

    doc_2 = MyAudioDoc(
        title='my_second_audio_doc',
        url='https://www.kozco.com/tech/piano2.wav',
    )

    doc_2.audio_tensor, _ = doc_2.url.load()
    doc_2.bytes_ = doc_2.audio_tensor.to_bytes()
    ```

    ---
    """

    ...

docarray.typing.tensor.audio.audio_torch_tensor

AudioTorchTensor

Bases: AbstractAudioTensor, TorchTensor

Subclass of TorchTensor, to represent an audio tensor. Adds audio-specific features to the tensor.


from typing import Optional

import torch

from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioTorchTensor, AudioUrl


class MyAudioDoc(BaseDoc):
    # Example schema: a title plus three optional views of the same audio
    # (in-memory tensor, remote URL, raw bytes).
    title: str
    audio_tensor: Optional[AudioTorchTensor]
    url: Optional[AudioUrl]
    bytes_: Optional[AudioBytes]


# from tensor
doc_1 = MyAudioDoc(
    title='my_first_audio_doc',
    audio_tensor=torch.zeros(1000, 2),
)

# doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

# from url
doc_2 = MyAudioDoc(
    title='my_second_audio_doc',
    url='https://www.kozco.com/tech/piano2.wav',
)

# load() is unpacked into two values; only the first (the tensor) is kept.
doc_2.audio_tensor, _ = doc_2.url.load()
# doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
# Serialize the tensor that was just loaded into doc_2 (not doc_1's tensor).
doc_2.bytes_ = doc_2.audio_tensor.to_bytes()

Source code in docarray/typing/tensor/audio/audio_torch_tensor.py
@_register_proto(proto_type_name='audio_torch_tensor')
class AudioTorchTensor(AbstractAudioTensor, TorchTensor, metaclass=metaTorchAndNode):
    """
    Subclass of [`TorchTensor`][docarray.typing.TorchTensor], to represent an audio tensor.
    Adds audio-specific features to the tensor.

    The class body is empty: the audio helpers (`to_bytes`, `save`, `display`)
    are inherited from `AbstractAudioTensor`.

    ---

    ```python
    from typing import Optional

    import torch

    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioTorchTensor, AudioUrl


    class MyAudioDoc(BaseDoc):
        title: str
        audio_tensor: Optional[AudioTorchTensor]
        url: Optional[AudioUrl]
        bytes_: Optional[AudioBytes]


    doc_1 = MyAudioDoc(
        title='my_first_audio_doc',
        audio_tensor=torch.zeros(1000, 2),
    )

    # doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
    doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

    doc_2 = MyAudioDoc(
        title='my_second_audio_doc',
        url='https://www.kozco.com/tech/piano2.wav',
    )

    doc_2.audio_tensor, _ = doc_2.url.load()
    # doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
    doc_2.bytes_ = doc_2.audio_tensor.to_bytes()
    ```

    ---
    """

    ...