Skip to content

AudioTensor

docarray.typing.tensor.audio.audio_ndarray

AudioNdArray

Bases: AbstractAudioTensor, NdArray

Subclass of NdArray, to represent an audio tensor. Adds audio-specific features to the tensor.


# Example: build audio documents from an in-memory tensor and from a URL,
# then serialize each document's own tensor to bytes.
from typing import Optional

from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioNdArray, AudioUrl
import numpy as np


class MyAudioDoc(BaseDoc):
    title: str
    audio_tensor: Optional[AudioNdArray] = None
    url: Optional[AudioUrl] = None
    bytes_: Optional[AudioBytes] = None


# from tensor
doc_1 = MyAudioDoc(
    title='my_first_audio_doc',
    audio_tensor=np.random.rand(1000, 2),
)

# doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

# from url
doc_2 = MyAudioDoc(
    title='my_second_audio_doc',
    url='https://www.kozco.com/tech/piano2.wav',
)

# load() returns a pair; only the tensor is kept, the second element is discarded
doc_2.audio_tensor, _ = doc_2.url.load()
# doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
# serialize doc_2's own tensor (the original example mistakenly used doc_1's)
doc_2.bytes_ = doc_2.audio_tensor.to_bytes()

Source code in docarray/typing/tensor/audio/audio_ndarray.py
@_register_proto(proto_type_name='audio_ndarray')
class AudioNdArray(AbstractAudioTensor, NdArray):
    """
    Subclass of [`NdArray`][docarray.typing.NdArray], to represent an audio tensor.
    Adds audio-specific features to the tensor.


    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioNdArray, AudioUrl
    import numpy as np


    class MyAudioDoc(BaseDoc):
        title: str
        audio_tensor: Optional[AudioNdArray] = None
        url: Optional[AudioUrl] = None
        bytes_: Optional[AudioBytes] = None


    # from tensor
    doc_1 = MyAudioDoc(
        title='my_first_audio_doc',
        audio_tensor=np.random.rand(1000, 2),
    )

    # doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
    doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

    # from url
    doc_2 = MyAudioDoc(
        title='my_second_audio_doc',
        url='https://www.kozco.com/tech/piano2.wav',
    )

    doc_2.audio_tensor, _ = doc_2.url.load()
    # doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
    doc_2.bytes_ = doc_2.audio_tensor.to_bytes()
    ```

    ---
    """

    ...

docarray.typing.tensor.audio.abstract_audio_tensor

AbstractAudioTensor

Bases: AbstractTensor, ABC

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
class AbstractAudioTensor(AbstractTensor, ABC):
    """
    Abstract base for audio tensors: adds byte conversion, file export and
    notebook playback on top of `AbstractTensor`.
    """

    def to_bytes(self) -> 'AudioBytes':
        """
        Convert audio tensor to [`AudioBytes`][docarray.typing.AudioBytes].

        The tensor is converted to a NumPy array, scaled by `MAX_INT_16` and
        cast to little-endian 16-bit integers before being serialized.

        :return: the serialized samples wrapped in `AudioBytes`
        """
        from docarray.typing.bytes.audio_bytes import AudioBytes

        samples = self.get_comp_backend().to_numpy(self)
        # NOTE(review): no clipping happens before the cast, so samples are
        # presumably expected to be normalized to [-1, 1] - confirm.
        pcm = (samples * MAX_INT_16).astype('<h')
        return AudioBytes(pcm.tobytes())

    def save(
        self: 'T',
        file_path: Union[str, BinaryIO],
        format: str = 'wav',
        frame_rate: int = 44100,
        sample_width: int = 2,
        pydub_args: Dict[str, Any] = {},
    ) -> None:
        """
        Save audio tensor to an audio file. Mono/stereo is preserved.

        :param file_path: path to an audio file. If file is a string, open the file by
            that name, otherwise treat it as a file-like object.
        :param format: format for the audio file ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files)
        :param frame_rate: sampling frequency
        :param sample_width: sample width in bytes
        :param pydub_args: dictionary of additional arguments for pydub.AudioSegment.export function
        """
        # Called with raise_error=True before the direct pydub import below.
        import_library('pydub', raise_error=True)
        from pydub import AudioSegment

        # Any tensor with more than one dimension is exported as two channels.
        n_dims = self.get_comp_backend().n_dim(array=self)  # type: ignore
        if n_dims > 1:
            channels = 2
        else:
            channels = 1

        audio = AudioSegment(
            self.to_bytes(),
            frame_rate=frame_rate,
            sample_width=sample_width,
            channels=channels,
        )
        # pydub_args is only unpacked here, never mutated.
        audio.export(file_path, format=format, **pydub_args)

    def display(self, rate=44100):
        """
        Play audio data from tensor in notebook.

        Outside of a notebook only a warning is emitted.

        :param rate: value forwarded as `rate` to `IPython.display.Audio`
        """
        if not is_notebook():
            warnings.warn('Display of audio is only possible in a notebook.')
            return

        from IPython.display import Audio, display

        samples = self.get_comp_backend().to_numpy(self)
        display(Audio(samples, rate=rate))

__docarray_validate_getitem__(item) classmethod

This method validates the input to AbstractTensor.__class_getitem__.

It is called at "class creation time", i.e. when a class is created with syntax of the form AnyTensor[shape].

The default implementation wraps a single int in a one-element tuple and converts any other item to a tuple; the elements themselves are not cast or checked. A subclass can override this method to implement custom validation logic.

The output of this is eventually passed to AbstractTensor.__docarray_validate_shape__ as its shape argument.

Raises TypeError if the input item cannot be converted to a tuple.

Parameters:

Name Type Description Default
item Any

The item to validate, passed to __class_getitem__ (Tensor[item]).

required

Returns:

Type Description
Tuple[int]

The validated item == the target shape of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_getitem__(cls, item: Any) -> Tuple[int]:
    """This method validates the input to `AbstractTensor.__class_getitem__`.

    It is called at "class creation time",
    i.e. when a class is created with syntax of the form AnyTensor[shape].

    The default implementation wraps a single `int` in a one-element tuple and
    converts any other `item` to a tuple; the elements themselves are not
    cast or checked.
    A subclass can override this method to implement custom validation logic.

    The output of this is eventually passed to
    [`AbstractTensor.__docarray_validate_shape__`]
    [docarray.typing.tensor.abstract_tensor.AbstractTensor.__docarray_validate_shape__]
    as its `shape` argument.

    Raises `TypeError` if the input `item` cannot be converted to a tuple.

    :param item: The item to validate, passed to `__class_getitem__` (`Tensor[item]`).
    :return: The validated item == the target shape of this tensor.
    :raises TypeError: if `item` is neither an `int` nor iterable.
    """
    if isinstance(item, int):
        return (item,)
    try:
        return tuple(item)
    except TypeError as err:
        # Chain explicitly so the traceback shows the original failure as the cause.
        raise TypeError(f'{item} is not a valid tensor shape.') from err

__docarray_validate_shape__(t, shape) classmethod

Every tensor has to implement this method in order to enable syntax of the form AnyTensor[shape]. It is called when a tensor is assigned to a field of this type, i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

The intended behaviour is as follows:

  • If the shape of t is equal to shape, return t.
  • If the shape of t is not equal to shape, but can be reshaped to shape, return t reshaped to shape.
  • If the shape of t is not equal to shape and cannot be reshaped to shape, raise a ValueError.

Parameters:

Name Type Description Default
t T

The tensor to validate.

required
shape Tuple[Union[int, str], ...]

The shape to validate against.

required

Returns:

Type Description
T

The validated tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@classmethod
def __docarray_validate_shape__(cls, t: T, shape: Tuple[Union[int, str], ...]) -> T:
    """Every tensor has to implement this method in order to
    enable syntax of the form AnyTensor[shape].
    It is called when a tensor is assigned to a field of this type.
    i.e. when a tensor is passed to a Document field of type AnyTensor[shape].

    The intended behaviour is as follows:

    - If the shape of `t` is equal to `shape`, return `t`.
    - If the shape of `t` is not equal to `shape`,
        but can be reshaped to `shape`, return `t` reshaped to `shape`.
    - If the shape of `t` is not equal to `shape`
        and cannot be reshaped to `shape`, raise a ValueError.

    In this implementation, a `shape` containing string dimensions or an
    Ellipsis is treated as a pattern rather than a reshape target:

    - A string dimension is a named variable; every occurrence of the same
        name must correspond to the same size in `t`.
    - A single Ellipsis stands for one or more dimensions of any size;
        using it more than once raises a ValueError.
    - No reshape is attempted for such shapes: `t` is returned unchanged if
        it matches the pattern, otherwise a ValueError is raised.

    :param t: The tensor to validate.
    :param shape: The shape to validate against.
    :return: The validated tensor.
    :raises ValueError: if the shape pattern does not match, if Ellipsis is
        used more than once, or if the backend reshape raises a RuntimeError.
    """
    comp_be = t.get_comp_backend()
    tshape = comp_be.shape(t)
    if tshape == shape:
        return t
    # Pattern shape: contains named (str) dimensions and/or an Ellipsis.
    elif any(isinstance(dim, str) or dim == Ellipsis for dim in shape):
        ellipsis_occurrences = [
            pos for pos, dim in enumerate(shape) if dim == Ellipsis
        ]
        if ellipsis_occurrences:
            if len(ellipsis_occurrences) > 1:
                raise ValueError(
                    f'Cannot use Ellipsis (...) more than once for the shape {shape}'
                )
            ellipsis_pos = ellipsis_occurrences[0]
            # Calculate how many dimensions to add. Should be at least 1.
            dimensions_needed = max(len(tshape) - len(shape) + 1, 1)
            # Replace the Ellipsis with that many uniquely named dimensions;
            # each name occurs once, so each one matches any size below.
            shape = (
                shape[:ellipsis_pos]
                + tuple(
                    f'__dim_var_{index}__' for index in range(dimensions_needed)
                )
                + shape[ellipsis_pos + 1 :]
            )

        if len(tshape) != len(shape):
            raise ValueError(
                f'Tensor shape mismatch. Expected {shape}, got {tshape}'
            )
        # Size bound to each named dimension seen so far.
        known_dims: Dict[str, int] = {}
        for tdim, dim in zip(tshape, shape):
            if isinstance(dim, int) and tdim != dim:
                raise ValueError(
                    f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                )
            elif isinstance(dim, str):
                if dim in known_dims and known_dims[dim] != tdim:
                    raise ValueError(
                        f'Tensor shape mismatch. Expected {shape}, got {tshape}'
                    )
                else:
                    known_dims[dim] = tdim
        else:
            # The loop has no `break`, so this runs whenever every dimension
            # passed the checks above.
            return t
    else:
        # Shape without str/Ellipsis entries that differs from the actual
        # shape: warn and try to reshape.
        shape = cast(Tuple[int], shape)
        warnings.warn(
            f'Tensor shape mismatch. Reshaping tensor '
            f'of shape {tshape} to shape {shape}'
        )
        try:
            value = cls._docarray_from_native(comp_be.reshape(t, shape))
            return cast(T, value)
        # Only RuntimeError is translated; any other exception type raised
        # by the backend reshape propagates unchanged.
        except RuntimeError:
            raise ValueError(
                f'Cannot reshape tensor of shape {tshape} to shape {shape}'
            )

__getitem__(item) abstractmethod

Get a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __getitem__(self: T, item) -> T:
    """Get a slice of this tensor.

    Abstract method: concrete tensor classes must provide the implementation.

    :param item: the selector identifying the slice to return
    :return: the selected slice, annotated with the same type as this tensor
    """
    ...

__iter__() abstractmethod

Iterate over the elements of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __iter__(self):
    """Iterate over the elements of this tensor.

    Abstract method: concrete tensor classes must provide the implementation.
    """
    ...

__setitem__(index, value) abstractmethod

Set a slice of this tensor.

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def __setitem__(self, index, value):
    """Set a slice of this tensor.

    Abstract method: concrete tensor classes must provide the implementation.

    :param index: the selector identifying the slice to overwrite
    :param value: the new content for that slice
    """
    ...

display(rate=44100)

Play audio data from tensor in notebook.

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
def display(self, rate=44100):
    """
    Play audio data from tensor in notebook.

    Outside of a notebook only a warning is emitted.

    :param rate: value forwarded as `rate` to `IPython.display.Audio`
    """
    if not is_notebook():
        warnings.warn('Display of audio is only possible in a notebook.')
        return

    from IPython.display import Audio, display

    samples = self.get_comp_backend().to_numpy(self)
    display(Audio(samples, rate=rate))

get_comp_backend() abstractmethod staticmethod

The computational backend compatible with this tensor type.

Source code in docarray/typing/tensor/abstract_tensor.py
@staticmethod
@abc.abstractmethod
def get_comp_backend() -> AbstractComputationalBackend:
    """The computational backend compatible with this tensor type.

    Abstract static method: concrete tensor classes must provide the implementation.

    :return: the computational backend to use for tensors of this type
    """
    ...

save(file_path, format='wav', frame_rate=44100, sample_width=2, pydub_args={})

Save audio tensor to an audio file. Mono/stereo is preserved.

Parameters:

Name Type Description Default
file_path Union[str, BinaryIO]

path to an audio file. If file is a string, open the file by that name, otherwise treat it as a file-like object.

required
format str

format for the audio file ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files)

'wav'
frame_rate int

sampling frequency

44100
sample_width int

sample width in bytes

2
pydub_args Dict[str, Any]

dictionary of additional arguments for pydub.AudioSegment.export function

{}
Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
def save(
    self: 'T',
    file_path: Union[str, BinaryIO],
    format: str = 'wav',
    frame_rate: int = 44100,
    sample_width: int = 2,
    pydub_args: Dict[str, Any] = {},
) -> None:
    """
    Save audio tensor to an audio file. Mono/stereo is preserved.

    :param file_path: path to an audio file. If file is a string, open the file by
        that name, otherwise treat it as a file-like object.
    :param format: format for the audio file ('mp3', 'wav', 'raw', 'ogg' or other ffmpeg/avconv supported files)
    :param frame_rate: sampling frequency
    :param sample_width: sample width in bytes
    :param pydub_args: dictionary of additional arguments for pydub.AudioSegment.export function
    """
    # Called with raise_error=True before the direct pydub import below.
    import_library('pydub', raise_error=True)
    from pydub import AudioSegment

    # Any tensor with more than one dimension is exported as two channels.
    n_dims = self.get_comp_backend().n_dim(array=self)  # type: ignore
    if n_dims > 1:
        channels = 2
    else:
        channels = 1

    audio = AudioSegment(
        self.to_bytes(),
        frame_rate=frame_rate,
        sample_width=sample_width,
        channels=channels,
    )
    # pydub_args is only unpacked here, never mutated.
    audio.export(file_path, format=format, **pydub_args)

to_bytes()

Convert audio tensor to AudioBytes.

Source code in docarray/typing/tensor/audio/abstract_audio_tensor.py
def to_bytes(self) -> 'AudioBytes':
    """
    Convert audio tensor to [`AudioBytes`][docarray.typing.AudioBytes].

    The tensor is converted to a NumPy array, scaled by `MAX_INT_16` and
    cast to little-endian 16-bit integers before being serialized.

    :return: the serialized samples wrapped in `AudioBytes`
    """
    from docarray.typing.bytes.audio_bytes import AudioBytes

    samples = self.get_comp_backend().to_numpy(self)
    # NOTE(review): no clipping happens before the cast, so samples are
    # presumably expected to be normalized to [-1, 1] - confirm.
    pcm = (samples * MAX_INT_16).astype('<h')
    return AudioBytes(pcm.tobytes())

to_protobuf() abstractmethod

Convert this tensor into a Protobuf message

Source code in docarray/typing/tensor/abstract_tensor.py
@abc.abstractmethod
def to_protobuf(self) -> 'NdArrayProto':
    """Convert this tensor into a Protobuf message.

    Abstract method: concrete tensor classes must provide the implementation.

    :return: the Protobuf message (`NdArrayProto`) for this tensor
    """
    ...

unwrap()

Return the native tensor object that this tensor wraps.

Source code in docarray/typing/tensor/abstract_tensor.py
def unwrap(self):
    """Return the native tensor object that this tensor wraps.

    NOTE(review): the body shown here consists of the docstring only, so this
    implementation returns None; concrete tensor classes presumably override
    it - confirm.
    """

docarray.typing.tensor.audio.audio_tensorflow_tensor

AudioTensorFlowTensor

Bases: AbstractAudioTensor, TensorFlowTensor

Subclass of TensorFlowTensor, to represent an audio tensor. Adds audio-specific features to the tensor.


# Example: build audio documents from an in-memory tensor and from a URL,
# then serialize each document's own tensor to bytes.
from typing import Optional

import tensorflow as tf

from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioTensorFlowTensor, AudioUrl


class MyAudioDoc(BaseDoc):
    title: str
    # Explicit `= None` defaults: each document below sets only some of these fields.
    audio_tensor: Optional[AudioTensorFlowTensor] = None
    url: Optional[AudioUrl] = None
    bytes_: Optional[AudioBytes] = None


# from tensor
doc_1 = MyAudioDoc(
    title='my_first_audio_doc',
    audio_tensor=tf.random.normal((1000, 2)),
)

# doc_1.audio_tensor.save(file_path='file_1.wav')
doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

# from url
doc_2 = MyAudioDoc(
    title='my_second_audio_doc',
    url='https://www.kozco.com/tech/piano2.wav',
)

# load() returns a pair; only the tensor is kept, the second element is discarded
doc_2.audio_tensor, _ = doc_2.url.load()
# serialize doc_2's own tensor (the original example mistakenly used doc_1's)
doc_2.bytes_ = doc_2.audio_tensor.to_bytes()

Source code in docarray/typing/tensor/audio/audio_tensorflow_tensor.py
@_register_proto(proto_type_name='audio_tensorflow_tensor')
class AudioTensorFlowTensor(
    AbstractAudioTensor, TensorFlowTensor, metaclass=metaTensorFlow
):
    """
    Subclass of [`TensorFlowTensor`][docarray.typing.TensorFlowTensor],
    to represent an audio tensor. Adds audio-specific features to the tensor.

    ---

    ```python
    from typing import Optional

    import tensorflow as tf

    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioTensorFlowTensor, AudioUrl


    class MyAudioDoc(BaseDoc):
        title: str
        audio_tensor: Optional[AudioTensorFlowTensor] = None
        url: Optional[AudioUrl] = None
        bytes_: Optional[AudioBytes] = None


    doc_1 = MyAudioDoc(
        title='my_first_audio_doc',
        audio_tensor=tf.random.normal((1000, 2)),
    )

    # doc_1.audio_tensor.save(file_path='file_1.wav')
    doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

    doc_2 = MyAudioDoc(
        title='my_second_audio_doc',
        url='https://www.kozco.com/tech/piano2.wav',
    )

    doc_2.audio_tensor, _ = doc_2.url.load()
    doc_2.bytes_ = doc_2.audio_tensor.to_bytes()
    ```

    ---
    """

    ...

docarray.typing.tensor.audio.audio_torch_tensor

AudioTorchTensor

Bases: AbstractAudioTensor, TorchTensor

Subclass of TorchTensor, to represent an audio tensor. Adds audio-specific features to the tensor.


# Example: build audio documents from an in-memory tensor and from a URL,
# then serialize each document's own tensor to bytes.
from typing import Optional

import torch

from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioTorchTensor, AudioUrl


class MyAudioDoc(BaseDoc):
    title: str
    audio_tensor: Optional[AudioTorchTensor] = None
    url: Optional[AudioUrl] = None
    bytes_: Optional[AudioBytes] = None


# from tensor
doc_1 = MyAudioDoc(
    title='my_first_audio_doc',
    audio_tensor=torch.zeros(1000, 2),
)

# doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

# from url
doc_2 = MyAudioDoc(
    title='my_second_audio_doc',
    url='https://www.kozco.com/tech/piano2.wav',
)

# load() returns a pair; only the tensor is kept, the second element is discarded
doc_2.audio_tensor, _ = doc_2.url.load()
# doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
# serialize doc_2's own tensor (the original example mistakenly used doc_1's)
doc_2.bytes_ = doc_2.audio_tensor.to_bytes()

Source code in docarray/typing/tensor/audio/audio_torch_tensor.py
@_register_proto(proto_type_name='audio_torch_tensor')
class AudioTorchTensor(AbstractAudioTensor, TorchTensor, metaclass=metaTorchAndNode):
    """
    Subclass of [`TorchTensor`][docarray.typing.TorchTensor], to represent an audio tensor.
    Adds audio-specific features to the tensor.

    ---

    ```python
    from typing import Optional

    import torch

    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioTorchTensor, AudioUrl


    class MyAudioDoc(BaseDoc):
        title: str
        audio_tensor: Optional[AudioTorchTensor] = None
        url: Optional[AudioUrl] = None
        bytes_: Optional[AudioBytes] = None


    doc_1 = MyAudioDoc(
        title='my_first_audio_doc',
        audio_tensor=torch.zeros(1000, 2),
    )

    # doc_1.audio_tensor.save(file_path='/tmp/file_1.wav')
    doc_1.bytes_ = doc_1.audio_tensor.to_bytes()

    doc_2 = MyAudioDoc(
        title='my_second_audio_doc',
        url='https://www.kozco.com/tech/piano2.wav',
    )

    doc_2.audio_tensor, _ = doc_2.url.load()
    # doc_2.audio_tensor.save(file_path='/tmp/file_2.wav')
    doc_2.bytes_ = doc_2.audio_tensor.to_bytes()
    ```

    ---
    """

    ...