Skip to content




Bases: BaseBytes

Bytes that store audio and that can be loaded into an audio tensor

Source code in docarray/typing/bytes/
class AudioBytes(BaseBytes):
    Bytes that store audio and that can be loaded into an audio tensor

    def load(self) -> Tuple[AudioNdArray, int]:
        Load the Audio from the [`AudioBytes`][docarray.typing.AudioBytes] into an
        [`AudioNdArray`][docarray.typing.AudioNdArray].


        from typing import Optional
        from docarray import BaseDoc
        from docarray.typing import AudioBytes, AudioNdArray, AudioUrl

        class MyAudio(BaseDoc):
            url: AudioUrl
            tensor: Optional[AudioNdArray] = None
            bytes_: Optional[AudioBytes] = None
            frame_rate: Optional[float] = None

        doc = MyAudio(url='')
        doc.bytes_ = doc.url.load_bytes()
        doc.tensor, doc.frame_rate = doc.bytes_.load()

        # Note this is equivalent to do

        doc.tensor, doc.frame_rate = doc.url.load()

        assert isinstance(doc.tensor, AudioNdArray)

        :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing the
            audio bytes content, and an integer representing the frame rate.
        pydub = import_library('pydub', raise_error=True)  # noqa: F841
        from pydub import AudioSegment

        segment = AudioSegment.from_file(io.BytesIO(self))

        # Convert to float32 using NumPy
        samples = np.array(segment.get_array_of_samples())

        # Normalise float32 array so that values are between -1.0 and +1.0
        samples_norm = samples / 2 ** (segment.sample_width * 8 - 1)
        return parse_obj_as(AudioNdArray, samples_norm), segment.frame_rate


Load the Audio from the AudioBytes into an AudioNdArray.

from typing import Optional
from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioNdArray, AudioUrl

class MyAudio(BaseDoc):
    url: AudioUrl
    tensor: Optional[AudioNdArray] = None
    bytes_: Optional[AudioBytes] = None
    frame_rate: Optional[float] = None

doc = MyAudio(url='')
doc.bytes_ = doc.url.load_bytes()
doc.tensor, doc.frame_rate = doc.bytes_.load()

# Note this is equivalent to do

doc.tensor, doc.frame_rate = doc.url.load()

assert isinstance(doc.tensor, AudioNdArray)


Type Description
Tuple[AudioNdArray, int]

tuple of an AudioNdArray representing the audio bytes content, and an integer representing the frame rate.

Source code in docarray/typing/bytes/
def load(self) -> Tuple[AudioNdArray, int]:
    Load the Audio from the [`AudioBytes`][docarray.typing.AudioBytes] into an
    [`AudioNdArray`][docarray.typing.AudioNdArray].


    from typing import Optional
    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioNdArray, AudioUrl

    class MyAudio(BaseDoc):
        url: AudioUrl
        tensor: Optional[AudioNdArray] = None
        bytes_: Optional[AudioBytes] = None
        frame_rate: Optional[float] = None

    doc = MyAudio(url='')
    doc.bytes_ = doc.url.load_bytes()
    doc.tensor, doc.frame_rate = doc.bytes_.load()

    # Note this is equivalent to do

    doc.tensor, doc.frame_rate = doc.url.load()

    assert isinstance(doc.tensor, AudioNdArray)

    :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing the
        audio bytes content, and an integer representing the frame rate.
    pydub = import_library('pydub', raise_error=True)  # noqa: F841
    from pydub import AudioSegment

    segment = AudioSegment.from_file(io.BytesIO(self))

    # Convert to float32 using NumPy
    samples = np.array(segment.get_array_of_samples())

    # Normalise float32 array so that values are between -1.0 and +1.0
    samples_norm = samples / 2 ** (segment.sample_width * 8 - 1)
    return parse_obj_as(AudioNdArray, samples_norm), segment.frame_rate


Bases: BaseBytes

Bytes that store an image and that can be loaded into an image tensor

Source code in docarray/typing/bytes/
class ImageBytes(BaseBytes):
    Bytes that store an image and that can be loaded into an image tensor

    def load_pil(
    ) -> 'PILImage.Image':
        Load the image from the bytes into a `PIL.Image.Image` instance


        from pydantic import parse_obj_as

        from docarray import BaseDoc
        from docarray.typing import ImageUrl

        img_url = ""

        img_url = parse_obj_as(ImageUrl, img_url)
        img = img_url.load_pil()

        from PIL.Image import Image

        assert isinstance(img, Image)

        :return: a Pillow image
        PIL = import_library('PIL', raise_error=True)  # noqa: F841
        from PIL import Image as PILImage


    def load(
        width: Optional[int] = None,
        height: Optional[int] = None,
        axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
    ) -> ImageNdArray:
        Load the image from the [`ImageBytes`][docarray.typing.ImageBytes] into an
        [`ImageNdArray`][docarray.typing.ImageNdArray].


        from docarray import BaseDoc
        from docarray.typing import ImageNdArray, ImageUrl

        class MyDoc(BaseDoc):
            img_url: ImageUrl

        doc = MyDoc(

        img_tensor = doc.img_url.load()
        assert isinstance(img_tensor, ImageNdArray)

        img_tensor = doc.img_url.load(height=224, width=224)
        assert img_tensor.shape == (224, 224, 3)

        layout = ('C', 'W', 'H')
        img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
        assert img_tensor.shape == (3, 200, 100)


        :param width: width of the image tensor.
        :param height: height of the image tensor.
        :param axis_layout: ordering of the different image axes.
            'H' = height, 'W' = width, 'C' = color channel
        :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
        raw_img = self.load_pil()

        if width or height:
            new_width = width or raw_img.width
            new_height = height or raw_img.height
            raw_img = raw_img.resize((new_width, new_height))
            tensor = np.array(raw_img.convert('RGB'))
        except Exception:
            tensor = np.array(raw_img)

        img = self._move_channel_axis(tensor, axis_layout=axis_layout)
        return parse_obj_as(ImageNdArray, img)

    def _move_channel_axis(
        tensor: np.ndarray, axis_layout: Tuple[str, str, str] = ('H', 'W', 'C')
    ) -> np.ndarray:
        """Moves channel axis around."""
        channel_to_offset = {'H': 0, 'W': 1, 'C': 2}
        permutation = tuple(channel_to_offset[axis] for axis in axis_layout)
        return np.transpose(tensor, permutation)

load(width=None, height=None, axis_layout=('H', 'W', 'C'))

Load the image from the ImageBytes into an ImageNdArray.

from docarray import BaseDoc
from docarray.typing import ImageNdArray, ImageUrl

class MyDoc(BaseDoc):
    img_url: ImageUrl

doc = MyDoc(

img_tensor = doc.img_url.load()
assert isinstance(img_tensor, ImageNdArray)

img_tensor = doc.img_url.load(height=224, width=224)
assert img_tensor.shape == (224, 224, 3)

layout = ('C', 'W', 'H')
img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
assert img_tensor.shape == (3, 200, 100)


Name Type Description Default
width Optional[int]

width of the image tensor.

height Optional[int]

height of the image tensor.

axis_layout Tuple[str, str, str]

ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel

('H', 'W', 'C')


Type Description

ImageNdArray representing the image as RGB values

Source code in docarray/typing/bytes/
def load(
    width: Optional[int] = None,
    height: Optional[int] = None,
    axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
) -> ImageNdArray:
    Load the image from the [`ImageBytes`][docarray.typing.ImageBytes] into an
    [`ImageNdArray`][docarray.typing.ImageNdArray].


    from docarray import BaseDoc
    from docarray.typing import ImageNdArray, ImageUrl

    class MyDoc(BaseDoc):
        img_url: ImageUrl

    doc = MyDoc(

    img_tensor = doc.img_url.load()
    assert isinstance(img_tensor, ImageNdArray)

    img_tensor = doc.img_url.load(height=224, width=224)
    assert img_tensor.shape == (224, 224, 3)

    layout = ('C', 'W', 'H')
    img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
    assert img_tensor.shape == (3, 200, 100)


    :param width: width of the image tensor.
    :param height: height of the image tensor.
    :param axis_layout: ordering of the different image axes.
        'H' = height, 'W' = width, 'C' = color channel
    :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
    raw_img = self.load_pil()

    if width or height:
        new_width = width or raw_img.width
        new_height = height or raw_img.height
        raw_img = raw_img.resize((new_width, new_height))
        tensor = np.array(raw_img.convert('RGB'))
    except Exception:
        tensor = np.array(raw_img)

    img = self._move_channel_axis(tensor, axis_layout=axis_layout)
    return parse_obj_as(ImageNdArray, img)


Load the image from the bytes into a PIL.Image.Image instance

from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.typing import ImageUrl

img_url = ""

img_url = parse_obj_as(ImageUrl, img_url)
img = img_url.load_pil()

from PIL.Image import Image

assert isinstance(img, Image)


Type Description

a Pillow image

Source code in docarray/typing/bytes/
def load_pil(
) -> 'PILImage.Image':
    Load the image from the bytes into a `PIL.Image.Image` instance


    from pydantic import parse_obj_as

    from docarray import BaseDoc
    from docarray.typing import ImageUrl

    img_url = ""

    img_url = parse_obj_as(ImageUrl, img_url)
    img = img_url.load_pil()

    from PIL.Image import Image

    assert isinstance(img, Image)

    :return: a Pillow image
    PIL = import_library('PIL', raise_error=True)  # noqa: F841
    from PIL import Image as PILImage



Bases: BaseBytes

Bytes that store a video and that can be loaded into a video tensor

Source code in docarray/typing/bytes/
class VideoBytes(BaseBytes):
    Bytes that store a video and that can be loaded into a video tensor

    def load(self, **kwargs) -> VideoLoadResult:
        Load the video from the bytes into a VideoLoadResult object consisting of:

        - a [`VideoNdArray`][docarray.typing.VideoNdArray] (`VideoLoadResult.video`)
        - an [`AudioNdArray`][docarray.typing.AudioNdArray] (`VideoLoadResult.audio`)
        - an [`NdArray`][docarray.typing.NdArray] containing the key frame indices (`VideoLoadResult.key_frame_indices`).


        from docarray import BaseDoc
        from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl

        class MyDoc(BaseDoc):
            video_url: VideoUrl

        doc = MyDoc(

        video, audio, key_frame_indices = doc.video_url.load()
        assert isinstance(video, VideoNdArray)
        assert isinstance(audio, AudioNdArray)
        assert isinstance(key_frame_indices, NdArray)


        :param kwargs: supports all keyword arguments that are being supported by
            `av.open()`, as described in the PyAV documentation.
        :return: a `VideoLoadResult` instance with video, audio and keyframe indices
        if TYPE_CHECKING:
            import av
            av = import_library('av')

        with, **kwargs) as container:
            audio_frames: List[np.ndarray] = []
            video_frames: List[np.ndarray] = []
            keyframe_indices: List[int] = []

            for frame in container.decode():
                if type(frame) ==
                elif type(frame) ==
                    if frame.key_frame == 1:
                        curr_index = len(video_frames)


        if len(audio_frames) == 0:
            audio = parse_obj_as(AudioNdArray, np.array(audio_frames))
            audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))

        video = parse_obj_as(VideoNdArray, np.stack(video_frames))
        indices = parse_obj_as(NdArray, keyframe_indices)

        return VideoLoadResult(video=video, audio=audio, key_frame_indices=indices)


Load the video from the bytes into a VideoLoadResult object consisting of:

  • a VideoNdArray (VideoLoadResult.video)
  • an AudioNdArray (VideoLoadResult.audio)
  • an NdArray containing the key frame indices (VideoLoadResult.key_frame_indices).

from docarray import BaseDoc
from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl

class MyDoc(BaseDoc):
    video_url: VideoUrl

doc = MyDoc(

video, audio, key_frame_indices = doc.video_url.load()
assert isinstance(video, VideoNdArray)
assert isinstance(audio, AudioNdArray)
assert isinstance(key_frame_indices, NdArray)


Name Type Description Default

supports all keyword arguments that are being supported by av.open(), as described in the PyAV documentation



Type Description

a VideoLoadResult instance with video, audio and keyframe indices

Source code in docarray/typing/bytes/
def load(self, **kwargs) -> VideoLoadResult:
    Load the video from the bytes into a VideoLoadResult object consisting of:

    - a [`VideoNdArray`][docarray.typing.VideoNdArray] (`VideoLoadResult.video`)
    - an [`AudioNdArray`][docarray.typing.AudioNdArray] (`VideoLoadResult.audio`)
    - an [`NdArray`][docarray.typing.NdArray] containing the key frame indices (`VideoLoadResult.key_frame_indices`).


    from docarray import BaseDoc
    from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl

    class MyDoc(BaseDoc):
        video_url: VideoUrl

    doc = MyDoc(

    video, audio, key_frame_indices = doc.video_url.load()
    assert isinstance(video, VideoNdArray)
    assert isinstance(audio, AudioNdArray)
    assert isinstance(key_frame_indices, NdArray)


    :param kwargs: supports all keyword arguments that are being supported by
        `av.open()`, as described in the PyAV documentation.
    :return: a `VideoLoadResult` instance with video, audio and keyframe indices
        import av
        av = import_library('av')

    with, **kwargs) as container:
        audio_frames: List[np.ndarray] = []
        video_frames: List[np.ndarray] = []
        keyframe_indices: List[int] = []

        for frame in container.decode():
            if type(frame) ==
            elif type(frame) ==
                if frame.key_frame == 1:
                    curr_index = len(video_frames)


    if len(audio_frames) == 0:
        audio = parse_obj_as(AudioNdArray, np.array(audio_frames))
        audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))

    video = parse_obj_as(VideoNdArray, np.stack(video_frames))
    indices = parse_obj_as(NdArray, keyframe_indices)

    return VideoLoadResult(video=video, audio=audio, key_frame_indices=indices)



Bases: BaseBytes

Bytes that store audio and that can be loaded into an audio tensor

Source code in docarray/typing/bytes/
class AudioBytes(BaseBytes):
    Bytes that store audio and that can be loaded into an audio tensor

    def load(self) -> Tuple[AudioNdArray, int]:
        Load the Audio from the [`AudioBytes`][docarray.typing.AudioBytes] into an
        [`AudioNdArray`][docarray.typing.AudioNdArray].


        from typing import Optional
        from docarray import BaseDoc
        from docarray.typing import AudioBytes, AudioNdArray, AudioUrl

        class MyAudio(BaseDoc):
            url: AudioUrl
            tensor: Optional[AudioNdArray] = None
            bytes_: Optional[AudioBytes] = None
            frame_rate: Optional[float] = None

        doc = MyAudio(url='')
        doc.bytes_ = doc.url.load_bytes()
        doc.tensor, doc.frame_rate = doc.bytes_.load()

        # Note this is equivalent to do

        doc.tensor, doc.frame_rate = doc.url.load()

        assert isinstance(doc.tensor, AudioNdArray)

        :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing the
            audio bytes content, and an integer representing the frame rate.
        pydub = import_library('pydub', raise_error=True)  # noqa: F841
        from pydub import AudioSegment

        segment = AudioSegment.from_file(io.BytesIO(self))

        # Convert to float32 using NumPy
        samples = np.array(segment.get_array_of_samples())

        # Normalise float32 array so that values are between -1.0 and +1.0
        samples_norm = samples / 2 ** (segment.sample_width * 8 - 1)
        return parse_obj_as(AudioNdArray, samples_norm), segment.frame_rate

Load the Audio from the AudioBytes into an AudioNdArray.

from typing import Optional
from docarray import BaseDoc
from docarray.typing import AudioBytes, AudioNdArray, AudioUrl

class MyAudio(BaseDoc):
    url: AudioUrl
    tensor: Optional[AudioNdArray] = None
    bytes_: Optional[AudioBytes] = None
    frame_rate: Optional[float] = None

doc = MyAudio(url='')
doc.bytes_ = doc.url.load_bytes()
doc.tensor, doc.frame_rate = doc.bytes_.load()

# Note this is equivalent to do

doc.tensor, doc.frame_rate = doc.url.load()

assert isinstance(doc.tensor, AudioNdArray)


Type Description
Tuple[AudioNdArray, int]

tuple of an AudioNdArray representing the audio bytes content, and an integer representing the frame rate.

Source code in docarray/typing/bytes/
def load(self) -> Tuple[AudioNdArray, int]:
    Load the Audio from the [`AudioBytes`][docarray.typing.AudioBytes] into an
    [`AudioNdArray`][docarray.typing.AudioNdArray].


    from typing import Optional
    from docarray import BaseDoc
    from docarray.typing import AudioBytes, AudioNdArray, AudioUrl

    class MyAudio(BaseDoc):
        url: AudioUrl
        tensor: Optional[AudioNdArray] = None
        bytes_: Optional[AudioBytes] = None
        frame_rate: Optional[float] = None

    doc = MyAudio(url='')
    doc.bytes_ = doc.url.load_bytes()
    doc.tensor, doc.frame_rate = doc.bytes_.load()

    # Note this is equivalent to do

    doc.tensor, doc.frame_rate = doc.url.load()

    assert isinstance(doc.tensor, AudioNdArray)

    :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing the
        audio bytes content, and an integer representing the frame rate.
    pydub = import_library('pydub', raise_error=True)  # noqa: F841
    from pydub import AudioSegment

    segment = AudioSegment.from_file(io.BytesIO(self))

    # Convert to float32 using NumPy
    samples = np.array(segment.get_array_of_samples())

    # Normalise float32 array so that values are between -1.0 and +1.0
    samples_norm = samples / 2 ** (segment.sample_width * 8 - 1)
    return parse_obj_as(AudioNdArray, samples_norm), segment.frame_rate



Bases: bytes, AbstractType

Bytes type for docarray

Source code in docarray/typing/bytes/
class BaseBytes(bytes, AbstractType):
    Bytes type for docarray

    def _docarray_validate(
        cls: Type[T],
        value: Any,
    ) -> T:
        value = bytes_validator(value)
        return cls(value)

    def from_protobuf(cls: Type[T], pb_msg: T) -> T:
        return parse_obj_as(cls, pb_msg)

    def _to_node_protobuf(self: T) -> 'NodeProto':
        from docarray.proto import NodeProto

        return NodeProto(blob=self, type=self._proto_type_name)

    if is_pydantic_v2:

        def __get_pydantic_core_schema__(
            cls, _source_type: Any, _handler: 'GetCoreSchemaHandler'
        ) -> 'core_schema.CoreSchema':
            return core_schema.general_after_validator_function(



Bases: BaseBytes

Bytes that store an image and that can be loaded into an image tensor

Source code in docarray/typing/bytes/
class ImageBytes(BaseBytes):
    Bytes that store an image and that can be loaded into an image tensor

    def load_pil(
    ) -> 'PILImage.Image':
        Load the image from the bytes into a `PIL.Image.Image` instance


        from pydantic import parse_obj_as

        from docarray import BaseDoc
        from docarray.typing import ImageUrl

        img_url = ""

        img_url = parse_obj_as(ImageUrl, img_url)
        img = img_url.load_pil()

        from PIL.Image import Image

        assert isinstance(img, Image)

        :return: a Pillow image
        PIL = import_library('PIL', raise_error=True)  # noqa: F841
        from PIL import Image as PILImage


    def load(
        width: Optional[int] = None,
        height: Optional[int] = None,
        axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
    ) -> ImageNdArray:
        Load the image from the [`ImageBytes`][docarray.typing.ImageBytes] into an
        [`ImageNdArray`][docarray.typing.ImageNdArray].


        from docarray import BaseDoc
        from docarray.typing import ImageNdArray, ImageUrl

        class MyDoc(BaseDoc):
            img_url: ImageUrl

        doc = MyDoc(

        img_tensor = doc.img_url.load()
        assert isinstance(img_tensor, ImageNdArray)

        img_tensor = doc.img_url.load(height=224, width=224)
        assert img_tensor.shape == (224, 224, 3)

        layout = ('C', 'W', 'H')
        img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
        assert img_tensor.shape == (3, 200, 100)


        :param width: width of the image tensor.
        :param height: height of the image tensor.
        :param axis_layout: ordering of the different image axes.
            'H' = height, 'W' = width, 'C' = color channel
        :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
        raw_img = self.load_pil()

        if width or height:
            new_width = width or raw_img.width
            new_height = height or raw_img.height
            raw_img = raw_img.resize((new_width, new_height))
            tensor = np.array(raw_img.convert('RGB'))
        except Exception:
            tensor = np.array(raw_img)

        img = self._move_channel_axis(tensor, axis_layout=axis_layout)
        return parse_obj_as(ImageNdArray, img)

    def _move_channel_axis(
        tensor: np.ndarray, axis_layout: Tuple[str, str, str] = ('H', 'W', 'C')
    ) -> np.ndarray:
        """Moves channel axis around."""
        channel_to_offset = {'H': 0, 'W': 1, 'C': 2}
        permutation = tuple(channel_to_offset[axis] for axis in axis_layout)
        return np.transpose(tensor, permutation)
load(width=None, height=None, axis_layout=('H', 'W', 'C'))

Load the image from the ImageBytes into an ImageNdArray.

from docarray import BaseDoc
from docarray.typing import ImageNdArray, ImageUrl

class MyDoc(BaseDoc):
    img_url: ImageUrl

doc = MyDoc(

img_tensor = doc.img_url.load()
assert isinstance(img_tensor, ImageNdArray)

img_tensor = doc.img_url.load(height=224, width=224)
assert img_tensor.shape == (224, 224, 3)

layout = ('C', 'W', 'H')
img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
assert img_tensor.shape == (3, 200, 100)


Name Type Description Default
width Optional[int]

width of the image tensor.

height Optional[int]

height of the image tensor.

axis_layout Tuple[str, str, str]

ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel

('H', 'W', 'C')


Type Description

ImageNdArray representing the image as RGB values

Source code in docarray/typing/bytes/
def load(
    width: Optional[int] = None,
    height: Optional[int] = None,
    axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
) -> ImageNdArray:
    Load the image from the [`ImageBytes`][docarray.typing.ImageBytes] into an
    [`ImageNdArray`][docarray.typing.ImageNdArray].


    from docarray import BaseDoc
    from docarray.typing import ImageNdArray, ImageUrl

    class MyDoc(BaseDoc):
        img_url: ImageUrl

    doc = MyDoc(

    img_tensor = doc.img_url.load()
    assert isinstance(img_tensor, ImageNdArray)

    img_tensor = doc.img_url.load(height=224, width=224)
    assert img_tensor.shape == (224, 224, 3)

    layout = ('C', 'W', 'H')
    img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
    assert img_tensor.shape == (3, 200, 100)


    :param width: width of the image tensor.
    :param height: height of the image tensor.
    :param axis_layout: ordering of the different image axes.
        'H' = height, 'W' = width, 'C' = color channel
    :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
    raw_img = self.load_pil()

    if width or height:
        new_width = width or raw_img.width
        new_height = height or raw_img.height
        raw_img = raw_img.resize((new_width, new_height))
        tensor = np.array(raw_img.convert('RGB'))
    except Exception:
        tensor = np.array(raw_img)

    img = self._move_channel_axis(tensor, axis_layout=axis_layout)
    return parse_obj_as(ImageNdArray, img)

Load the image from the bytes into a PIL.Image.Image instance

from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.typing import ImageUrl

img_url = ""

img_url = parse_obj_as(ImageUrl, img_url)
img = img_url.load_pil()

from PIL.Image import Image

assert isinstance(img, Image)


Type Description

a Pillow image

Source code in docarray/typing/bytes/
def load_pil(
) -> 'PILImage.Image':
    Load the image from the bytes into a `PIL.Image.Image` instance


    from pydantic import parse_obj_as

    from docarray import BaseDoc
    from docarray.typing import ImageUrl

    img_url = ""

    img_url = parse_obj_as(ImageUrl, img_url)
    img = img_url.load_pil()

    from PIL.Image import Image

    assert isinstance(img, Image)

    :return: a Pillow image
    PIL = import_library('PIL', raise_error=True)  # noqa: F841
    from PIL import Image as PILImage




Bases: BaseBytes

Bytes that store a video and that can be loaded into a video tensor

Source code in docarray/typing/bytes/
class VideoBytes(BaseBytes):
    Bytes that store a video and that can be loaded into a video tensor

    def load(self, **kwargs) -> VideoLoadResult:
        Load the video from the bytes into a VideoLoadResult object consisting of:

        - a [`VideoNdArray`][docarray.typing.VideoNdArray] (`VideoLoadResult.video`)
        - an [`AudioNdArray`][docarray.typing.AudioNdArray] (`VideoLoadResult.audio`)
        - an [`NdArray`][docarray.typing.NdArray] containing the key frame indices (`VideoLoadResult.key_frame_indices`).


        from docarray import BaseDoc
        from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl

        class MyDoc(BaseDoc):
            video_url: VideoUrl

        doc = MyDoc(

        video, audio, key_frame_indices = doc.video_url.load()
        assert isinstance(video, VideoNdArray)
        assert isinstance(audio, AudioNdArray)
        assert isinstance(key_frame_indices, NdArray)


        :param kwargs: supports all keyword arguments that are being supported by
            `av.open()`, as described in the PyAV documentation.
        :return: a `VideoLoadResult` instance with video, audio and keyframe indices
        if TYPE_CHECKING:
            import av
            av = import_library('av')

        with, **kwargs) as container:
            audio_frames: List[np.ndarray] = []
            video_frames: List[np.ndarray] = []
            keyframe_indices: List[int] = []

            for frame in container.decode():
                if type(frame) ==
                elif type(frame) ==
                    if frame.key_frame == 1:
                        curr_index = len(video_frames)


        if len(audio_frames) == 0:
            audio = parse_obj_as(AudioNdArray, np.array(audio_frames))
            audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))

        video = parse_obj_as(VideoNdArray, np.stack(video_frames))
        indices = parse_obj_as(NdArray, keyframe_indices)

        return VideoLoadResult(video=video, audio=audio, key_frame_indices=indices)

Load the video from the bytes into a VideoLoadResult object consisting of:

  • a VideoNdArray (VideoLoadResult.video)
  • an AudioNdArray (VideoLoadResult.audio)
  • an NdArray containing the key frame indices (VideoLoadResult.key_frame_indices).

from docarray import BaseDoc
from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl

class MyDoc(BaseDoc):
    video_url: VideoUrl

doc = MyDoc(

video, audio, key_frame_indices = doc.video_url.load()
assert isinstance(video, VideoNdArray)
assert isinstance(audio, AudioNdArray)
assert isinstance(key_frame_indices, NdArray)


Name Type Description Default

supports all keyword arguments that are being supported by av.open(), as described in the PyAV documentation



Type Description

a VideoLoadResult instance with video, audio and keyframe indices

Source code in docarray/typing/bytes/
def load(self, **kwargs) -> VideoLoadResult:
    Load the video from the bytes into a VideoLoadResult object consisting of:

    - a [`VideoNdArray`][docarray.typing.VideoNdArray] (``)
    - an [`AudioNdArray`][docarray.typing.AudioNdArray] (``)
    - an [`NdArray`][docarray.typing.NdArray] containing the key frame indices (`VideoLoadResult.key_frame_indices`).


    from docarray import BaseDoc
    from docarray.typing import AudioNdArray, NdArray, VideoNdArray, VideoUrl

    class MyDoc(BaseDoc):
        video_url: VideoUrl

    doc = MyDoc(

    video, audio, key_frame_indices = doc.video_url.load()
    assert isinstance(video, VideoNdArray)
    assert isinstance(audio, AudioNdArray)
    assert isinstance(key_frame_indices, NdArray)


    :param kwargs: supports all keyword arguments that are being supported by
        `av.open()`, as described in the PyAV documentation.
    :return: a `VideoLoadResult` instance with video, audio and keyframe indices
        import av
        av = import_library('av')

    with, **kwargs) as container:
        audio_frames: List[np.ndarray] = []
        video_frames: List[np.ndarray] = []
        keyframe_indices: List[int] = []

        for frame in container.decode():
            if type(frame) ==
            elif type(frame) ==
                if frame.key_frame == 1:
                    curr_index = len(video_frames)


    if len(audio_frames) == 0:
        audio = parse_obj_as(AudioNdArray, np.array(audio_frames))
        audio = parse_obj_as(AudioNdArray, np.stack(audio_frames))

    video = parse_obj_as(VideoNdArray, np.stack(video_frames))
    indices = parse_obj_as(NdArray, keyframe_indices)

    return VideoLoadResult(video=video, audio=audio, key_frame_indices=indices)