Skip to content

Url

docarray.typing.url

AnyUrl

Bases: BaseAnyUrl, AbstractType

Source code in docarray/typing/url/any_url.py
@_register_proto(proto_type_name='any_url')
class AnyUrl(BaseAnyUrl, AbstractType):
    """Generic URL type that also accepts local file paths without a scheme."""

    # Hosts are optional so that plain local paths can be passed as URLs.
    host_required = False

    def _to_node_protobuf(self) -> 'NodeProto':
        """Wrap this URL in a `NodeProto` protobuf message.

        Used when the value is nested inside a Document that is itself being
        converted into a protobuf.

        :return: the nested item protobuf message
        """
        from docarray.proto import NodeProto

        node = NodeProto(text=str(self), type=self._proto_type_name)
        return node

    @classmethod
    def validate(
        cls: Type[T],
        value: Union[T, np.ndarray, Any],
        field: 'ModelField',
        config: 'BaseConfig',
    ) -> T:
        """Validate `value` as a URL or local path and return it as `cls`.

        A string that neither starts with ``http`` nor is an absolute path is
        treated as a relative path: it is made absolute for the validation
        step only, and the returned instance keeps the path exactly as given.

        :param value: the candidate URL or path
        :param field: the field being validated (forwarded to the parent)
        :param config: the model config (forwarded to the parent)
        :return: an instance of `cls`
        """
        import os

        is_relative = (
            isinstance(value, str)
            and not value.startswith('http')
            and not os.path.isabs(value)
        )
        candidate = os.path.abspath(value) if is_relative else value

        # basic url validation, delegated to the parent class
        validated = super().validate(candidate, field, config)

        # relative inputs are returned untouched, everything else as validated
        result = value if is_relative else validated
        return cls(str(result), scheme=None)

    @classmethod
    def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
        """
        Check the individual parts of a URL.
        URLs must work both locally and remotely, so a missing `scheme` is
        accepted; such a value is assumed to be a local file path.

        :param parts: the parsed URL parts
        :param validate_port: whether the port should be validated as well
        :return: the unchanged `parts`
        """
        scheme = parts['scheme']
        # unlike pydantic, a missing scheme is fine; only a present one is checked
        if scheme is not None:
            if cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
                raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

        if validate_port:
            cls._validate_port(parts['port'])

        user_missing = parts['user'] is None
        if cls.user_required and user_missing:
            raise errors.UrlUserInfoError()

        return parts

    @classmethod
    def build(
        cls,
        *,
        scheme: str,
        user: Optional[str] = None,
        password: Optional[str] = None,
        host: str,
        port: Optional[str] = None,
        path: Optional[str] = None,
        query: Optional[str] = None,
        fragment: Optional[str] = None,
        **_kwargs: str,
    ) -> str:
        """
        Assemble a URL from its parts.
        Behaves like the pydantic implementation, except that `scheme` may be
        missing so that a file path can be passed without a prefix.

        :return: the assembled URL, without a leading `://` if `scheme` is None
        """
        built = super().build(
            # the parent expects a string, so a missing scheme becomes ''
            scheme='' if scheme is None else scheme,
            user=user,
            password=password,
            host=host,
            port=port,
            path=path,
            query=query,
            fragment=fragment,
            **_kwargs,
        )
        if scheme is None and built.startswith('://'):
            # no scheme was given, so drop the dangling `://` prefix
            return built[3:]
        return built

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
        """
        Create a url from a proto msg.
        :param pb_msg: the value read from the protobuf message
        :return: url
        """
        parsed = parse_obj_as(cls, pb_msg)
        return parsed

    def load_bytes(self, timeout: Optional[float] = None) -> bytes:
        """Read the content behind the url into a bytes object.

        `http`, `https` and `data` URLs are fetched with urlopen; anything else
        is read as a local file.
        :param timeout: timeout for urlopen. Only relevant if URI is not local
        :return: bytes.
        :raises FileNotFoundError: if the value is neither such a URL nor an
            existing local path
        """
        if urllib.parse.urlparse(self).scheme in {'http', 'https', 'data'}:
            request = urllib.request.Request(
                self, headers={'User-Agent': 'Mozilla/5.0'}
            )
            extra = {} if timeout is None else {'timeout': timeout}
            with urllib.request.urlopen(request, **extra) as response:  # type: ignore
                return response.read()

        if not os.path.exists(self):
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')

        with open(self, 'rb') as handle:
            return handle.read()

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL from its parts.
    Behaves like the pydantic implementation, except that `scheme` may be
    missing so that a file path can be passed without a prefix.

    :return: the assembled URL, without a leading `://` if `scheme` is None
    """
    built = super().build(
        # the parent expects a string, so a missing scheme becomes ''
        scheme='' if scheme is None else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )
    if scheme is None and built.startswith('://'):
        # no scheme was given, so drop the dangling `://` prefix
        return built[3:]
    return built

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Create a url from a proto msg.
    :param pb_msg: the value read from the protobuf message
    :return: url
    """
    parsed = parse_obj_as(cls, pb_msg)
    return parsed

load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Read the content behind the url into a bytes object.

    `http`, `https` and `data` URLs are fetched with urlopen; anything else
    is read as a local file.
    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the value is neither such a URL nor an
        existing local path
    """
    if urllib.parse.urlparse(self).scheme in {'http', 'https', 'data'}:
        request = urllib.request.Request(self, headers={'User-Agent': 'Mozilla/5.0'})
        extra = {} if timeout is None else {'timeout': timeout}
        with urllib.request.urlopen(request, **extra) as response:  # type: ignore
            return response.read()

    if not os.path.exists(self):
        raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')

    with open(self, 'rb') as handle:
        return handle.read()

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Check the individual parts of a URL.
    URLs must work both locally and remotely, so a missing `scheme` is
    accepted; such a value is assumed to be a local file path.

    :param parts: the parsed URL parts
    :param validate_port: whether the port should be validated as well
    :return: the unchanged `parts`
    """
    scheme = parts['scheme']
    # unlike pydantic, a missing scheme is fine; only a present one is checked
    if scheme is not None:
        if cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
            raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user_missing = parts['user'] is None
    if cls.user_required and user_missing:
        raise errors.UrlUserInfoError()

    return parts

AudioUrl

Bases: AnyUrl

URL to an audio file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/audio_url.py
@_register_proto(proto_type_name='audio_url')
class AudioUrl(AnyUrl):
    """
    URL pointing to an audio file.
    May be a remote (web) URL or a local file path.
    """

    def load(self: T) -> Tuple[AudioNdArray, int]:
        """
        Read the audio behind the url and return it as an
        [`AudioNdArray`][docarray.typing.AudioNdArray] together with the frame rate.

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc
        from docarray.typing import AudioNdArray, AudioUrl


        class MyDoc(BaseDoc):
            audio_url: AudioUrl
            audio_tensor: Optional[AudioNdArray]


        doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
        doc.audio_tensor, _ = doc.audio_url.load()
        assert isinstance(doc.audio_tensor, AudioNdArray)
        ```

        ---

        :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
            the audio file content, and an integer representing the frame rate.

        """
        # decoding is delegated to the AudioBytes object returned by load_bytes
        return self.load_bytes().load()

    def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
        """
        Load or download the file behind the url and wrap its content in an
        [`AudioBytes`][docarray.typing.AudioBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`AudioBytes`][docarray.typing.AudioBytes] object
        """
        return AudioBytes(super().load_bytes(timeout=timeout))

    def display(self):
        """
        Play the audio from the url in a notebook; warn when not in a notebook.
        """
        if not is_notebook():
            warnings.warn('Display of audio is only possible in a notebook.')
            return

        from IPython.display import Audio, display

        # urls starting with 'http' are passed as data, anything else as a file name
        if self.startswith('http'):
            player = Audio(data=self)
        else:
            player = Audio(filename=self)
        display(player)

display()

Play the audio sound from url in notebook.

Source code in docarray/typing/url/audio_url.py
def display(self):
    """
    Play the audio from the url in a notebook; warn when not in a notebook.
    """
    if not is_notebook():
        warnings.warn('Display of audio is only possible in a notebook.')
        return

    from IPython.display import Audio, display

    # urls starting with 'http' are passed as data, anything else as a file name
    if self.startswith('http'):
        player = Audio(data=self)
    else:
        player = Audio(filename=self)
    display(player)

load()

Load the data from the url into an AudioNdArray and the frame rate.


from typing import Optional

from docarray import BaseDoc
from docarray.typing import AudioNdArray, AudioUrl


class MyDoc(BaseDoc):
    audio_url: AudioUrl
    audio_tensor: Optional[AudioNdArray]


doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
doc.audio_tensor, _ = doc.audio_url.load()
assert isinstance(doc.audio_tensor, AudioNdArray)

Returns:

Type Description
Tuple[AudioNdArray, int]

tuple of an AudioNdArray representing the audio file content, and an integer representing the frame rate.

Source code in docarray/typing/url/audio_url.py
def load(self: T) -> Tuple[AudioNdArray, int]:
    """
    Read the audio behind the url and return it as an
    [`AudioNdArray`][docarray.typing.AudioNdArray] together with the frame rate.

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc
    from docarray.typing import AudioNdArray, AudioUrl


    class MyDoc(BaseDoc):
        audio_url: AudioUrl
        audio_tensor: Optional[AudioNdArray]


    doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
    doc.audio_tensor, _ = doc.audio_url.load()
    assert isinstance(doc.audio_tensor, AudioNdArray)
    ```

    ---

    :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
        the audio file content, and an integer representing the frame rate.

    """
    # decoding is delegated to the object returned by load_bytes
    return self.load_bytes().load()

load_bytes(timeout=None)

Convert url to AudioBytes. This will either load or download the file and save it into an AudioBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
AudioBytes

AudioBytes object

Source code in docarray/typing/url/audio_url.py
def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
    """
    Load or download the file behind the url and wrap its content in an
    [`AudioBytes`][docarray.typing.AudioBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`AudioBytes`][docarray.typing.AudioBytes] object
    """
    return AudioBytes(super().load_bytes(timeout=timeout))

ImageUrl

Bases: AnyUrl

URL to an image file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/image_url.py
@_register_proto(proto_type_name='image_url')
class ImageUrl(AnyUrl):
    """
    URL pointing to an image file.
    May be a remote (web) URL or a local file path.
    """

    def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
        """
        Read the image behind the url into a `PIL.Image.Image` instance

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray import BaseDoc
        from docarray.typing import ImageUrl

        img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

        img_url = parse_obj_as(ImageUrl, img_url)
        img = img_url.load_pil()

        from PIL.Image import Image

        assert isinstance(img, Image)
        ```

        ---
        :param timeout: timeout for urlopen, forwarded to `load_bytes`
        :return: a Pillow image
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        raw = self.load_bytes(timeout=timeout)
        return ImageBytes(raw).load_pil()

    def load(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
        timeout: Optional[float] = None,
    ) -> ImageNdArray:
        """
        Read the image behind the url into an [`ImageNdArray`][docarray.typing.ImageNdArray]

        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import ImageUrl, ImageNdArray


        class MyDoc(BaseDoc):
            img_url: ImageUrl


        doc = MyDoc(
            img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
            "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
        )

        img_tensor = doc.img_url.load()
        assert isinstance(img_tensor, ImageNdArray)

        img_tensor = doc.img_url.load(height=224, width=224)
        assert img_tensor.shape == (224, 224, 3)

        layout = ('C', 'W', 'H')
        img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
        assert img_tensor.shape == (3, 200, 100)
        ```

        ---

        :param width: width of the image tensor.
        :param height: height of the image tensor.
        :param axis_layout: ordering of the different image axes.
            'H' = height, 'W' = width, 'C' = color channel
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        raw = self.load_bytes(timeout=timeout)
        return ImageBytes(raw).load(width, height, axis_layout)

    def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
        """
        Load or download the file behind the url and wrap its content in an
        [`ImageBytes`][docarray.typing.ImageBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`ImageBytes`][docarray.typing.ImageBytes] object
        """
        return ImageBytes(super().load_bytes(timeout=timeout))

    def display(self) -> None:
        """
        Show the image from the url in a notebook; warn when not in a notebook.
        """
        if not is_notebook():
            warnings.warn('Display of image is only possible in a notebook.')
            return

        from IPython.display import Image, display

        # urls starting with 'http' are passed as url, anything else as a file name
        if self.startswith('http'):
            picture = Image(url=self)
        else:
            picture = Image(filename=self)
        display(picture)

display()

Display image data from url in notebook.

Source code in docarray/typing/url/image_url.py
def display(self) -> None:
    """
    Show the image from the url in a notebook; warn when not in a notebook.
    """
    if not is_notebook():
        warnings.warn('Display of image is only possible in a notebook.')
        return

    from IPython.display import Image, display

    # urls starting with 'http' are passed as url, anything else as a file name
    if self.startswith('http'):
        picture = Image(url=self)
    else:
        picture = Image(filename=self)
    display(picture)

load(width=None, height=None, axis_layout=('H', 'W', 'C'), timeout=None)

Load the data from the url into an ImageNdArray


from docarray import BaseDoc
from docarray.typing import ImageUrl, ImageNdArray


class MyDoc(BaseDoc):
    img_url: ImageUrl


doc = MyDoc(
    img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
    "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
)

img_tensor = doc.img_url.load()
assert isinstance(img_tensor, ImageNdArray)

img_tensor = doc.img_url.load(height=224, width=224)
assert img_tensor.shape == (224, 224, 3)

layout = ('C', 'W', 'H')
img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
assert img_tensor.shape == (3, 200, 100)

Parameters:

Name Type Description Default
width Optional[int]

width of the image tensor.

None
height Optional[int]

height of the image tensor.

None
axis_layout Tuple[str, str, str]

ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel

('H', 'W', 'C')
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None

Returns:

Type Description
ImageNdArray

ImageNdArray representing the image as RGB values

Source code in docarray/typing/url/image_url.py
def load(
    self,
    width: Optional[int] = None,
    height: Optional[int] = None,
    axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
    timeout: Optional[float] = None,
) -> ImageNdArray:
    """
    Read the image behind the url into an [`ImageNdArray`][docarray.typing.ImageNdArray]

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import ImageUrl, ImageNdArray


    class MyDoc(BaseDoc):
        img_url: ImageUrl


    doc = MyDoc(
        img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
        "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
    )

    img_tensor = doc.img_url.load()
    assert isinstance(img_tensor, ImageNdArray)

    img_tensor = doc.img_url.load(height=224, width=224)
    assert img_tensor.shape == (224, 224, 3)

    layout = ('C', 'W', 'H')
    img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
    assert img_tensor.shape == (3, 200, 100)
    ```

    ---

    :param width: width of the image tensor.
    :param height: height of the image tensor.
    :param axis_layout: ordering of the different image axes.
        'H' = height, 'W' = width, 'C' = color channel
    :param timeout: timeout (sec) for urlopen network request.
        Only relevant if URL is not local
    :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
    """
    from docarray.typing.bytes.image_bytes import ImageBytes

    raw = self.load_bytes(timeout=timeout)
    return ImageBytes(raw).load(width, height, axis_layout)

load_bytes(timeout=None)

Convert url to ImageBytes. This will either load or download the file and save it into an ImageBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
ImageBytes

ImageBytes object

Source code in docarray/typing/url/image_url.py
def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
    """
    Load or download the file behind the url and wrap its content in an
    [`ImageBytes`][docarray.typing.ImageBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`ImageBytes`][docarray.typing.ImageBytes] object
    """
    return ImageBytes(super().load_bytes(timeout=timeout))

load_pil(timeout=None)

Load the image from the bytes into a PIL.Image.Image instance


from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.typing import ImageUrl

img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

img_url = parse_obj_as(ImageUrl, img_url)
img = img_url.load_pil()

from PIL.Image import Image

assert isinstance(img, Image)

Returns:

Type Description
Image

a Pillow image

Source code in docarray/typing/url/image_url.py
def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
    """
    Read the image behind the url into a `PIL.Image.Image` instance

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray import BaseDoc
    from docarray.typing import ImageUrl

    img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

    img_url = parse_obj_as(ImageUrl, img_url)
    img = img_url.load_pil()

    from PIL.Image import Image

    assert isinstance(img, Image)
    ```

    ---
    :param timeout: timeout for urlopen, forwarded to `load_bytes`
    :return: a Pillow image
    """
    from docarray.typing.bytes.image_bytes import ImageBytes

    raw = self.load_bytes(timeout=timeout)
    return ImageBytes(raw).load_pil()

Mesh3DUrl

Bases: Url3D

URL to a file containing 3D mesh information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/mesh_url.py
@_register_proto(proto_type_name='mesh_url')
class Mesh3DUrl(Url3D):
    """
    URL pointing to a file with 3D mesh information.
    May be a remote (web) URL or a local file path.
    """

    def load(
        self: T,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'VerticesAndFaces':
        """
        Read the mesh behind the url into a
        [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object holding
        its vertices and faces.

        ---

        ```python
        from docarray import BaseDoc

        from docarray.typing import Mesh3DUrl, NdArray


        class MyDoc(BaseDoc):
            mesh_url: Mesh3DUrl


        doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        tensors = doc.mesh_url.load()
        assert isinstance(tensors.vertices, NdArray)
        assert isinstance(tensors.faces, NdArray)
        ```


        :param skip_materials: Skip materials if True, else load.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.
        :return: VerticesAndFaces object containing vertices and faces information.
        """
        from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

        # None (or an empty dict) means "no extra trimesh arguments"
        extra_args = trimesh_args or {}
        mesh = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **extra_args
        )

        return VerticesAndFaces(
            vertices=parse_obj_as(NdArray, mesh.vertices.view(np.ndarray)),
            faces=parse_obj_as(NdArray, mesh.faces.view(np.ndarray)),
        )

    def display(self) -> None:
        """
        Plot the mesh from the url.
        The Trimesh instance of the 3D mesh is loaded with its materials and
        then displayed.
        """
        from IPython.display import display

        rendered = self._load_trimesh_instance(skip_materials=False).show()
        display(rendered)

display()

Plot mesh from url. This loads the Trimesh instance of the 3D mesh, and then displays it.

Source code in docarray/typing/url/url_3d/mesh_url.py
def display(self) -> None:
    """
    Plot the mesh from the url.
    The Trimesh instance of the 3D mesh is loaded with its materials and
    then displayed.
    """
    from IPython.display import display

    rendered = self._load_trimesh_instance(skip_materials=False).show()
    display(rendered)

load(skip_materials=True, trimesh_args=None)

Load the data from the url into a VerticesAndFaces object containing vertices and faces information.


from docarray import BaseDoc

from docarray.typing import Mesh3DUrl, NdArray


class MyDoc(BaseDoc):
    mesh_url: Mesh3DUrl


doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

tensors = doc.mesh_url.load()
assert isinstance(tensors.vertices, NdArray)
assert isinstance(tensors.faces, NdArray)

Parameters:

Name Type Description Default
skip_materials bool

Skip materials if True, else load.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
VerticesAndFaces

VerticesAndFaces object containing vertices and faces information.

Source code in docarray/typing/url/url_3d/mesh_url.py
def load(
    self: T,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'VerticesAndFaces':
    """
    Read the mesh behind the url into a
    [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object holding
    its vertices and faces.

    ---

    ```python
    from docarray import BaseDoc

    from docarray.typing import Mesh3DUrl, NdArray


    class MyDoc(BaseDoc):
        mesh_url: Mesh3DUrl


    doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    tensors = doc.mesh_url.load()
    assert isinstance(tensors.vertices, NdArray)
    assert isinstance(tensors.faces, NdArray)
    ```


    :param skip_materials: Skip materials if True, else load.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.
    :return: VerticesAndFaces object containing vertices and faces information.
    """
    from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

    # None (or an empty dict) means "no extra trimesh arguments"
    extra_args = trimesh_args or {}
    mesh = self._load_trimesh_instance(
        force='mesh', skip_materials=skip_materials, **extra_args
    )

    return VerticesAndFaces(
        vertices=parse_obj_as(NdArray, mesh.vertices.view(np.ndarray)),
        faces=parse_obj_as(NdArray, mesh.faces.view(np.ndarray)),
    )

PointCloud3DUrl

Bases: Url3D

URL to a file containing point cloud information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/point_cloud_url.py
@_register_proto(proto_type_name='point_cloud_url')
class PointCloud3DUrl(Url3D):
    """
    URL to a file containing point cloud information.
    Can be remote (web) URL, or a local file path.
    """

    def load(
        self: T,
        samples: int,
        multiple_geometries: bool = False,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'PointsAndColors':
        """
        Load the data from the url into a `PointsAndColors` object whose `points`
        hold the sampled point cloud (`colors` is left as None).


        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.typing import PointCloud3DUrl


        class MyDoc(BaseDoc):
            point_cloud_url: PointCloud3DUrl


        doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # tensors = doc.point_cloud_url.load(samples=100)

        # assert isinstance(tensors.points, np.ndarray)
        # assert tensors.points.shape == (100, 3)
        ```

        ---

        :param samples: number of points to sample from the mesh
        :param multiple_geometries: if False, store point cloud in 2D np.ndarray.
            If True, store point clouds from multiple geometries in 3D np.ndarray.
        :param skip_materials: Skip materials if True, else load.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.

        :return: `PointsAndColors` object containing the sampled points
        """
        from docarray.documents.point_cloud.points_and_colors import PointsAndColors

        if not trimesh_args:
            trimesh_args = {}

        if multiple_geometries:
            # try to coerce everything into a scene
            scene = self._load_trimesh_instance(
                force='scene', skip_materials=skip_materials, **trimesh_args
            )
            # one sampled cloud per geometry, stacked along a new leading axis
            point_cloud = np.stack(
                [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
                axis=0,
            )
        else:
            # combine a scene into a single mesh; forward `skip_materials` here
            # too, so the caller's choice is honoured in both branches
            mesh = self._load_trimesh_instance(
                force='mesh', skip_materials=skip_materials, **trimesh_args
            )
            point_cloud = np.array(mesh.sample(samples))

        points = parse_obj_as(NdArray, point_cloud)
        return PointsAndColors(points=points, colors=None)

    def display(
        self,
        samples: int = 10000,
    ) -> None:
        """
        Plot point cloud from url.

        First, it loads the point cloud into a `PointsAndColors` object, and then
        calls display on it. The following is therefore equivalent:

        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.documents import PointCloud3D

        pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # option 1
        # pc.url.display()

        # option 2 (equivalent)
        # pc.url.load(samples=10000).display()
        ```

        ---

        :param samples: number of points to sample from the mesh.
        """
        self.load(samples=samples, skip_materials=False).display()

display(samples=10000)

Plot point cloud from url.

First, it loads the point cloud into a PointsAndColors object, and then calls display on it. The following is therefore equivalent:


import numpy as np
from docarray import BaseDoc

from docarray.documents import PointCloud3D

pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# option 1
# pc.url.display()

# option 2 (equivalent)
# pc.url.load(samples=10000).display()

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh.

10000
Source code in docarray/typing/url/url_3d/point_cloud_url.py
def display(
    self,
    samples: int = 10000,
) -> None:
    """
    Render the point cloud referenced by this url.

    The url is first loaded (with `skip_materials=False`) into a
    `PointsAndColors` object, whose own `display()` is then invoked.
    The two options below are therefore equivalent:

    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.documents import PointCloud3D

    pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # option 1
    # pc.url.display()

    # option 2 (equivalent)
    # pc.url.load(samples=10000).display()
    ```

    ---

    :param samples: number of points to sample from the mesh.
    """
    points_and_colors = self.load(samples=samples, skip_materials=False)
    points_and_colors.display()

load(samples, multiple_geometries=False, skip_materials=True, trimesh_args=None)

Load the data from the url into a PointsAndColors object whose points field is an NdArray containing the sampled point cloud.


import numpy as np
from docarray import BaseDoc

from docarray.typing import PointCloud3DUrl


class MyDoc(BaseDoc):
    point_cloud_url: PointCloud3DUrl


doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# point_cloud = doc.point_cloud_url.load(samples=100)

# assert isinstance(point_cloud, np.ndarray)
# assert point_cloud.shape == (100, 3)

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh

required
multiple_geometries bool

if False, store point cloud in 2D np.ndarray. If True, store point clouds from multiple geometries in 3D np.ndarray.

False
skip_materials bool

Skip materials if True, else load.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
PointsAndColors

PointsAndColors object whose points hold the sampled point cloud (colors is None)

Source code in docarray/typing/url/url_3d/point_cloud_url.py
def load(
    self: T,
    samples: int,
    multiple_geometries: bool = False,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'PointsAndColors':
    """
    Load the data from the url into a `PointsAndColors` object whose `points`
    field holds the sampled point cloud as an `NdArray`.


    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.typing import PointCloud3DUrl


    class MyDoc(BaseDoc):
        point_cloud_url: PointCloud3DUrl


    doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # point_cloud = doc.point_cloud_url.load(samples=100)

    # assert isinstance(point_cloud, np.ndarray)
    # assert point_cloud.shape == (100, 3)
    ```

    ---

    :param samples: number of points to sample from the mesh
    :param multiple_geometries: if False, store point cloud in 2D np.ndarray.
        If True, store point clouds from multiple geometries in 3D np.ndarray.
    :param skip_materials: Skip materials if True, else load. Honoured for
        both the single-mesh and the multiple-geometries code path.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.

    :return: `PointsAndColors` with `points` set to the sampled point cloud
        and `colors` set to None
    """
    from docarray.documents.point_cloud.points_and_colors import PointsAndColors

    if not trimesh_args:
        trimesh_args = {}

    if multiple_geometries:
        # try to coerce everything into a scene
        scene = self._load_trimesh_instance(
            force='scene', skip_materials=skip_materials, **trimesh_args
        )
        # one sampled array per geometry, stacked along a new leading axis
        point_cloud = np.stack(
            [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
            axis=0,
        )
    else:
        # combine a scene into a single mesh; `skip_materials` is forwarded
        # here as well, otherwise the caller's choice would be silently
        # ignored on this (default) path
        mesh = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **trimesh_args
        )
        point_cloud = np.array(mesh.sample(samples))

    points = parse_obj_as(NdArray, point_cloud)
    return PointsAndColors(points=points, colors=None)

TextUrl

Bases: AnyUrl

URL to a text file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/text_url.py
@_register_proto(proto_type_name='text_url')
class TextUrl(AnyUrl):
    """
    Url pointing at a text file.
    Both remote (web) urls and local file paths are accepted.
    """

    def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
        """
        Read the text file behind the url and return its content as a string.


        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import TextUrl


        class MyDoc(BaseDoc):
            remote_url: TextUrl


        doc = MyDoc(
            remote_url='https://de.wikipedia.org/wiki/Brixen',
        )

        remote_txt = doc.remote_url.load()
        ```

        ---


        :param charset: decoding charset; may be any character set registered with IANA
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: the text file content
        """
        return self.load_bytes(timeout=timeout).decode(charset)

load(charset='utf-8', timeout=None)

Load the text file into a string.


from docarray import BaseDoc
from docarray.typing import TextUrl


class MyDoc(BaseDoc):
    remote_url: TextUrl


doc = MyDoc(
    remote_url='https://de.wikipedia.org/wiki/Brixen',
)

remote_txt = doc.remote_url.load()

Parameters:

Name Type Description Default
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None
charset str

decoding charset; may be any character set registered with IANA

'utf-8'

Returns:

Type Description
str

the text file content

Source code in docarray/typing/url/text_url.py
def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
    """
    Read the text file behind the url and return its content as a string.


    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import TextUrl


    class MyDoc(BaseDoc):
        remote_url: TextUrl


    doc = MyDoc(
        remote_url='https://de.wikipedia.org/wiki/Brixen',
    )

    remote_txt = doc.remote_url.load()
    ```

    ---


    :param charset: decoding charset; may be any character set registered with IANA
    :param timeout: timeout (sec) for urlopen network request.
        Only relevant if URL is not local
    :return: the text file content
    """
    return self.load_bytes(timeout=timeout).decode(charset)

VideoUrl

Bases: AnyUrl

URL to a video file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/video_url.py
@_register_proto(proto_type_name='video_url')
class VideoUrl(AnyUrl):
    """
    URL to a video file.
    Can be remote (web) URL, or a local file path.
    """

    def load(self: T, **kwargs) -> VideoLoadResult:
        """
        Load the data from the url into a `NamedTuple` of
        [`VideoNdArray`][docarray.typing.VideoNdArray],
        [`AudioNdArray`][docarray.typing.AudioNdArray]
        and [`NdArray`][docarray.typing.NdArray].

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc

        from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


        class MyDoc(BaseDoc):
            video_url: VideoUrl
            video: Optional[VideoNdArray]
            audio: Optional[AudioNdArray]
            key_frame_indices: Optional[NdArray]


        doc = MyDoc(
            video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
        )
        doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

        assert isinstance(doc.video, VideoNdArray)
        assert isinstance(doc.audio, AudioNdArray)
        assert isinstance(doc.key_frame_indices, NdArray)
        ```

        ---

        You can load only the key frames (or video, audio respectively):

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray.typing import NdArray, VideoUrl


        url = parse_obj_as(
            VideoUrl,
            'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
        )
        key_frame_indices = url.load().key_frame_indices
        assert isinstance(key_frame_indices, NdArray)
        ```

        ---

        :param kwargs: supports all keyword arguments that are being supported by
            av.open() as described [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open).
            A `timeout` keyword, if given, is used for fetching the bytes
            (see `load_bytes`); all remaining keywords are forwarded to
            `VideoBytes.load()`.

        :return: [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,
            [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
            [`NdArray`][docarray.typing.NdArray] of the key frame indices.
        """
        # `load_bytes` only understands `timeout`; every other keyword is a
        # decoding option and belongs to the bytes object's `load`.
        timeout = kwargs.pop('timeout', None)
        buffer = self.load_bytes(timeout=timeout)
        return buffer.load(**kwargs)

    def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes:
        """
        Convert url to [`VideoBytes`][docarray.typing.VideoBytes]. This will either load or download
        the file and save it into a [`VideoBytes`][docarray.typing.VideoBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`VideoBytes`][docarray.typing.VideoBytes] object
        """
        bytes_ = super().load_bytes(timeout=timeout)
        return VideoBytes(bytes_)

    def display(self):
        """
        Play video from url in notebook.

        Remote (`http...`) urls are downloaded and embedded as an mp4 video;
        anything else is treated as a local file and played through an HTML
        `<video>` tag pointing at its relative path. Outside a notebook only
        a warning is emitted.
        """
        if is_notebook():
            from IPython.display import display

            remote_url = self.startswith('http')

            if remote_url:
                from IPython.display import Video

                b = self.load_bytes()
                display(Video(data=b, embed=True, mimetype='video/mp4'))
            else:
                import os

                from IPython.display import HTML

                path = os.path.relpath(self)
                src = f'''
                    <body>
                    <video width="320" height="240" autoplay muted controls>
                    <source src="{path}">
                    Your browser does not support the video tag.
                    </video>
                    </body>
                    '''
                display(HTML(src))

        else:
            warnings.warn('Display of video is only possible in a notebook.')

display()

Play video from url in notebook.

Source code in docarray/typing/url/video_url.py
def display(self):
    """
    Play the video behind this url inside a notebook.

    Remote (`http...`) urls are downloaded and embedded as an mp4 video;
    anything else is treated as a local file and played through an HTML
    `<video>` tag pointing at its relative path. Outside a notebook only
    a warning is emitted.
    """
    if is_notebook():
        from IPython.display import display

        if not self.startswith('http'):
            import os

            from IPython.display import HTML

            path = os.path.relpath(self)
            src = f'''
                <body>
                <video width="320" height="240" autoplay muted controls>
                <source src="{path}">
                Your browser does not support the video tag.
                </video>
                </body>
                '''
            display(HTML(src))
        else:
            from IPython.display import Video

            video_bytes = self.load_bytes()
            display(Video(data=video_bytes, embed=True, mimetype='video/mp4'))

    else:
        warnings.warn('Display of video is only possible in a notebook.')

load(**kwargs)

Load the data from the url into a NamedTuple of VideoNdArray, AudioNdArray and NdArray.


from typing import Optional

from docarray import BaseDoc

from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


class MyDoc(BaseDoc):
    video_url: VideoUrl
    video: Optional[VideoNdArray]
    audio: Optional[AudioNdArray]
    key_frame_indices: Optional[NdArray]


doc = MyDoc(
    video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
)
doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

assert isinstance(doc.video, VideoNdArray)
assert isinstance(doc.audio, AudioNdArray)
assert isinstance(doc.key_frame_indices, NdArray)

You can load only the key frames (or video, audio respectively):


from pydantic import parse_obj_as

from docarray.typing import NdArray, VideoUrl


url = parse_obj_as(
    VideoUrl,
    'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
)
key_frame_indices = url.load().key_frame_indices
assert isinstance(key_frame_indices, NdArray)

Parameters:

Name Type Description Default
kwargs

accepts all keyword arguments supported by av.open(), as described at https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open

{}

Returns:

Type Description
VideoLoadResult

VideoNdArray representing the images of the video, AudioNdArray representing the audio content, NdArray of the key frame indices.

Source code in docarray/typing/url/video_url.py
def load(self: T, **kwargs) -> VideoLoadResult:
    """
    Load the data from the url into a `NamedTuple` of
    [`VideoNdArray`][docarray.typing.VideoNdArray],
    [`AudioNdArray`][docarray.typing.AudioNdArray]
    and [`NdArray`][docarray.typing.NdArray].

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc

    from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


    class MyDoc(BaseDoc):
        video_url: VideoUrl
        video: Optional[VideoNdArray]
        audio: Optional[AudioNdArray]
        key_frame_indices: Optional[NdArray]


    doc = MyDoc(
        video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
    )
    doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

    assert isinstance(doc.video, VideoNdArray)
    assert isinstance(doc.audio, AudioNdArray)
    assert isinstance(doc.key_frame_indices, NdArray)
    ```

    ---

    You can load only the key frames (or video, audio respectively):

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray.typing import NdArray, VideoUrl


    url = parse_obj_as(
        VideoUrl,
        'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
    )
    key_frame_indices = url.load().key_frame_indices
    assert isinstance(key_frame_indices, NdArray)
    ```

    ---

    :param kwargs: supports all keyword arguments that are being supported by
        av.open() as described [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open).
        A `timeout` keyword, if given, is used for fetching the bytes
        (see `load_bytes`); all remaining keywords are forwarded to the
        `load()` of the object returned by `load_bytes`.

    :return: [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,
        [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
        [`NdArray`][docarray.typing.NdArray] of the key frame indices.
    """
    # `load_bytes` only understands `timeout`; every other keyword is a
    # decoding option and belongs to the bytes object's `load`.
    timeout = kwargs.pop('timeout', None)
    buffer = self.load_bytes(timeout=timeout)
    return buffer.load(**kwargs)

load_bytes(timeout=None)

Convert url to VideoBytes. This will either load or download the file and save it into a VideoBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
VideoBytes

VideoBytes object

Source code in docarray/typing/url/video_url.py
def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes:
    """
    Fetch the content behind the url and wrap it in a
    [`VideoBytes`][docarray.typing.VideoBytes] object. The raw bytes come
    from the parent class' `load_bytes`.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`VideoBytes`][docarray.typing.VideoBytes] object
    """
    return VideoBytes(super().load_bytes(timeout=timeout))

any_url

AnyUrl

Bases: BaseAnyUrl, AbstractType

Source code in docarray/typing/url/any_url.py
@_register_proto(proto_type_name='any_url')
class AnyUrl(BaseAnyUrl, AbstractType):
    """
    Generic url type that accepts remote urls as well as local file paths.

    Host and scheme requirements are relaxed so that a plain path without
    any scheme prefix passes validation.
    """

    host_required = (
        False  # turn off host requirement to allow passing of local paths as URL
    )

    def _to_node_protobuf(self) -> 'NodeProto':
        """Convert this url into a NodeProto protobuf message that carries the
        url's string form together with its proto type name. This function
        should be called when the url is nested into a Document that needs to
        be converted into a protobuf

        :return: the nested item protobuf message
        """
        from docarray.proto import NodeProto

        return NodeProto(text=str(self), type=self._proto_type_name)

    @classmethod
    def validate(
        cls: Type[T],
        value: Union[T, np.ndarray, Any],
        field: 'ModelField',
        config: 'BaseConfig',
    ) -> T:
        """
        Validate `value` as a url or local path and return it as an instance
        of `cls`.

        A string that neither starts with 'http' nor is an absolute path is
        treated as a relative path: its absolute form is what gets passed to
        the parent validation, but the returned instance keeps the original
        relative string.

        :param value: the value to validate
        :param field: forwarded unchanged to the parent `validate`
        :param config: forwarded unchanged to the parent `validate`
        :return: instance of `cls`, constructed with `scheme=None`
        """
        import os

        abs_path: Union[T, np.ndarray, Any]
        if (
            isinstance(value, str)
            and not value.startswith('http')
            and not os.path.isabs(value)
        ):
            # relative local path: validate its absolute form instead
            input_is_relative_path = True
            abs_path = os.path.abspath(value)
        else:
            input_is_relative_path = False
            abs_path = value

        url = super().validate(abs_path, field, config)  # basic url validation

        if input_is_relative_path:
            # hand back the path exactly as the caller wrote it
            return cls(str(value), scheme=None)
        else:
            return cls(str(url), scheme=None)

    @classmethod
    def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
        """
        A method used to validate parts of a URL.
        Our URLs should be able to function both in local and remote settings.
        Therefore, we allow missing `scheme`, making it possible to pass a file
        path without prefix.
        If `scheme` is missing, we assume it is a local file path.

        :param parts: the url parts; `scheme`, `port` and `user` are read
        :param validate_port: if True, `parts['port']` is checked via
            `cls._validate_port`
        :return: the unchanged `parts`
        """
        scheme = parts['scheme']
        if scheme is None:
            # allow missing scheme, unlike pydantic
            pass

        elif cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
            raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

        if validate_port:
            cls._validate_port(parts['port'])

        user = parts['user']
        if cls.user_required and user is None:
            raise errors.UrlUserInfoError()

        return parts

    @classmethod
    def build(
        cls,
        *,
        scheme: str,
        user: Optional[str] = None,
        password: Optional[str] = None,
        host: str,
        port: Optional[str] = None,
        path: Optional[str] = None,
        query: Optional[str] = None,
        fragment: Optional[str] = None,
        **_kwargs: str,
    ) -> str:
        """
        Build a URL from its parts.
        The only difference from the pydantic implementation is that we allow
        missing `scheme`, making it possible to pass a file path without prefix.

        NOTE(review): `scheme` is annotated as `str` but is explicitly checked
        against None below, so None is an accepted value in practice.
        """

        # allow missing scheme, unlike pydantic
        scheme_ = scheme if scheme is not None else ''
        url = super().build(
            scheme=scheme_,
            user=user,
            password=password,
            host=host,
            port=port,
            path=path,
            query=query,
            fragment=fragment,
            **_kwargs,
        )
        if scheme is None and url.startswith('://'):
            # remove the `://` prefix, since scheme is missing
            url = url[3:]
        return url

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
        """
        Read url from a proto msg.
        :param pb_msg: the url as a string; it is parsed into `cls` via
            `parse_obj_as`
        :return: url
        """
        return parse_obj_as(cls, pb_msg)

    def load_bytes(self, timeout: Optional[float] = None) -> bytes:
        """Convert url to bytes. This will either load or download the file and save
        it into a bytes object.
        :param timeout: timeout for urlopen. Only relevant if URI is not local
        :return: bytes.
        :raises FileNotFoundError: if the url has none of the schemes
            `http`, `https`, `data` and is not an existing local path
        """
        if urllib.parse.urlparse(self).scheme in {'http', 'https', 'data'}:
            # a fixed User-Agent header is sent along with the request
            req = urllib.request.Request(self, headers={'User-Agent': 'Mozilla/5.0'})
            # only pass `timeout` on when the caller actually set one
            urlopen_kwargs = {'timeout': timeout} if timeout is not None else {}
            with urllib.request.urlopen(req, **urlopen_kwargs) as fp:  # type: ignore
                return fp.read()
        elif os.path.exists(self):
            with open(self, 'rb') as fp:
                return fp.read()
        else:
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')
build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its individual parts.

    Behaves like the pydantic implementation except that `scheme` may be
    missing (`None`), which makes it possible to pass a bare file path
    without any prefix.
    """
    scheme_missing = scheme is None

    # the parent implementation gets an empty scheme instead of None
    assembled = super().build(
        scheme='' if scheme_missing else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )

    # without a scheme the result may begin with a dangling `://`; drop it
    if scheme_missing and assembled.startswith('://'):
        return assembled[3:]
    return assembled
from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str

the url string read from the proto message

required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Create a url instance from a proto message.

    :param pb_msg: the url as a string; it is parsed into `cls` via
        `parse_obj_as`
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url
load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Read the content behind the url into a bytes object, either by
    downloading it (schemes `http`, `https`, `data`) or by reading the local
    file.

    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the url has none of the schemes above and
        is not an existing local path
    """
    is_remote = urllib.parse.urlparse(self).scheme in {'http', 'https', 'data'}

    if not is_remote:
        if not os.path.exists(self):
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')
        with open(self, 'rb') as local_file:
            return local_file.read()

    request = urllib.request.Request(self, headers={'User-Agent': 'Mozilla/5.0'})
    # only pass `timeout` on when the caller actually set one
    extra = {} if timeout is None else {'timeout': timeout}
    with urllib.request.urlopen(request, **extra) as response:  # type: ignore
        return response.read()
validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Check the individual parts of a URL.

    URLs of this type have to work for remote as well as local resources,
    so a missing `scheme` is accepted: such a value is assumed to be a
    local file path given without prefix.

    :param parts: the url parts; `scheme`, `port` and `user` are read
    :param validate_port: if True, `parts['port']` is checked via
        `cls._validate_port`
    :return: the unchanged `parts`
    """
    scheme = parts['scheme']
    # a missing scheme is fine (unlike pydantic); only a present one is checked
    if scheme is not None:
        if cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
            raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user = parts['user']
    user_missing = user is None
    if cls.user_required and user_missing:
        raise errors.UrlUserInfoError()

    return parts

audio_url

AudioUrl

Bases: AnyUrl

URL to an audio file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/audio_url.py
@_register_proto(proto_type_name='audio_url')
class AudioUrl(AnyUrl):
    """
    Url pointing at an audio file.
    Both remote (web) urls and local file paths are accepted.
    """

    def load(self: T) -> Tuple[AudioNdArray, int]:
        """
        Fetch the audio behind the url and decode it into an
        [`AudioNdArray`][docarray.typing.AudioNdArray] plus its frame rate.

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc
        from docarray.typing import AudioNdArray, AudioUrl


        class MyDoc(BaseDoc):
            audio_url: AudioUrl
            audio_tensor: Optional[AudioNdArray]


        doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
        doc.audio_tensor, _ = doc.audio_url.load()
        assert isinstance(doc.audio_tensor, AudioNdArray)
        ```

        ---

        :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
            the audio file content, and an integer representing the frame rate.

        """
        return self.load_bytes().load()

    def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
        """
        Fetch the content behind the url and wrap it in an
        [`AudioBytes`][docarray.typing.AudioBytes] object. The raw bytes come
        from the parent class' `load_bytes`.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`AudioBytes`][docarray.typing.AudioBytes] object
        """
        return AudioBytes(super().load_bytes(timeout=timeout))

    def display(self):
        """
        Play the audio behind this url inside a notebook.

        Outside a notebook only a warning is emitted.
        """
        if not is_notebook():
            warnings.warn('Display of audio is only possible in a notebook.')
            return

        from IPython.display import Audio, display

        # urls starting with `http` are handed over as data, anything else
        # is treated as a local file name
        if self.startswith('http'):
            display(Audio(data=self))
        else:
            display(Audio(filename=self))
display()

Play the audio sound from url in notebook.

Source code in docarray/typing/url/audio_url.py
def display(self):
    """
    Play the audio behind this url inside a notebook.

    Outside a notebook only a warning is emitted.
    """
    if not is_notebook():
        warnings.warn('Display of audio is only possible in a notebook.')
        return

    from IPython.display import Audio, display

    # urls starting with `http` are handed over as data, anything else is
    # treated as a local file name
    if self.startswith('http'):
        display(Audio(data=self))
    else:
        display(Audio(filename=self))
load()

Load the data from the url into an AudioNdArray and the frame rate.


from typing import Optional

from docarray import BaseDoc
from docarray.typing import AudioNdArray, AudioUrl


class MyDoc(BaseDoc):
    audio_url: AudioUrl
    audio_tensor: Optional[AudioNdArray]


doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
doc.audio_tensor, _ = doc.audio_url.load()
assert isinstance(doc.audio_tensor, AudioNdArray)

Returns:

Type Description
Tuple[AudioNdArray, int]

tuple of an AudioNdArray representing the audio file content, and an integer representing the frame rate.

Source code in docarray/typing/url/audio_url.py
def load(self: T) -> Tuple[AudioNdArray, int]:
    """
    Fetch the audio behind the url and decode it into an
    [`AudioNdArray`][docarray.typing.AudioNdArray] plus its frame rate.

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc
    from docarray.typing import AudioNdArray, AudioUrl


    class MyDoc(BaseDoc):
        audio_url: AudioUrl
        audio_tensor: Optional[AudioNdArray]


    doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
    doc.audio_tensor, _ = doc.audio_url.load()
    assert isinstance(doc.audio_tensor, AudioNdArray)
    ```

    ---

    :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
        the audio file content, and an integer representing the frame rate.

    """
    return self.load_bytes().load()
load_bytes(timeout=None)

Convert url to AudioBytes. This will either load or download the file and save it into an AudioBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
AudioBytes

AudioBytes object

Source code in docarray/typing/url/audio_url.py
def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
    """
    Fetch the content behind the url and wrap it in an
    [`AudioBytes`][docarray.typing.AudioBytes] object. The raw bytes come
    from the parent class' `load_bytes`.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`AudioBytes`][docarray.typing.AudioBytes] object
    """
    return AudioBytes(super().load_bytes(timeout=timeout))

image_url

ImageUrl

Bases: AnyUrl

URL to an image file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/image_url.py
@_register_proto(proto_type_name='image_url')
class ImageUrl(AnyUrl):
    """
    Url pointing at an image file.
    Both remote (web) urls and local file paths are accepted.
    """

    def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
        """
        Fetch the image behind the url and decode it into a `PIL.Image.Image`
        instance.

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray import BaseDoc
        from docarray.typing import ImageUrl

        img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

        img_url = parse_obj_as(ImageUrl, img_url)
        img = img_url.load_pil()

        from PIL.Image import Image

        assert isinstance(img, Image)
        ```

        ---
        :param timeout: timeout (sec) handed on to `load_bytes`
        :return: a Pillow image
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        image_bytes = ImageBytes(self.load_bytes(timeout=timeout))
        return image_bytes.load_pil()

    def load(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
        timeout: Optional[float] = None,
    ) -> ImageNdArray:
        """
        Fetch the image behind the url and decode it into an
        [`ImageNdArray`][docarray.typing.ImageNdArray]

        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import ImageUrl, ImageNdArray


        class MyDoc(BaseDoc):
            img_url: ImageUrl


        doc = MyDoc(
            img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
            "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
        )

        img_tensor = doc.img_url.load()
        assert isinstance(img_tensor, ImageNdArray)

        img_tensor = doc.img_url.load(height=224, width=224)
        assert img_tensor.shape == (224, 224, 3)

        layout = ('C', 'W', 'H')
        img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
        assert img_tensor.shape == (3, 200, 100)
        ```

        ---

        :param width: width of the image tensor.
        :param height: height of the image tensor.
        :param axis_layout: ordering of the different image axes.
            'H' = height, 'W' = width, 'C' = color channel
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        raw = self.load_bytes(timeout=timeout)
        return ImageBytes(raw).load(width, height, axis_layout)

    def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
        """
        Fetch the content behind the url and wrap it in an
        [`ImageBytes`][docarray.typing.ImageBytes] object. The raw bytes come
        from the parent class' `load_bytes`.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`ImageBytes`][docarray.typing.ImageBytes] object
        """
        return ImageBytes(super().load_bytes(timeout=timeout))

    def display(self) -> None:
        """
        Show the image behind this url inside a notebook.

        Outside a notebook only a warning is emitted.
        """
        if not is_notebook():
            warnings.warn('Display of image is only possible in a notebook.')
            return

        from IPython.display import Image, display

        # urls starting with `http` are passed on as url, anything else is
        # treated as a local file name
        if self.startswith('http'):
            display(Image(url=self))
        else:
            display(Image(filename=self))
display()

Display image data from url in notebook.

Source code in docarray/typing/url/image_url.py
def display(self) -> None:
    """
    Render the image this url points to inside a notebook.

    Outside of a notebook nothing is rendered; a warning is emitted instead.
    """
    if not is_notebook():
        warnings.warn('Display of image is only possible in a notebook.')
        return

    from IPython.display import Image, display

    # Anything starting with 'http' is passed as a remote `url`,
    # everything else is treated as a local `filename`.
    if self.startswith('http'):
        display(Image(url=self))
    else:
        display(Image(filename=self))
load(width=None, height=None, axis_layout=('H', 'W', 'C'), timeout=None)

Load the data from the url into an ImageNdArray


from docarray import BaseDoc
from docarray.typing import ImageUrl, ImageNdArray


class MyDoc(BaseDoc):
    img_url: ImageUrl


doc = MyDoc(
    img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
    "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
)

img_tensor = doc.img_url.load()
assert isinstance(img_tensor, ImageNdArray)

img_tensor = doc.img_url.load(height=224, width=224)
assert img_tensor.shape == (224, 224, 3)

layout = ('C', 'W', 'H')
img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
assert img_tensor.shape == (3, 200, 100)

Parameters:

Name Type Description Default
width Optional[int]

width of the image tensor.

None
height Optional[int]

height of the image tensor.

None
axis_layout Tuple[str, str, str]

ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel

('H', 'W', 'C')
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None

Returns:

Type Description
ImageNdArray

ImageNdArray representing the image as RGB values

Source code in docarray/typing/url/image_url.py
def load(
    self,
    width: Optional[int] = None,
    height: Optional[int] = None,
    axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
    timeout: Optional[float] = None,
) -> ImageNdArray:
    """
    Fetch the image behind the url and decode it into an
    [`ImageNdArray`][docarray.typing.ImageNdArray]

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import ImageUrl, ImageNdArray


    class MyDoc(BaseDoc):
        img_url: ImageUrl


    doc = MyDoc(
        img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
        "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
    )

    img_tensor = doc.img_url.load()
    assert isinstance(img_tensor, ImageNdArray)

    img_tensor = doc.img_url.load(height=224, width=224)
    assert img_tensor.shape == (224, 224, 3)

    layout = ('C', 'W', 'H')
    img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
    assert img_tensor.shape == (3, 200, 100)
    ```

    ---

    :param width: width of the image tensor.
    :param height: height of the image tensor.
    :param axis_layout: ordering of the different image axes.
        'H' = height, 'W' = width, 'C' = color channel
    :param timeout: timeout (sec) for urlopen network request.
        Only relevant if URL is not local
    :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
    """
    from docarray.typing.bytes.image_bytes import ImageBytes

    # Fetch the raw bytes first, then let ImageBytes do the decoding/resizing.
    raw = self.load_bytes(timeout=timeout)
    return ImageBytes(raw).load(width, height, axis_layout)
load_bytes(timeout=None)

Convert url to ImageBytes. This will either load or download the file and save it into an ImageBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
ImageBytes

ImageBytes object

Source code in docarray/typing/url/image_url.py
def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
    """
    Read the content behind this url and hand it back wrapped as
    [`ImageBytes`][docarray.typing.ImageBytes]. Depending on the url the file is
    either loaded or downloaded.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`ImageBytes`][docarray.typing.ImageBytes] object
    """
    # The parent implementation does the reading; this override only re-wraps
    # its result in the image-specific bytes type.
    return ImageBytes(super().load_bytes(timeout=timeout))
load_pil(timeout=None)

Load the image from the bytes into a PIL.Image.Image instance


from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.typing import ImageUrl

img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

img_url = parse_obj_as(ImageUrl, img_url)
img = img_url.load_pil()

from PIL.Image import Image

assert isinstance(img, Image)

Returns:

Type Description
Image

a Pillow image

Source code in docarray/typing/url/image_url.py
def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
    """
    Fetch the image behind the url and open it as a `PIL.Image.Image` instance

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray import BaseDoc
    from docarray.typing import ImageUrl

    img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

    img_url = parse_obj_as(ImageUrl, img_url)
    img = img_url.load_pil()

    from PIL.Image import Image

    assert isinstance(img, Image)
    ```

    ---
    :param timeout: forwarded unchanged to `load_bytes`
    :return: a Pillow image
    """
    from docarray.typing.bytes.image_bytes import ImageBytes

    # The raw bytes are fetched first; the PIL conversion itself lives on ImageBytes.
    image_bytes = ImageBytes(self.load_bytes(timeout=timeout))
    return image_bytes.load_pil()

text_url

TextUrl

Bases: AnyUrl

URL to a text file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/text_url.py
@_register_proto(proto_type_name='text_url')
class TextUrl(AnyUrl):
    """
    URL pointing at a text file.
    The target may be a remote (web) URL or a path on the local file system.
    """

    def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
        """
        Read the text file behind the url and return its content as a string.


        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import TextUrl


        class MyDoc(BaseDoc):
            remote_url: TextUrl


        doc = MyDoc(
            remote_url='https://de.wikipedia.org/wiki/Brixen',
        )

        remote_txt = doc.remote_url.load()
        ```

        ---


        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :param charset: decoding charset; may be any character set registered with IANA
        :return: the text file content
        """
        # Bytes are fetched via load_bytes and decoded in one go.
        return self.load_bytes(timeout=timeout).decode(charset)
load(charset='utf-8', timeout=None)

Load the text file into a string.


from docarray import BaseDoc
from docarray.typing import TextUrl


class MyDoc(BaseDoc):
    remote_url: TextUrl


doc = MyDoc(
    remote_url='https://de.wikipedia.org/wiki/Brixen',
)

remote_txt = doc.remote_url.load()

Parameters:

Name Type Description Default
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None
charset str

decoding charset; may be any character set registered with IANA

'utf-8'

Returns:

Type Description
str

the text file content

Source code in docarray/typing/url/text_url.py
def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
    """
    Read the text file behind the url and return its content as a string.


    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import TextUrl


    class MyDoc(BaseDoc):
        remote_url: TextUrl


    doc = MyDoc(
        remote_url='https://de.wikipedia.org/wiki/Brixen',
    )

    remote_txt = doc.remote_url.load()
    ```

    ---


    :param timeout: timeout (sec) for urlopen network request.
        Only relevant if URL is not local
    :param charset: decoding charset; may be any character set registered with IANA
    :return: the text file content
    """
    # Bytes are fetched via load_bytes and decoded in one go.
    return self.load_bytes(timeout=timeout).decode(charset)

url_3d

Mesh3DUrl

Bases: Url3D

URL to a file containing 3D mesh information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/mesh_url.py
@_register_proto(proto_type_name='mesh_url')
class Mesh3DUrl(Url3D):
    """
    URL pointing at a file that holds 3D mesh information.
    The target may be a remote (web) URL or a path on the local file system.
    """

    def load(
        self: T,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'VerticesAndFaces':
        """
        Read the mesh behind the url and return its vertices and faces as a
        [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object.

        ---

        ```python
        from docarray import BaseDoc

        from docarray.typing import Mesh3DUrl, NdArray


        class MyDoc(BaseDoc):
            mesh_url: Mesh3DUrl


        doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        tensors = doc.mesh_url.load()
        assert isinstance(tensors.vertices, NdArray)
        assert isinstance(tensors.faces, NdArray)
        ```


        :param skip_materials: Skip materials if True, else load them.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.
        :return: VerticesAndFaces object containing vertices and faces information.
        """
        from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

        # A missing or empty argument dict is normalised to {} so it can be splatted.
        trimesh_args = trimesh_args or {}
        loaded_mesh = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **trimesh_args
        )

        # The trimesh arrays are viewed as plain ndarrays before being parsed into NdArray.
        raw_vertices = loaded_mesh.vertices.view(np.ndarray)
        raw_faces = loaded_mesh.faces.view(np.ndarray)

        return VerticesAndFaces(
            vertices=parse_obj_as(NdArray, raw_vertices),
            faces=parse_obj_as(NdArray, raw_faces),
        )

    def display(self) -> None:
        """
        Plot the mesh behind the url.
        The Trimesh instance is loaded with materials and then shown.
        """
        from IPython.display import display

        loaded_mesh = self._load_trimesh_instance(skip_materials=False)
        rendered = loaded_mesh.show()
        display(rendered)
display()

Plot mesh from url. This loads the Trimesh instance of the 3D mesh, and then displays it.

Source code in docarray/typing/url/url_3d/mesh_url.py
def display(self) -> None:
    """
    Plot the mesh behind the url.
    The Trimesh instance is loaded with materials and then shown.
    """
    from IPython.display import display

    loaded_mesh = self._load_trimesh_instance(skip_materials=False)
    rendered = loaded_mesh.show()
    display(rendered)
load(skip_materials=True, trimesh_args=None)

Load the data from the url into a VerticesAndFaces object containing vertices and faces information.


from docarray import BaseDoc

from docarray.typing import Mesh3DUrl, NdArray


class MyDoc(BaseDoc):
    mesh_url: Mesh3DUrl


doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

tensors = doc.mesh_url.load()
assert isinstance(tensors.vertices, NdArray)
assert isinstance(tensors.faces, NdArray)

Parameters:

Name Type Description Default
skip_materials bool

Skip materials if True, else load them.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
VerticesAndFaces

VerticesAndFaces object containing vertices and faces information.

Source code in docarray/typing/url/url_3d/mesh_url.py
def load(
    self: T,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'VerticesAndFaces':
    """
    Read the mesh behind the url and return its vertices and faces as a
    [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object.

    ---

    ```python
    from docarray import BaseDoc

    from docarray.typing import Mesh3DUrl, NdArray


    class MyDoc(BaseDoc):
        mesh_url: Mesh3DUrl


    doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    tensors = doc.mesh_url.load()
    assert isinstance(tensors.vertices, NdArray)
    assert isinstance(tensors.faces, NdArray)
    ```


    :param skip_materials: Skip materials if True, else load them.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.
    :return: VerticesAndFaces object containing vertices and faces information.
    """
    from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

    # A missing or empty argument dict is normalised to {} so it can be splatted.
    trimesh_args = trimesh_args or {}
    loaded_mesh = self._load_trimesh_instance(
        force='mesh', skip_materials=skip_materials, **trimesh_args
    )

    # The trimesh arrays are viewed as plain ndarrays before being parsed into NdArray.
    raw_vertices = loaded_mesh.vertices.view(np.ndarray)
    raw_faces = loaded_mesh.faces.view(np.ndarray)

    return VerticesAndFaces(
        vertices=parse_obj_as(NdArray, raw_vertices),
        faces=parse_obj_as(NdArray, raw_faces),
    )

PointCloud3DUrl

Bases: Url3D

URL to a file containing point cloud information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/point_cloud_url.py
@_register_proto(proto_type_name='point_cloud_url')
class PointCloud3DUrl(Url3D):
    """
    URL to a file containing point cloud information.
    Can be remote (web) URL, or a local file path.
    """

    def load(
        self: T,
        samples: int,
        multiple_geometries: bool = False,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'PointsAndColors':
        """
        Load the data from the url into a `PointsAndColors` object whose `points`
        field contains the sampled point cloud (`colors` is left as `None`).


        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.typing import PointCloud3DUrl


        class MyDoc(BaseDoc):
            point_cloud_url: PointCloud3DUrl


        doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # point_cloud = doc.point_cloud_url.load(samples=100)

        # assert isinstance(point_cloud.points, np.ndarray)
        # assert point_cloud.points.shape == (100, 3)
        ```

        ---

        :param samples: number of points to sample from the mesh
        :param multiple_geometries: if False, store point cloud in 2D np.ndarray.
            If True, store point clouds from multiple geometries in 3D np.ndarray.
        :param skip_materials: Skip materials if True, else load.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.

        :return: `PointsAndColors` object holding the sampled points
        """
        from docarray.documents.point_cloud.points_and_colors import PointsAndColors

        # A missing or empty argument dict is normalised to {} so it can be splatted.
        trimesh_args = trimesh_args or {}

        if multiple_geometries:
            # try to coerce everything into a scene
            scene = self._load_trimesh_instance(
                force='scene', skip_materials=skip_materials, **trimesh_args
            )
            # one (samples, ...) array per geometry, stacked along a new leading axis
            point_cloud = np.stack(
                [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
                axis=0,
            )
        else:
            # combine a scene into a single mesh; `skip_materials` is forwarded here
            # as well so the caller's choice is honoured in both branches
            mesh = self._load_trimesh_instance(
                force='mesh', skip_materials=skip_materials, **trimesh_args
            )
            point_cloud = np.array(mesh.sample(samples))

        points = parse_obj_as(NdArray, point_cloud)
        return PointsAndColors(points=points, colors=None)

    def display(
        self,
        samples: int = 10000,
    ) -> None:
        """
        Plot point cloud from url.

        First, it loads the point cloud into a `PointsAndColors` object, and then
        calls display on it. The following is therefore equivalent:

        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.documents import PointCloud3D

        pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # option 1
        # pc.url.display()

        # option 2 (equivalent)
        # pc.url.load(samples=10000).display()
        ```

        ---

        :param samples: number of points to sample from the mesh.
        """
        self.load(samples=samples, skip_materials=False).display()
display(samples=10000)

Plot point cloud from url.

First, it loads the point cloud into a PointsAndColors object, and then calls display on it. The following is therefore equivalent:


import numpy as np
from docarray import BaseDoc

from docarray.documents import PointCloud3D

pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# option 1
# pc.url.display()

# option 2 (equivalent)
# pc.url.load(samples=10000).display()

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh.

10000
Source code in docarray/typing/url/url_3d/point_cloud_url.py
def display(
    self,
    samples: int = 10000,
) -> None:
    """
    Plot the point cloud behind the url.

    The point cloud is first loaded into a `PointsAndColors` object, and display
    is then called on that object. The two options below are therefore equivalent:

    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.documents import PointCloud3D

    pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # option 1
    # pc.url.display()

    # option 2 (equivalent)
    # pc.url.load(samples=10000).display()
    ```

    ---

    :param samples: number of points to sample from the mesh.
    """
    # Materials are explicitly requested (skip_materials=False) for plotting.
    points_and_colors = self.load(samples=samples, skip_materials=False)
    points_and_colors.display()
load(samples, multiple_geometries=False, skip_materials=True, trimesh_args=None)

Load the data from the url into a PointsAndColors object whose points field contains the point cloud information.


import numpy as np
from docarray import BaseDoc

from docarray.typing import PointCloud3DUrl


class MyDoc(BaseDoc):
    point_cloud_url: PointCloud3DUrl


doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# point_cloud = doc.point_cloud_url.load(samples=100)

# assert isinstance(point_cloud, np.ndarray)
# assert point_cloud.shape == (100, 3)

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh

required
multiple_geometries bool

if False, store point cloud in 2D np.ndarray. If True, store point clouds from multiple geometries in 3D np.ndarray.

False
skip_materials bool

Skip materials if True, else load.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
PointsAndColors

PointsAndColors object whose points field holds the sampled point cloud (colors is None)

Source code in docarray/typing/url/url_3d/point_cloud_url.py
def load(
    self: T,
    samples: int,
    multiple_geometries: bool = False,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'PointsAndColors':
    """
    Load the data from the url into a `PointsAndColors` object whose `points`
    field contains the sampled point cloud (`colors` is left as `None`).


    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.typing import PointCloud3DUrl


    class MyDoc(BaseDoc):
        point_cloud_url: PointCloud3DUrl


    doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # point_cloud = doc.point_cloud_url.load(samples=100)

    # assert isinstance(point_cloud.points, np.ndarray)
    # assert point_cloud.points.shape == (100, 3)
    ```

    ---

    :param samples: number of points to sample from the mesh
    :param multiple_geometries: if False, store point cloud in 2D np.ndarray.
        If True, store point clouds from multiple geometries in 3D np.ndarray.
    :param skip_materials: Skip materials if True, else load.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.

    :return: `PointsAndColors` object holding the sampled points
    """
    from docarray.documents.point_cloud.points_and_colors import PointsAndColors

    # A missing or empty argument dict is normalised to {} so it can be splatted.
    trimesh_args = trimesh_args or {}

    if multiple_geometries:
        # try to coerce everything into a scene
        scene = self._load_trimesh_instance(
            force='scene', skip_materials=skip_materials, **trimesh_args
        )
        # one (samples, ...) array per geometry, stacked along a new leading axis
        point_cloud = np.stack(
            [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
            axis=0,
        )
    else:
        # combine a scene into a single mesh; `skip_materials` is forwarded here
        # as well so the caller's choice is honoured in both branches
        mesh = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **trimesh_args
        )
        point_cloud = np.array(mesh.sample(samples))

    points = parse_obj_as(NdArray, point_cloud)
    return PointsAndColors(points=points, colors=None)

mesh_url

Mesh3DUrl

Bases: Url3D

URL to a file containing 3D mesh information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/mesh_url.py
@_register_proto(proto_type_name='mesh_url')
class Mesh3DUrl(Url3D):
    """
    URL pointing at a file that holds 3D mesh information.
    The target may be a remote (web) URL or a path on the local file system.
    """

    def load(
        self: T,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'VerticesAndFaces':
        """
        Read the mesh behind the url and return its vertices and faces as a
        [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object.

        ---

        ```python
        from docarray import BaseDoc

        from docarray.typing import Mesh3DUrl, NdArray


        class MyDoc(BaseDoc):
            mesh_url: Mesh3DUrl


        doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        tensors = doc.mesh_url.load()
        assert isinstance(tensors.vertices, NdArray)
        assert isinstance(tensors.faces, NdArray)
        ```


        :param skip_materials: Skip materials if True, else load them.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.
        :return: VerticesAndFaces object containing vertices and faces information.
        """
        from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

        # A missing or empty argument dict is normalised to {} so it can be splatted.
        trimesh_args = trimesh_args or {}
        loaded_mesh = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **trimesh_args
        )

        # The trimesh arrays are viewed as plain ndarrays before being parsed into NdArray.
        raw_vertices = loaded_mesh.vertices.view(np.ndarray)
        raw_faces = loaded_mesh.faces.view(np.ndarray)

        return VerticesAndFaces(
            vertices=parse_obj_as(NdArray, raw_vertices),
            faces=parse_obj_as(NdArray, raw_faces),
        )

    def display(self) -> None:
        """
        Plot the mesh behind the url.
        The Trimesh instance is loaded with materials and then shown.
        """
        from IPython.display import display

        loaded_mesh = self._load_trimesh_instance(skip_materials=False)
        rendered = loaded_mesh.show()
        display(rendered)
display()

Plot mesh from url. This loads the Trimesh instance of the 3D mesh, and then displays it.

Source code in docarray/typing/url/url_3d/mesh_url.py
def display(self) -> None:
    """
    Plot the mesh behind the url.
    The Trimesh instance is loaded with materials and then shown.
    """
    from IPython.display import display

    loaded_mesh = self._load_trimesh_instance(skip_materials=False)
    rendered = loaded_mesh.show()
    display(rendered)
load(skip_materials=True, trimesh_args=None)

Load the data from the url into a VerticesAndFaces object containing vertices and faces information.


from docarray import BaseDoc

from docarray.typing import Mesh3DUrl, NdArray


class MyDoc(BaseDoc):
    mesh_url: Mesh3DUrl


doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

tensors = doc.mesh_url.load()
assert isinstance(tensors.vertices, NdArray)
assert isinstance(tensors.faces, NdArray)

Parameters:

Name Type Description Default
skip_materials bool

Skip materials if True, else load them.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
VerticesAndFaces

VerticesAndFaces object containing vertices and faces information.

Source code in docarray/typing/url/url_3d/mesh_url.py
def load(
    self: T,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'VerticesAndFaces':
    """
    Read the mesh behind the url and return its vertices and faces as a
    [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object.

    ---

    ```python
    from docarray import BaseDoc

    from docarray.typing import Mesh3DUrl, NdArray


    class MyDoc(BaseDoc):
        mesh_url: Mesh3DUrl


    doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    tensors = doc.mesh_url.load()
    assert isinstance(tensors.vertices, NdArray)
    assert isinstance(tensors.faces, NdArray)
    ```


    :param skip_materials: Skip materials if True, else load them.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.
    :return: VerticesAndFaces object containing vertices and faces information.
    """
    from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

    # A missing or empty argument dict is normalised to {} so it can be splatted.
    trimesh_args = trimesh_args or {}
    loaded_mesh = self._load_trimesh_instance(
        force='mesh', skip_materials=skip_materials, **trimesh_args
    )

    # The trimesh arrays are viewed as plain ndarrays before being parsed into NdArray.
    raw_vertices = loaded_mesh.vertices.view(np.ndarray)
    raw_faces = loaded_mesh.faces.view(np.ndarray)

    return VerticesAndFaces(
        vertices=parse_obj_as(NdArray, raw_vertices),
        faces=parse_obj_as(NdArray, raw_faces),
    )

point_cloud_url

PointCloud3DUrl

Bases: Url3D

URL to a file containing point cloud information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/point_cloud_url.py
@_register_proto(proto_type_name='point_cloud_url')
class PointCloud3DUrl(Url3D):
    """
    URL to a file containing point cloud information.
    Can be remote (web) URL, or a local file path.
    """

    def load(
        self: T,
        samples: int,
        multiple_geometries: bool = False,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'PointsAndColors':
        """
        Load the data from the url into a `PointsAndColors` object whose `points`
        field contains the sampled point cloud (`colors` is left as `None`).


        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.typing import PointCloud3DUrl


        class MyDoc(BaseDoc):
            point_cloud_url: PointCloud3DUrl


        doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # point_cloud = doc.point_cloud_url.load(samples=100)

        # assert isinstance(point_cloud.points, np.ndarray)
        # assert point_cloud.points.shape == (100, 3)
        ```

        ---

        :param samples: number of points to sample from the mesh
        :param multiple_geometries: if False, store point cloud in 2D np.ndarray.
            If True, store point clouds from multiple geometries in 3D np.ndarray.
        :param skip_materials: Skip materials if True, else load.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.

        :return: `PointsAndColors` object holding the sampled points
        """
        from docarray.documents.point_cloud.points_and_colors import PointsAndColors

        # A missing or empty argument dict is normalised to {} so it can be splatted.
        trimesh_args = trimesh_args or {}

        if multiple_geometries:
            # try to coerce everything into a scene
            scene = self._load_trimesh_instance(
                force='scene', skip_materials=skip_materials, **trimesh_args
            )
            # one (samples, ...) array per geometry, stacked along a new leading axis
            point_cloud = np.stack(
                [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
                axis=0,
            )
        else:
            # combine a scene into a single mesh; `skip_materials` is forwarded here
            # as well so the caller's choice is honoured in both branches
            mesh = self._load_trimesh_instance(
                force='mesh', skip_materials=skip_materials, **trimesh_args
            )
            point_cloud = np.array(mesh.sample(samples))

        points = parse_obj_as(NdArray, point_cloud)
        return PointsAndColors(points=points, colors=None)

    def display(
        self,
        samples: int = 10000,
    ) -> None:
        """
        Plot point cloud from url.

        First, it loads the point cloud into a `PointsAndColors` object, and then
        calls display on it. The following is therefore equivalent:

        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.documents import PointCloud3D

        pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # option 1
        # pc.url.display()

        # option 2 (equivalent)
        # pc.url.load(samples=10000).display()
        ```

        ---

        :param samples: number of points to sample from the mesh.
        """
        self.load(samples=samples, skip_materials=False).display()
display(samples=10000)

Plot point cloud from url.

First, it loads the point cloud into a PointsAndColors object, and then calls display on it. The following is therefore equivalent:


import numpy as np
from docarray import BaseDoc

from docarray.documents import PointCloud3D

pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# option 1
# pc.url.display()

# option 2 (equivalent)
# pc.url.load(samples=10000).display()

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh.

10000
Source code in docarray/typing/url/url_3d/point_cloud_url.py
def display(
    self,
    samples: int = 10000,
) -> None:
    """
    Plot the point cloud behind the url.

    The point cloud is first loaded into a `PointsAndColors` object, and display
    is then called on that object. The two options below are therefore equivalent:

    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.documents import PointCloud3D

    pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # option 1
    # pc.url.display()

    # option 2 (equivalent)
    # pc.url.load(samples=10000).display()
    ```

    ---

    :param samples: number of points to sample from the mesh.
    """
    # Materials are explicitly requested (skip_materials=False) for plotting.
    points_and_colors = self.load(samples=samples, skip_materials=False)
    points_and_colors.display()
load(samples, multiple_geometries=False, skip_materials=True, trimesh_args=None)

Load the data from the url into a PointsAndColors object whose points field contains the point cloud information.


import numpy as np
from docarray import BaseDoc

from docarray.typing import PointCloud3DUrl


class MyDoc(BaseDoc):
    point_cloud_url: PointCloud3DUrl


doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# point_cloud = doc.point_cloud_url.load(samples=100)

# assert isinstance(point_cloud, np.ndarray)
# assert point_cloud.shape == (100, 3)

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh

required
multiple_geometries bool

if False, store point cloud in 2D np.ndarray. If True, store point clouds from multiple geometries in 3D np.ndarray.

False
skip_materials bool

Skip materials if True, else load.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
PointsAndColors

PointsAndColors object whose points field holds the sampled point cloud (colors is None)

Source code in docarray/typing/url/url_3d/point_cloud_url.py
def load(
    self: T,
    samples: int,
    multiple_geometries: bool = False,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'PointsAndColors':
    """
    Load the data from the url into a `PointsAndColors` object whose `points`
    are an `NdArray` containing point cloud information.


    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.typing import PointCloud3DUrl


    class MyDoc(BaseDoc):
        point_cloud_url: PointCloud3DUrl


    doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # point_cloud = doc.point_cloud_url.load(samples=100)

    # assert isinstance(point_cloud.points, np.ndarray)
    # assert point_cloud.points.shape == (100, 3)
    ```

    ---

    :param samples: number of points to sample from the mesh
    :param multiple_geometries: if False, store point cloud in 2D np.ndarray.
        If True, store point clouds from multiple geometries in 3D np.ndarray.
    :param skip_materials: Skip materials if True, else load.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.

    :return: `PointsAndColors` whose `points` hold the sampled point cloud
        (`colors` is left as None)
    """
    from docarray.documents.point_cloud.points_and_colors import PointsAndColors

    # NOTE: `_load_trimesh_instance` takes the extra trimesh options as ONE
    # dict parameter (and treats None as "no options"). Splatting the dict
    # with `**` would raise a TypeError for every non-empty `trimesh_args`.
    if multiple_geometries:
        # try to coerce everything into a scene
        scene = self._load_trimesh_instance(
            force='scene',
            skip_materials=skip_materials,
            trimesh_args=trimesh_args,
        )
        # one sampled cloud per geometry, stacked along a new leading axis
        point_cloud = np.stack(
            [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
            axis=0,
        )
    else:
        # combine a scene into a single mesh; forward `skip_materials` so the
        # caller's choice is honoured in this branch as well
        mesh = self._load_trimesh_instance(
            force='mesh',
            skip_materials=skip_materials,
            trimesh_args=trimesh_args,
        )
        point_cloud = np.array(mesh.sample(samples))

    points = parse_obj_as(NdArray, point_cloud)
    return PointsAndColors(points=points, colors=None)

url_3d

Url3D

Bases: AnyUrl, ABC

URL to a file containing 3D mesh or point cloud information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/url_3d.py
@_register_proto(proto_type_name='url3d')
class Url3D(AnyUrl, ABC):
    """
    URL to a file containing 3D mesh or point cloud information.
    Can be remote (web) URL, or a local file path.
    """

    def _load_trimesh_instance(
        self: T,
        force: Optional[str] = None,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> Union['trimesh.Trimesh', 'trimesh.Scene']:
        """
        Load the file behind this url with trimesh.

        Urls whose scheme is `http` or `https` are read with
        `trimesh.load_remote()`, everything else with `trimesh.load()`.

        :param force: str or None. For 'mesh' try to coerce scenes into a single mesh.
            For 'scene' try to coerce everything into a scene.
        :param skip_materials: Skip materials if True, else load. The value is
            forwarded unchanged to the trimesh loader.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`. None (or an empty dict) means no
            additional arguments.
        :return: trimesh.Mesh or trimesh.Scene object
        """
        import urllib.parse

        # trimesh is an optional dependency: resolve it lazily at call time,
        # while still exposing the real module to static type checkers.
        if TYPE_CHECKING:
            import trimesh
        else:
            trimesh = import_library('trimesh', raise_error=True)

        extra_args = trimesh_args if trimesh_args else {}

        is_remote = urllib.parse.urlparse(self).scheme in ['http', 'https']
        if is_remote:
            loader = trimesh.load_remote
        else:
            loader = trimesh.load

        return loader(self, force=force, skip_materials=skip_materials, **extra_args)

video_url

VideoUrl

Bases: AnyUrl

URL to a video file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/video_url.py
@_register_proto(proto_type_name='video_url')
class VideoUrl(AnyUrl):
    """
    URL to a video file.
    Can be remote (web) URL, or a local file path.
    """

    def load(self: T, **kwargs) -> VideoLoadResult:
        """
        Load the data from the url into a `NamedTuple` of
        [`VideoNdArray`][docarray.typing.VideoNdArray],
        [`AudioNdArray`][docarray.typing.AudioNdArray]
        and [`NdArray`][docarray.typing.NdArray].

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc

        from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


        class MyDoc(BaseDoc):
            video_url: VideoUrl
            video: Optional[VideoNdArray]
            audio: Optional[AudioNdArray]
            key_frame_indices: Optional[NdArray]


        doc = MyDoc(
            video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
        )
        doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

        assert isinstance(doc.video, VideoNdArray)
        assert isinstance(doc.audio, AudioNdArray)
        assert isinstance(doc.key_frame_indices, NdArray)
        ```

        ---

        You can load only the key frames (or video, audio respectively):

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray.typing import NdArray, VideoUrl


        url = parse_obj_as(
            VideoUrl,
            'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
        )
        key_frame_indices = url.load().key_frame_indices
        assert isinstance(key_frame_indices, NdArray)
        ```

        ---

        :param kwargs: an optional `timeout` entry is used as the timeout for
            fetching the bytes (see `load_bytes()`); all remaining keyword
            arguments are forwarded to the `load()` of the resulting
            [`VideoBytes`][docarray.typing.VideoBytes], which supports all keyword
            arguments that are being supported by
            av.open() as described [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open)

        :return: [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,
            [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
            [`NdArray`][docarray.typing.NdArray] of the key frame indices.
        """
        # `load_bytes()` only accepts `timeout`; passing the decoding options
        # to it would raise a TypeError, so the two groups are separated here.
        timeout = kwargs.pop('timeout', None)
        buffer = self.load_bytes(timeout=timeout)
        return buffer.load(**kwargs)

    def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes:
        """
        Convert url to [`VideoBytes`][docarray.typing.VideoBytes]. This will either load or download
        the file and save it into a [`VideoBytes`][docarray.typing.VideoBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`VideoBytes`][docarray.typing.VideoBytes] object
        """
        bytes_ = super().load_bytes(timeout=timeout)
        return VideoBytes(bytes_)

    def display(self):
        """
        Play video from url in notebook.

        Urls starting with 'http' are downloaded and embedded as an mp4 video;
        any other url is treated as a local file and played through an HTML
        `<video>` tag that references its relative path. Outside of a notebook
        only a warning is issued.
        """
        if is_notebook():
            from IPython.display import display

            if self.startswith('http'):
                from IPython.display import Video

                b = self.load_bytes()
                display(Video(data=b, embed=True, mimetype='video/mp4'))
            else:
                import os

                from IPython.display import HTML

                path = os.path.relpath(self)
                src = f'''
                    <body>
                    <video width="320" height="240" autoplay muted controls>
                    <source src="{path}">
                    Your browser does not support the video tag.
                    </video>
                    </body>
                    '''
                display(HTML(src))

        else:
            warnings.warn('Display of video is only possible in a notebook.')
display()

Play video from url in notebook.

Source code in docarray/typing/url/video_url.py
def display(self):
    """
    Show the video behind this url inside a notebook.

    Urls starting with 'http' are downloaded and embedded as an mp4 video;
    any other url is treated as a local file and played through an HTML
    `<video>` tag that references its relative path. Outside of a notebook
    only a warning is issued.
    """
    if is_notebook():
        from IPython.display import display

        if not self.startswith('http'):
            # local file: point an HTML snippet at its relative path
            import os

            from IPython.display import HTML

            path = os.path.relpath(self)
            src = f'''
                <body>
                <video width="320" height="240" autoplay muted controls>
                <source src="{path}">
                Your browser does not support the video tag.
                </video>
                </body>
                '''
            display(HTML(src))
        else:
            # remote file: fetch the bytes and embed them in the output cell
            from IPython.display import Video

            video_bytes = self.load_bytes()
            display(Video(data=video_bytes, embed=True, mimetype='video/mp4'))

    else:
        warnings.warn('Display of video is only possible in a notebook.')
load(**kwargs)

Load the data from the url into a NamedTuple of VideoNdArray, AudioNdArray and NdArray.


from typing import Optional

from docarray import BaseDoc

from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


class MyDoc(BaseDoc):
    video_url: VideoUrl
    video: Optional[VideoNdArray]
    audio: Optional[AudioNdArray]
    key_frame_indices: Optional[NdArray]


doc = MyDoc(
    video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
)
doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

assert isinstance(doc.video, VideoNdArray)
assert isinstance(doc.audio, AudioNdArray)
assert isinstance(doc.key_frame_indices, NdArray)

You can load only the key frames (or video, audio respectively):


from pydantic import parse_obj_as

from docarray.typing import NdArray, VideoUrl


url = parse_obj_as(
    VideoUrl,
    'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
)
key_frame_indices = url.load().key_frame_indices
assert isinstance(key_frame_indices, NdArray)

Parameters:

Name Type Description Default
kwargs

supports all keyword arguments that are being supported by av.open(), as described in the PyAV documentation (https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open)

{}

Returns:

Type Description
VideoLoadResult

VideoNdArray representing the images of the video, AudioNdArray representing the audio content, and NdArray of the key frame indices.

Source code in docarray/typing/url/video_url.py
def load(self: T, **kwargs) -> VideoLoadResult:
    """
    Load the data from the url into a `NamedTuple` of
    [`VideoNdArray`][docarray.typing.VideoNdArray],
    [`AudioNdArray`][docarray.typing.AudioNdArray]
    and [`NdArray`][docarray.typing.NdArray].

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc

    from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


    class MyDoc(BaseDoc):
        video_url: VideoUrl
        video: Optional[VideoNdArray]
        audio: Optional[AudioNdArray]
        key_frame_indices: Optional[NdArray]


    doc = MyDoc(
        video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
    )
    doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

    assert isinstance(doc.video, VideoNdArray)
    assert isinstance(doc.audio, AudioNdArray)
    assert isinstance(doc.key_frame_indices, NdArray)
    ```

    ---

    You can load only the key frames (or video, audio respectively):

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray.typing import NdArray, VideoUrl


    url = parse_obj_as(
        VideoUrl,
        'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
    )
    key_frame_indices = url.load().key_frame_indices
    assert isinstance(key_frame_indices, NdArray)
    ```

    ---

    :param kwargs: an optional `timeout` entry is used as the timeout for
        fetching the bytes (see `load_bytes()`); all remaining keyword
        arguments are forwarded to the `load()` of the resulting
        [`VideoBytes`][docarray.typing.VideoBytes], which supports all keyword
        arguments that are being supported by
        av.open() as described [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open)

    :return: [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,
        [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
        [`NdArray`][docarray.typing.NdArray] of the key frame indices.
    """
    # `load_bytes()` only accepts `timeout`; passing the decoding options to
    # it would raise a TypeError, so the two groups are separated here.
    timeout = kwargs.pop('timeout', None)
    buffer = self.load_bytes(timeout=timeout)
    return buffer.load(**kwargs)
load_bytes(timeout=None)

Convert url to VideoBytes. This will either load or download the file and save it into a VideoBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
VideoBytes

VideoBytes object

Source code in docarray/typing/url/video_url.py
def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes:
    """
    Fetch the content behind this url as [`VideoBytes`][docarray.typing.VideoBytes].

    The raw bytes are obtained from the parent class' `load_bytes()` (which
    either loads or downloads the file) and wrapped into a
    [`VideoBytes`][docarray.typing.VideoBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`VideoBytes`][docarray.typing.VideoBytes] object
    """
    return VideoBytes(super().load_bytes(timeout=timeout))