Skip to content

Url

docarray.typing.url

AnyUrl

Bases: AnyUrl, AbstractType

Source code in docarray/typing/url/any_url.py
@_register_proto(proto_type_name='any_url')
class AnyUrl(BaseAnyUrl, AbstractType):
    """
    Generic URL type that also accepts local file paths.

    Subclasses narrow the accepted file types by overriding `mime_type()`
    and `extra_extensions()`, which `is_extension_allowed()` consults.
    """

    host_required = (
        False  # turn off host requirement to allow passing of local paths as URL
    )

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with the class."""
        raise NotImplementedError

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """Returns a list of allowed file extensions for the class
        that are not covered by the mimetypes library."""
        raise NotImplementedError

    def _to_node_protobuf(self) -> 'NodeProto':
        """Convert this URL into a NodeProto protobuf message. This function should
        be called when the URL is nested into a Document that needs to
        be converted into a protobuf.

        :return: the nested item protobuf message
        """
        from docarray.proto import NodeProto

        return NodeProto(text=str(self), type=self._proto_type_name)

    @staticmethod
    def _get_url_extension(url: str) -> str:
        """
        Extracts and returns the file extension from a given URL.
        If no file extension is present, the function returns an empty string.

        Only the path component of the URL is inspected, so query strings and
        fragments do not influence the result.

        :param url: The URL to extract the file extension from.
        :return: The file extension without the period, if one exists,
            otherwise an empty string.
        """

        parsed_url = urllib.parse.urlparse(url)
        ext = os.path.splitext(parsed_url.path)[1]
        ext = ext[1:] if ext.startswith('.') else ext
        return ext

    @classmethod
    def is_extension_allowed(cls, value: Any) -> bool:
        """
        Check if the file extension of the URL is allowed for this class.
        First, it guesses the mime type of the file. If it fails to detect the
        mime type, it then checks the extra file extensions.
        Note: This method assumes that any URL without an extension is valid.

        :param value: The URL or file path.
        :return: True if the extension is allowed, False otherwise
        """
        if cls is AnyUrl:
            return True

        url_parts = value.split('?')
        extension = cls._get_url_extension(value)
        if not extension:
            return True

        mimetype, _ = mimetypes.guess_type(url_parts[0])
        if mimetype is None:
            # The string handed to `guess_type` may still carry trailing URL
            # components (e.g. `#fragment`) that `_get_url_extension` already
            # ignores; retry with the extracted extension alone so both checks
            # agree on what the extension is.
            mimetype, _ = mimetypes.guess_type(f'file.{extension}')
        if mimetype and mimetype.startswith(cls.mime_type()):
            return True

        return extension in cls.extra_extensions()

    @classmethod
    def validate(
        cls: Type[T],
        value: Union[T, np.ndarray, Any],
        field: 'ModelField',
        config: 'BaseConfig',
    ) -> T:
        """
        Validate `value` as a URL or local file path for this class.

        A relative path string is made absolute only for the basic URL
        validation; the returned instance keeps the relative path as given.

        :param value: the URL or file path to validate
        :param field: passed through to the parent validator
        :param config: passed through to the parent validator
        :return: an instance of this class
        :raises ValueError: if the file extension is not allowed for this class
        """
        import os

        abs_path: Union[T, np.ndarray, Any]
        if (
            isinstance(value, str)
            and not value.startswith('http')
            and not os.path.isabs(value)
        ):
            input_is_relative_path = True
            abs_path = os.path.abspath(value)
        else:
            input_is_relative_path = False
            abs_path = value

        url = super().validate(abs_path, field, config)  # basic url validation

        if not cls.is_extension_allowed(value):
            raise ValueError(
                f"The file '{value}' is not in a valid format for class '{cls.__name__}'."
            )

        return cls(str(value if input_is_relative_path else url), scheme=None)

    @classmethod
    def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
        """
        A method used to validate parts of a URL.
        Our URLs should be able to function both in local and remote settings.
        Therefore, we allow missing `scheme`, making it possible to pass a file
        path without prefix.
        If `scheme` is missing, we assume it is a local file path.

        :param parts: the URL parts to validate
        :param validate_port: whether to validate the `port` entry of `parts`
        :return: the unmodified `parts`
        """
        scheme = parts['scheme']
        if scheme is None:
            # allow missing scheme, unlike pydantic
            pass

        elif cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
            raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

        if validate_port:
            cls._validate_port(parts['port'])

        user = parts['user']
        if cls.user_required and user is None:
            raise errors.UrlUserInfoError()

        return parts

    @classmethod
    def build(
        cls,
        *,
        scheme: str,
        user: Optional[str] = None,
        password: Optional[str] = None,
        host: str,
        port: Optional[str] = None,
        path: Optional[str] = None,
        query: Optional[str] = None,
        fragment: Optional[str] = None,
        **_kwargs: str,
    ) -> str:
        """
        Build a URL from its parts.
        The only difference from the pydantic implementation is that we allow
        missing `scheme`, making it possible to pass a file path without prefix.
        """

        # allow missing scheme, unlike pydantic
        scheme_ = scheme if scheme is not None else ''
        url = super().build(
            scheme=scheme_,
            user=user,
            password=password,
            host=host,
            port=port,
            path=path,
            query=query,
            fragment=fragment,
            **_kwargs,
        )
        if scheme is None and url.startswith('://'):
            # remove the `://` prefix, since scheme is missing
            url = url[3:]
        return url

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
        """
        Read url from a proto msg.
        :param pb_msg: the string to parse into an instance of this class
        :return: url
        """
        return parse_obj_as(cls, pb_msg)

    def load_bytes(self, timeout: Optional[float] = None) -> bytes:
        """Convert url to bytes. This will either load or download the file and save
        it into a bytes object.
        :param timeout: timeout for urlopen. Only relevant if URI is not local
        :return: bytes.
        :raises FileNotFoundError: if the value is neither an `http`, `https` or
            `data` URL nor an existing local path
        """
        if urllib.parse.urlparse(self).scheme in {'http', 'https', 'data'}:
            req = urllib.request.Request(
                self, headers={'User-Agent': 'Mozilla/5.0'}
            )
            # only pass `timeout` when given, so urlopen keeps its own default
            urlopen_kwargs = {'timeout': timeout} if timeout is not None else {}
            with urllib.request.urlopen(req, **urlopen_kwargs) as fp:  # type: ignore
                return fp.read()
        elif os.path.exists(self):
            with open(self, 'rb') as fp:
                return fp.read()
        else:
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its individual parts.

    Delegates to the parent implementation, except that `scheme` may be
    `None`; in that case the result carries no scheme prefix, so a plain
    file path can be produced.
    """
    scheme_missing = scheme is None

    # the parent builder is given an empty scheme when none was provided
    built = super().build(
        scheme='' if scheme_missing else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )

    # without a scheme, drop the leading `://` separator if one is present
    if scheme_missing and built.startswith('://'):
        return built[3:]
    return built

extra_extensions() classmethod

Returns a list of allowed file extensions for the class that are not covered by the mimetypes library.

Source code in docarray/typing/url/any_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    List the file extensions accepted by the class that the mimetypes
    library does not cover.

    The base implementation always raises; subclasses provide the list.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Create a url of this class from a proto message.

    :param pb_msg: the string to parse into an instance of this class
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Check if the file extension of the URL is allowed for this class.
    First, it guesses the mime type of the file. If it fails to detect the
    mime type, it then checks the extra file extensions.
    Note: This method assumes that any URL without an extension is valid.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    url_parts = value.split('?')
    extension = cls._get_url_extension(value)
    if not extension:
        return True

    mimetype, _ = mimetypes.guess_type(url_parts[0])
    if mimetype is None:
        # The string handed to `guess_type` may still carry trailing URL
        # components (e.g. `#fragment`) that `_get_url_extension` already
        # ignores; retry with the extracted extension alone so both checks
        # agree on what the extension is.
        mimetype, _ = mimetypes.guess_type(f'file.{extension}')
    if mimetype and mimetype.startswith(cls.mime_type()):
        return True

    return extension in cls.extra_extensions()

load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Read the content behind this url into a bytes object.

    `http`, `https` and `data` URLs are fetched with urlopen; anything else is
    treated as a local path and read from disk.

    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the value is neither such a URL nor an
        existing local path
    """
    scheme = urllib.parse.urlparse(self).scheme

    if scheme in {'http', 'https', 'data'}:
        request = urllib.request.Request(self, headers={'User-Agent': 'Mozilla/5.0'})
        # only forward `timeout` when given, so urlopen keeps its own default
        if timeout is None:
            response_cm = urllib.request.urlopen(request)
        else:
            response_cm = urllib.request.urlopen(request, timeout=timeout)
        with response_cm as response:
            return response.read()

    if not os.path.exists(self):
        raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')

    with open(self, 'rb') as handle:
        return handle.read()

mime_type() classmethod

Returns the mime type associated with the class.

Source code in docarray/typing/url/any_url.py
@classmethod
def mime_type(cls) -> str:
    """
    Return the mime type associated with the class.

    The base implementation always raises; subclasses provide the value.

    :raises NotImplementedError: always
    """
    raise NotImplementedError()

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs must work for both local and remote resources, so a missing
    `scheme` is accepted; such a value is assumed to be a local file path
    given without prefix.

    :param parts: the URL parts to validate
    :param validate_port: whether to validate the `port` entry of `parts`
    :return: the unmodified `parts`
    """
    scheme = parts['scheme']
    # unlike pydantic, only a scheme that is actually present gets checked
    if (
        scheme is not None
        and cls.allowed_schemes
        and scheme.lower() not in cls.allowed_schemes
    ):
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user = parts['user']
    user_missing = user is None
    if cls.user_required and user_missing:
        raise errors.UrlUserInfoError()

    return parts

AudioUrl

Bases: AnyUrl

URL to an audio file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/audio_url.py
@_register_proto(proto_type_name='audio_url')
class AudioUrl(AnyUrl):
    """
    URL pointing to an audio file.
    The value may be a remote (web) URL or a path to a local file.
    """

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with audio URLs."""
        return AUDIO_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns the additional file extensions that are valid for this class
        although the mimetypes library cannot identify them. There are none
        for this class.
        """
        return []

    def load(self: T) -> Tuple[AudioNdArray, int]:
        """
        Load the audio behind the url and return it as an
        [`AudioNdArray`][docarray.typing.AudioNdArray] together with the frame rate.

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc
        from docarray.typing import AudioNdArray, AudioUrl


        class MyDoc(BaseDoc):
            audio_url: AudioUrl
            audio_tensor: Optional[AudioNdArray] = None


        doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
        doc.audio_tensor, _ = doc.audio_url.load()
        assert isinstance(doc.audio_tensor, AudioNdArray)
        ```

        ---

        :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
            the audio file content, and an integer representing the frame rate.

        """
        # decoding is delegated to the bytes object produced by `load_bytes`
        return self.load_bytes().load()

    def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
        """
        Load or download the file behind the url and wrap its content in an
        [`AudioBytes`][docarray.typing.AudioBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`AudioBytes`][docarray.typing.AudioBytes] object
        """
        return AudioBytes(super().load_bytes(timeout=timeout))

    def display(self):
        """
        Play the audio sound from url in notebook.
        Outside of a notebook only a warning is issued.
        """
        if not is_notebook():
            warnings.warn('Display of audio is only possible in a notebook.')
            return

        from IPython.display import Audio, display

        # remote urls are handed over as data, local paths as a filename
        if self.startswith('http'):
            display(Audio(data=self))
        else:
            display(Audio(filename=self))

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its individual parts.

    Delegates to the parent implementation, except that `scheme` may be
    `None`; in that case the result carries no scheme prefix, so a plain
    file path can be produced.
    """
    scheme_missing = scheme is None

    # the parent builder is given an empty scheme when none was provided
    built = super().build(
        scheme='' if scheme_missing else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )

    # without a scheme, drop the leading `://` separator if one is present
    if scheme_missing and built.startswith('://'):
        return built[3:]
    return built

display()

Play the audio sound from url in notebook.

Source code in docarray/typing/url/audio_url.py
def display(self):
    """
    Play the audio sound from url in notebook.
    Outside of a notebook only a warning is issued.
    """
    if not is_notebook():
        warnings.warn('Display of audio is only possible in a notebook.')
        return

    from IPython.display import Audio, display

    # remote urls are handed over as data, local paths as a filename
    if self.startswith('http'):
        display(Audio(data=self))
    else:
        display(Audio(filename=self))

extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/audio_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns the additional file extensions that are valid for this class
    although the mimetypes library cannot identify them. There are none
    for this class.
    """
    no_extra_extensions: List[str] = []
    return no_extra_extensions

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Create a url of this class from a proto message.

    :param pb_msg: the string to parse into an instance of this class
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Check if the file extension of the URL is allowed for this class.
    First, it guesses the mime type of the file. If it fails to detect the
    mime type, it then checks the extra file extensions.
    Note: This method assumes that any URL without an extension is valid.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    url_parts = value.split('?')
    extension = cls._get_url_extension(value)
    if not extension:
        return True

    mimetype, _ = mimetypes.guess_type(url_parts[0])
    if mimetype is None:
        # The string handed to `guess_type` may still carry trailing URL
        # components (e.g. `#fragment`) that `_get_url_extension` already
        # ignores; retry with the extracted extension alone so both checks
        # agree on what the extension is.
        mimetype, _ = mimetypes.guess_type(f'file.{extension}')
    if mimetype and mimetype.startswith(cls.mime_type()):
        return True

    return extension in cls.extra_extensions()

load()

Load the data from the url and return it as an AudioNdArray together with the frame rate.


from typing import Optional

from docarray import BaseDoc
from docarray.typing import AudioNdArray, AudioUrl


# Document holding an audio url and an optional field for the loaded tensor.
class MyDoc(BaseDoc):
    audio_url: AudioUrl
    audio_tensor: Optional[AudioNdArray] = None


doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
# load() returns a tuple; its second element (the frame rate) is discarded here.
doc.audio_tensor, _ = doc.audio_url.load()
assert isinstance(doc.audio_tensor, AudioNdArray)

Returns:

Type Description
Tuple[AudioNdArray, int]

tuple of an AudioNdArray representing the audio file content, and an integer representing the frame rate.

Source code in docarray/typing/url/audio_url.py
def load(self: T) -> Tuple[AudioNdArray, int]:
    """
    Load the audio behind the url and return it as an
    [`AudioNdArray`][docarray.typing.AudioNdArray] together with the frame rate.

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc
    from docarray.typing import AudioNdArray, AudioUrl


    class MyDoc(BaseDoc):
        audio_url: AudioUrl
        audio_tensor: Optional[AudioNdArray] = None


    doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
    doc.audio_tensor, _ = doc.audio_url.load()
    assert isinstance(doc.audio_tensor, AudioNdArray)
    ```

    ---

    :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
        the audio file content, and an integer representing the frame rate.

    """
    # decoding is delegated to the bytes object produced by `load_bytes`
    return self.load_bytes().load()

load_bytes(timeout=None)

Convert url to AudioBytes. This will either load or download the file and save it into an AudioBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
AudioBytes

AudioBytes object

Source code in docarray/typing/url/audio_url.py
def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
    """
    Load or download the file behind the url and wrap its content in an
    [`AudioBytes`][docarray.typing.AudioBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`AudioBytes`][docarray.typing.AudioBytes] object
    """
    return AudioBytes(super().load_bytes(timeout=timeout))

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs must work for both local and remote resources, so a missing
    `scheme` is accepted; such a value is assumed to be a local file path
    given without prefix.

    :param parts: the URL parts to validate
    :param validate_port: whether to validate the `port` entry of `parts`
    :return: the unmodified `parts`
    """
    scheme = parts['scheme']
    # unlike pydantic, only a scheme that is actually present gets checked
    if (
        scheme is not None
        and cls.allowed_schemes
        and scheme.lower() not in cls.allowed_schemes
    ):
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user = parts['user']
    user_missing = user is None
    if cls.user_required and user_missing:
        raise errors.UrlUserInfoError()

    return parts

ImageUrl

Bases: AnyUrl

URL to an image file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/image_url.py
@_register_proto(proto_type_name='image_url')
class ImageUrl(AnyUrl):
    """
    URL pointing to an image file.
    The value may be a remote (web) URL or a path to a local file.
    """

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with image URLs."""
        return IMAGE_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns the additional file extensions that are valid for this class
        although the mimetypes library cannot identify them. There are none
        for this class.
        """
        return []

    def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
        """
        Load the image behind the url into a `PIL.Image.Image` instance

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray import BaseDoc
        from docarray.typing import ImageUrl

        img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

        img_url = parse_obj_as(ImageUrl, img_url)
        img = img_url.load_pil()

        from PIL.Image import Image

        assert isinstance(img, Image)
        ```

        ---
        :param timeout: timeout (sec) forwarded to `load_bytes`
        :return: a Pillow image
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        image_bytes = ImageBytes(self.load_bytes(timeout=timeout))
        return image_bytes.load_pil()

    def load(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
        timeout: Optional[float] = None,
    ) -> ImageNdArray:
        """
        Load the image behind the url into an [`ImageNdArray`][docarray.typing.ImageNdArray]

        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import ImageUrl, ImageNdArray


        class MyDoc(BaseDoc):
            img_url: ImageUrl


        doc = MyDoc(
            img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
            "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
        )

        img_tensor = doc.img_url.load()
        assert isinstance(img_tensor, ImageNdArray)

        img_tensor = doc.img_url.load(height=224, width=224)
        assert img_tensor.shape == (224, 224, 3)

        layout = ('C', 'W', 'H')
        img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
        assert img_tensor.shape == (3, 200, 100)
        ```

        ---

        :param width: width of the image tensor.
        :param height: height of the image tensor.
        :param axis_layout: ordering of the different image axes.
            'H' = height, 'W' = width, 'C' = color channel
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        raw = self.load_bytes(timeout=timeout)
        # decoding and resizing are delegated to the bytes object
        return ImageBytes(raw).load(width, height, axis_layout)

    def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
        """
        Load or download the file behind the url and wrap its content in an
        [`ImageBytes`][docarray.typing.ImageBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`ImageBytes`][docarray.typing.ImageBytes] object
        """
        return ImageBytes(super().load_bytes(timeout=timeout))

    def display(self) -> None:
        """
        Display image data from url in notebook.
        Outside of a notebook only a warning is issued.
        """
        if not is_notebook():
            warnings.warn('Display of image is only possible in a notebook.')
            return

        from IPython.display import Image, display

        # remote urls are handed over as url, local paths as a filename
        if self.startswith('http'):
            display(Image(url=self))
        else:
            display(Image(filename=self))

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its individual parts.

    Delegates to the parent implementation, except that `scheme` may be
    `None`; in that case the result carries no scheme prefix, so a plain
    file path can be produced.
    """
    scheme_missing = scheme is None

    # the parent builder is given an empty scheme when none was provided
    built = super().build(
        scheme='' if scheme_missing else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )

    # without a scheme, drop the leading `://` separator if one is present
    if scheme_missing and built.startswith('://'):
        return built[3:]
    return built

display()

Display image data from url in notebook.

Source code in docarray/typing/url/image_url.py
def display(self) -> None:
    """
    Display image data from url in notebook.
    Outside of a notebook only a warning is issued.
    """
    if not is_notebook():
        warnings.warn('Display of image is only possible in a notebook.')
        return

    from IPython.display import Image, display

    # remote urls are handed over as url, local paths as a filename
    if self.startswith('http'):
        display(Image(url=self))
    else:
        display(Image(filename=self))

extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/image_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns the additional file extensions that are valid for this class
    although the mimetypes library cannot identify them. There are none
    for this class.
    """
    no_extra_extensions: List[str] = []
    return no_extra_extensions

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Create a url of this class from a proto message.

    :param pb_msg: the string to parse into an instance of this class
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Check if the file extension of the URL is allowed for this class.
    First, it guesses the mime type of the file. If it fails to detect the
    mime type, it then checks the extra file extensions.
    Note: This method assumes that any URL without an extension is valid.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    url_parts = value.split('?')
    extension = cls._get_url_extension(value)
    if not extension:
        return True

    mimetype, _ = mimetypes.guess_type(url_parts[0])
    if mimetype is None:
        # The string handed to `guess_type` may still carry trailing URL
        # components (e.g. `#fragment`) that `_get_url_extension` already
        # ignores; retry with the extracted extension alone so both checks
        # agree on what the extension is.
        mimetype, _ = mimetypes.guess_type(f'file.{extension}')
    if mimetype and mimetype.startswith(cls.mime_type()):
        return True

    return extension in cls.extra_extensions()

load(width=None, height=None, axis_layout=('H', 'W', 'C'), timeout=None)

Load the data from the url into an ImageNdArray


from docarray import BaseDoc
from docarray.typing import ImageUrl, ImageNdArray


# Document holding a single image url.
class MyDoc(BaseDoc):
    img_url: ImageUrl


doc = MyDoc(
    img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
    "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
)

# No width/height given.
img_tensor = doc.img_url.load()
assert isinstance(img_tensor, ImageNdArray)

# Explicit height and width with the default ('H', 'W', 'C') axis layout.
img_tensor = doc.img_url.load(height=224, width=224)
assert img_tensor.shape == (224, 224, 3)

# A custom axis layout changes the order of the axes in the result.
layout = ('C', 'W', 'H')
img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
assert img_tensor.shape == (3, 200, 100)

Parameters:

Name Type Description Default
width Optional[int]

width of the image tensor.

None
height Optional[int]

height of the image tensor.

None
axis_layout Tuple[str, str, str]

ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel

('H', 'W', 'C')
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None

Returns:

Type Description
ImageNdArray

ImageNdArray representing the image as RGB values

Source code in docarray/typing/url/image_url.py
def load(
    self,
    width: Optional[int] = None,
    height: Optional[int] = None,
    axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
    timeout: Optional[float] = None,
) -> ImageNdArray:
    """
    Fetch the image behind this url and decode it into an
    [`ImageNdArray`][docarray.typing.ImageNdArray].

    The raw bytes are obtained through `load_bytes` and wrapped in `ImageBytes`,
    whose `load` receives `width`, `height` and `axis_layout`.

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import ImageUrl, ImageNdArray


    class MyDoc(BaseDoc):
        img_url: ImageUrl


    doc = MyDoc(
        img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
        "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
    )

    img_tensor = doc.img_url.load()
    assert isinstance(img_tensor, ImageNdArray)

    img_tensor = doc.img_url.load(height=224, width=224)
    assert img_tensor.shape == (224, 224, 3)

    layout = ('C', 'W', 'H')
    img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
    assert img_tensor.shape == (3, 200, 100)
    ```

    ---

    :param width: width of the resulting image tensor.
    :param height: height of the resulting image tensor.
    :param axis_layout: order of the image axes in the result.
        'H' = height, 'W' = width, 'C' = color channel
    :param timeout: timeout (sec) for the urlopen network request.
        Only relevant if the URL is not local
    :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
    """
    # Imported locally, exactly as in the original implementation.
    from docarray.typing.bytes.image_bytes import ImageBytes

    raw_bytes = self.load_bytes(timeout=timeout)
    image_bytes = ImageBytes(raw_bytes)
    return image_bytes.load(width, height, axis_layout)

load_bytes(timeout=None)

Convert url to ImageBytes. This will either load or download the file and save it into an ImageBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
ImageBytes

ImageBytes object

Source code in docarray/typing/url/image_url.py
def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
    """
    Read the content behind this url and wrap it in an
    [`ImageBytes`][docarray.typing.ImageBytes] object.

    The raw bytes are produced by the parent class' `load_bytes`, which either
    loads or downloads the file.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`ImageBytes`][docarray.typing.ImageBytes] object
    """
    return ImageBytes(super().load_bytes(timeout=timeout))

load_pil(timeout=None)

Load the image from the bytes into a PIL.Image.Image instance


from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.typing import ImageUrl

img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

img_url = parse_obj_as(ImageUrl, img_url)
img = img_url.load_pil()

from PIL.Image import Image

assert isinstance(img, Image)

Returns:

Type Description
Image

a Pillow image

Source code in docarray/typing/url/image_url.py
def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
    """
    Load the image behind this url into a `PIL.Image.Image` instance.

    The bytes are fetched with `load_bytes`, wrapped in `ImageBytes` and then
    converted through `ImageBytes.load_pil`.

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray import BaseDoc
    from docarray.typing import ImageUrl

    img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

    img_url = parse_obj_as(ImageUrl, img_url)
    img = img_url.load_pil()

    from PIL.Image import Image

    assert isinstance(img, Image)
    ```

    ---
    :param timeout: timeout forwarded to `load_bytes`
    :return: a Pillow image
    """
    # Imported locally, exactly as in the original implementation.
    from docarray.typing.bytes.image_bytes import ImageBytes

    raw_bytes = self.load_bytes(timeout=timeout)
    image_bytes = ImageBytes(raw_bytes)
    return image_bytes.load_pil()

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs of this type have to work for remote resources as well as for local
    files. For that reason, and unlike pydantic, a missing `scheme` is
    accepted, so a plain file path without prefix can be passed.
    If `scheme` is missing, we assume it is a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are inspected
    :param validate_port: if True, `parts['port']` is passed to
        `cls._validate_port`
    :return: the `parts` object that was passed in
    """
    url_scheme = parts['scheme']
    # A missing scheme is tolerated on purpose; only a present one is checked
    # against the allowed schemes.
    scheme_rejected = (
        url_scheme is not None
        and cls.allowed_schemes
        and url_scheme.lower() not in cls.allowed_schemes
    )
    if scheme_rejected:
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    url_user = parts['user']
    if cls.user_required and url_user is None:
        raise errors.UrlUserInfoError()

    return parts

Mesh3DUrl

Bases: Url3D

URL to a file containing 3D mesh information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/mesh_url.py
@_register_proto(proto_type_name='mesh_url')
class Mesh3DUrl(Url3D):
    """
    URL pointing to a file that holds 3D mesh information.
    The URL may be remote (web) or a path to a local file.
    """

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        File extensions accepted by this class in addition to those that the
        mimetypes library can identify.

        :return: the `MESH_EXTRA_EXTENSIONS` list
        """
        return MESH_EXTRA_EXTENSIONS

    def load(
        self: T,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'VerticesAndFaces':
        """
        Load the mesh behind this url into a
        [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object that
        holds its vertices and faces.

        ---

        ```python
        from docarray import BaseDoc

        from docarray.typing import Mesh3DUrl, NdArray


        class MyDoc(BaseDoc):
            mesh_url: Mesh3DUrl


        doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        tensors = doc.mesh_url.load()
        assert isinstance(tensors.vertices, NdArray)
        assert isinstance(tensors.faces, NdArray)
        ```


        :param skip_materials: Skip materials if True, else load.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.
        :return: VerticesAndFaces object containing vertices and faces information.
        """
        from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

        # A missing (or empty) dict simply means "no extra arguments".
        extra_args = trimesh_args if trimesh_args else {}
        trimesh_obj = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **extra_args
        )

        return VerticesAndFaces(
            vertices=parse_obj_as(NdArray, trimesh_obj.vertices.view(np.ndarray)),
            faces=parse_obj_as(NdArray, trimesh_obj.faces.view(np.ndarray)),
        )

    def display(self) -> None:
        """
        Plot the mesh referenced by this url.
        The Trimesh instance is loaded including materials and then shown.
        """
        from IPython.display import display

        trimesh_obj = self._load_trimesh_instance(skip_materials=False)
        display(trimesh_obj.show())

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its parts.

    This delegates to the parent implementation. The only difference is that
    a missing `scheme` (`None`) is allowed, which makes it possible to pass a
    file path without prefix.

    :return: the assembled URL; when `scheme` is None, a leading `://` is
        stripped from the result
    """
    has_scheme = scheme is not None
    # The parent implementation is given an empty scheme when none was passed.
    built = super().build(
        scheme=scheme if has_scheme else '',
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )
    if not has_scheme and built.startswith('://'):
        # drop the `://` prefix that is left over from the empty scheme
        return built[3:]
    return built

display()

Plot mesh from url. This loads the Trimesh instance of the 3D mesh, and then displays it.

Source code in docarray/typing/url/url_3d/mesh_url.py
def display(self) -> None:
    """
    Plot the mesh referenced by this url.
    The Trimesh instance is loaded including materials and then shown.
    """
    from IPython.display import display

    trimesh_obj = self._load_trimesh_instance(skip_materials=False)
    display(trimesh_obj.show())

extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/url_3d/mesh_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns a list of additional file extensions that are valid for this class
    but cannot be identified by the mimetypes library.

    :return: the `MESH_EXTRA_EXTENSIONS` list
    """
    return MESH_EXTRA_EXTENSIONS

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Read url from a proto msg.

    :param pb_msg: the string taken from the proto message; it is parsed
        into `cls` with `parse_obj_as`
    :return: url
    """
    return parse_obj_as(cls, pb_msg)

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Tell whether the file extension found in `value` is acceptable for this class.

    The mime type of the file is guessed first. If the guess does not match
    the mime type of the class, the extra file extensions are consulted.
    Note: any URL without an extension is considered valid.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    # The generic base class places no restriction on extensions.
    if cls is AnyUrl:
        return True

    # The query string is cut off before the mime type is guessed.
    without_query = value.split('?')[0]
    ext = cls._get_url_extension(value)
    if not ext:
        return True

    guessed_type = mimetypes.guess_type(without_query)[0]
    if guessed_type and guessed_type.startswith(cls.mime_type()):
        return True

    return ext in cls.extra_extensions()

load(skip_materials=True, trimesh_args=None)

Load the data from the url into a VerticesAndFaces object containing vertices and faces information.


from docarray import BaseDoc

from docarray.typing import Mesh3DUrl, NdArray


class MyDoc(BaseDoc):
    mesh_url: Mesh3DUrl


doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

tensors = doc.mesh_url.load()
assert isinstance(tensors.vertices, NdArray)
assert isinstance(tensors.faces, NdArray)

Parameters:

Name Type Description Default
skip_materials bool

Skip materials if True, else load.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
VerticesAndFaces

VerticesAndFaces object containing vertices and faces information.

Source code in docarray/typing/url/url_3d/mesh_url.py
def load(
    self: T,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'VerticesAndFaces':
    """
    Load the mesh behind this url into a
    [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] object that
    holds its vertices and faces.

    ---

    ```python
    from docarray import BaseDoc

    from docarray.typing import Mesh3DUrl, NdArray


    class MyDoc(BaseDoc):
        mesh_url: Mesh3DUrl


    doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    tensors = doc.mesh_url.load()
    assert isinstance(tensors.vertices, NdArray)
    assert isinstance(tensors.faces, NdArray)
    ```


    :param skip_materials: Skip materials if True, else load.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.
    :return: VerticesAndFaces object containing vertices and faces information.
    """
    from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces

    # A missing (or empty) dict simply means "no extra arguments".
    extra_args = trimesh_args if trimesh_args else {}
    trimesh_obj = self._load_trimesh_instance(
        force='mesh', skip_materials=skip_materials, **extra_args
    )

    return VerticesAndFaces(
        vertices=parse_obj_as(NdArray, trimesh_obj.vertices.view(np.ndarray)),
        faces=parse_obj_as(NdArray, trimesh_obj.faces.view(np.ndarray)),
    )

load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Read the content behind this url and return it as bytes.

    Urls with an `http`, `https` or `data` scheme are opened with urlopen;
    anything else is treated as a local path and read from disk.

    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the value has none of the schemes above and
        no file exists at that path
    """
    url_schemes = {'http', 'https', 'data'}
    if urllib.parse.urlparse(self).scheme in url_schemes:
        request = urllib.request.Request(
            self, headers={'User-Agent': 'Mozilla/5.0'}
        )
        # `timeout` is only forwarded when the caller actually provided one.
        extra_kwargs = {} if timeout is None else {'timeout': timeout}
        with urllib.request.urlopen(request, **extra_kwargs) as response:  # type: ignore
            return response.read()

    if not os.path.exists(self):
        raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')

    with open(self, 'rb') as local_file:
        return local_file.read()

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs of this type have to work for remote resources as well as for local
    files. For that reason, and unlike pydantic, a missing `scheme` is
    accepted, so a plain file path without prefix can be passed.
    If `scheme` is missing, we assume it is a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are inspected
    :param validate_port: if True, `parts['port']` is passed to
        `cls._validate_port`
    :return: the `parts` object that was passed in
    """
    url_scheme = parts['scheme']
    # A missing scheme is tolerated on purpose; only a present one is checked
    # against the allowed schemes.
    scheme_rejected = (
        url_scheme is not None
        and cls.allowed_schemes
        and url_scheme.lower() not in cls.allowed_schemes
    )
    if scheme_rejected:
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    url_user = parts['user']
    if cls.user_required and url_user is None:
        raise errors.UrlUserInfoError()

    return parts

PointCloud3DUrl

Bases: Url3D

URL to a file containing point cloud information. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/url_3d/point_cloud_url.py
@_register_proto(proto_type_name='point_cloud_url')
class PointCloud3DUrl(Url3D):
    """
    URL to a file containing point cloud information.
    Can be remote (web) URL, or a local file path.
    """

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns a list of additional file extensions that are valid for this class
        but cannot be identified by the mimetypes library.

        :return: the `POINT_CLOUD_EXTRA_EXTENSIONS` list
        """
        return POINT_CLOUD_EXTRA_EXTENSIONS

    def load(
        self: T,
        samples: int,
        multiple_geometries: bool = False,
        skip_materials: bool = True,
        trimesh_args: Optional[Dict[str, Any]] = None,
    ) -> 'PointsAndColors':
        """
        Load the data from the url into a `PointsAndColors` object whose
        `points` hold the sampled point cloud (`colors` is set to None).


        ---

        ```python
        from docarray import BaseDoc

        from docarray.typing import PointCloud3DUrl


        class MyDoc(BaseDoc):
            point_cloud_url: PointCloud3DUrl


        doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # points_and_colors = doc.point_cloud_url.load(samples=100)

        # assert points_and_colors.points.shape == (100, 3)
        ```

        ---

        :param samples: number of points to sample from the mesh
        :param multiple_geometries: if False, the file is loaded as a single
            mesh and sampled once. If True, it is loaded as a scene and the
            samples of every geometry are stacked along a new first axis.
        :param skip_materials: Skip materials if True, else load.
        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
            or `trimesh.load_remote()`.

        :return: PointsAndColors object holding the sampled points
        """
        from docarray.documents.point_cloud.points_and_colors import PointsAndColors

        if not trimesh_args:
            trimesh_args = {}

        if multiple_geometries:
            # try to coerce everything into a scene
            scene = self._load_trimesh_instance(
                force='scene', skip_materials=skip_materials, **trimesh_args
            )
            point_cloud = np.stack(
                [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
                axis=0,
            )
        else:
            # combine a scene into a single mesh; `skip_materials` is forwarded
            # here as well so that the caller's choice is honoured in both
            # branches (it used to be dropped on this path)
            mesh = self._load_trimesh_instance(
                force='mesh', skip_materials=skip_materials, **trimesh_args
            )
            point_cloud = np.array(mesh.sample(samples))

        points = parse_obj_as(NdArray, point_cloud)
        return PointsAndColors(points=points, colors=None)

    def display(
        self,
        samples: int = 10000,
    ) -> None:
        """
        Plot point cloud from url.

        First, it loads the point cloud into a `PointsAndColors` object, and then
        calls display on it. The following is therefore equivalent:

        ---

        ```python
        import numpy as np
        from docarray import BaseDoc

        from docarray.documents import PointCloud3D

        pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

        # option 1
        # pc.url.display()

        # option 2 (equivalent)
        # pc.url.load(samples=10000).display()
        ```

        ---

        :param samples: number of points to sample from the mesh.
        """
        self.load(samples=samples, skip_materials=False).display()

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its parts.

    This delegates to the parent implementation. The only difference is that
    a missing `scheme` (`None`) is allowed, which makes it possible to pass a
    file path without prefix.

    :return: the assembled URL; when `scheme` is None, a leading `://` is
        stripped from the result
    """
    has_scheme = scheme is not None
    # The parent implementation is given an empty scheme when none was passed.
    built = super().build(
        scheme=scheme if has_scheme else '',
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )
    if not has_scheme and built.startswith('://'):
        # drop the `://` prefix that is left over from the empty scheme
        return built[3:]
    return built

display(samples=10000)

Plot point cloud from url.

First, it loads the point cloud into a PointsAndColors object, and then calls display on it. The following is therefore equivalent:


import numpy as np
from docarray import BaseDoc

from docarray.documents import PointCloud3D

pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# option 1
# pc.url.display()

# option 2 (equivalent)
# pc.url.load(samples=10000).display()

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh.

10000
Source code in docarray/typing/url/url_3d/point_cloud_url.py
def display(
    self,
    samples: int = 10000,
) -> None:
    """
    Plot the point cloud referenced by this url.

    The point cloud is first loaded (materials included) into a
    `PointsAndColors` object, whose own display method is then called.
    The following is therefore equivalent:

    ---

    ```python
    import numpy as np
    from docarray import BaseDoc

    from docarray.documents import PointCloud3D

    pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # option 1
    # pc.url.display()

    # option 2 (equivalent)
    # pc.url.load(samples=10000).display()
    ```

    ---

    :param samples: number of points to sample from the mesh.
    """
    points_and_colors = self.load(samples=samples, skip_materials=False)
    points_and_colors.display()

extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/url_3d/point_cloud_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns a list of additional file extensions that are valid for this class
    but cannot be identified by the mimetypes library.

    :return: the `POINT_CLOUD_EXTRA_EXTENSIONS` list
    """
    return POINT_CLOUD_EXTRA_EXTENSIONS

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Read url from a proto msg.

    :param pb_msg: the string taken from the proto message; it is parsed
        into `cls` with `parse_obj_as`
    :return: url
    """
    return parse_obj_as(cls, pb_msg)

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Tell whether the file extension found in `value` is acceptable for this class.

    The mime type of the file is guessed first. If the guess does not match
    the mime type of the class, the extra file extensions are consulted.
    Note: any URL without an extension is considered valid.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    # The generic base class places no restriction on extensions.
    if cls is AnyUrl:
        return True

    # The query string is cut off before the mime type is guessed.
    without_query = value.split('?')[0]
    ext = cls._get_url_extension(value)
    if not ext:
        return True

    guessed_type = mimetypes.guess_type(without_query)[0]
    if guessed_type and guessed_type.startswith(cls.mime_type()):
        return True

    return ext in cls.extra_extensions()

load(samples, multiple_geometries=False, skip_materials=True, trimesh_args=None)

Load the data from the url into a PointsAndColors object containing point cloud information.


import numpy as np
from docarray import BaseDoc

from docarray.typing import PointCloud3DUrl


class MyDoc(BaseDoc):
    point_cloud_url: PointCloud3DUrl


doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

# point_cloud = doc.point_cloud_url.load(samples=100)

# assert isinstance(point_cloud, np.ndarray)
# assert point_cloud.shape == (100, 3)

Parameters:

Name Type Description Default
samples int

number of points to sample from the mesh

required
multiple_geometries bool

if False, store point cloud in 2D np.ndarray. If True, store point clouds from multiple geometries in 3D np.ndarray.

False
skip_materials bool

Skip materials if True, else load.

True
trimesh_args Optional[Dict[str, Any]]

dictionary of additional arguments for trimesh.load() or trimesh.load_remote().

None

Returns:

Type Description
PointsAndColors

PointsAndColors object whose points represent the sampled point cloud

Source code in docarray/typing/url/url_3d/point_cloud_url.py
def load(
    self: T,
    samples: int,
    multiple_geometries: bool = False,
    skip_materials: bool = True,
    trimesh_args: Optional[Dict[str, Any]] = None,
) -> 'PointsAndColors':
    """
    Load the data from the url into a `PointsAndColors` object whose
    `points` hold the sampled point cloud (`colors` is set to None).


    ---

    ```python
    from docarray import BaseDoc

    from docarray.typing import PointCloud3DUrl


    class MyDoc(BaseDoc):
        point_cloud_url: PointCloud3DUrl


    doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")

    # points_and_colors = doc.point_cloud_url.load(samples=100)

    # assert points_and_colors.points.shape == (100, 3)
    ```

    ---

    :param samples: number of points to sample from the mesh
    :param multiple_geometries: if False, the file is loaded as a single
        mesh and sampled once. If True, it is loaded as a scene and the
        samples of every geometry are stacked along a new first axis.
    :param skip_materials: Skip materials if True, else load.
    :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
        or `trimesh.load_remote()`.

    :return: PointsAndColors object holding the sampled points
    """
    from docarray.documents.point_cloud.points_and_colors import PointsAndColors

    if not trimesh_args:
        trimesh_args = {}

    if multiple_geometries:
        # try to coerce everything into a scene
        scene = self._load_trimesh_instance(
            force='scene', skip_materials=skip_materials, **trimesh_args
        )
        point_cloud = np.stack(
            [np.array(geo.sample(samples)) for geo in scene.geometry.values()],
            axis=0,
        )
    else:
        # combine a scene into a single mesh; `skip_materials` is forwarded
        # here as well so that the caller's choice is honoured in both
        # branches (it used to be dropped on this path)
        mesh = self._load_trimesh_instance(
            force='mesh', skip_materials=skip_materials, **trimesh_args
        )
        point_cloud = np.array(mesh.sample(samples))

    points = parse_obj_as(NdArray, point_cloud)
    return PointsAndColors(points=points, colors=None)

load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Read the content behind this url and return it as bytes.

    Urls with an `http`, `https` or `data` scheme are opened with urlopen;
    anything else is treated as a local path and read from disk.

    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the value has none of the schemes above and
        no file exists at that path
    """
    url_schemes = {'http', 'https', 'data'}
    if urllib.parse.urlparse(self).scheme in url_schemes:
        request = urllib.request.Request(
            self, headers={'User-Agent': 'Mozilla/5.0'}
        )
        # `timeout` is only forwarded when the caller actually provided one.
        extra_kwargs = {} if timeout is None else {'timeout': timeout}
        with urllib.request.urlopen(request, **extra_kwargs) as response:  # type: ignore
            return response.read()

    if not os.path.exists(self):
        raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')

    with open(self, 'rb') as local_file:
        return local_file.read()

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs of this type have to work for remote resources as well as for local
    files. For that reason, and unlike pydantic, a missing `scheme` is
    accepted, so a plain file path without prefix can be passed.
    If `scheme` is missing, we assume it is a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are inspected
    :param validate_port: if True, `parts['port']` is passed to
        `cls._validate_port`
    :return: the `parts` object that was passed in
    """
    url_scheme = parts['scheme']
    # A missing scheme is tolerated on purpose; only a present one is checked
    # against the allowed schemes.
    scheme_rejected = (
        url_scheme is not None
        and cls.allowed_schemes
        and url_scheme.lower() not in cls.allowed_schemes
    )
    if scheme_rejected:
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    url_user = parts['user']
    if cls.user_required and url_user is None:
        raise errors.UrlUserInfoError()

    return parts

TextUrl

Bases: AnyUrl

URL to a text file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/text_url.py
@_register_proto(proto_type_name='text_url')
class TextUrl(AnyUrl):
    """
    URL pointing to a text file.
    The URL may be remote (web) or a path to a local file.
    """

    @classmethod
    def mime_type(cls) -> str:
        """
        Mime type associated with this class.

        :return: the `TEXT_MIMETYPE` constant
        """
        return TEXT_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        File extensions accepted by this class in addition to those that the
        mimetypes library can identify.

        :return: the `TEXT_EXTRA_EXTENSIONS` list
        """
        return TEXT_EXTRA_EXTENSIONS

    def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
        """
        Read the text file behind this url and return its content as a string.

        The bytes are fetched with `load_bytes` and decoded with `charset`.


        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import TextUrl


        class MyDoc(BaseDoc):
            remote_url: TextUrl


        doc = MyDoc(
            remote_url='https://de.wikipedia.org/wiki/Brixen',
        )

        remote_txt = doc.remote_url.load()
        ```

        ---


        :param charset: decoding charset; may be any character set registered with IANA
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: the text file content
        """
        return self.load_bytes(timeout=timeout).decode(charset)

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its parts.

    This delegates to the parent implementation. The only difference is that
    a missing `scheme` (`None`) is allowed, which makes it possible to pass a
    file path without prefix.

    :return: the assembled URL; when `scheme` is None, a leading `://` is
        stripped from the result
    """
    has_scheme = scheme is not None
    # The parent implementation is given an empty scheme when none was passed.
    built = super().build(
        scheme=scheme if has_scheme else '',
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )
    if not has_scheme and built.startswith('://'):
        # drop the `://` prefix that is left over from the empty scheme
        return built[3:]
    return built

extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/text_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns the file extensions that are accepted for this class even though
    the mimetypes library cannot identify them.
    """
    extensions = TEXT_EXTRA_EXTENSIONS
    return extensions

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Read url from a proto msg.

    :param pb_msg: the string that is parsed into an instance of this class
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Tell whether the file extension of a URL is acceptable for this class.
    The mime type guessed from the URL is checked first; if that does not
    match, the extension is looked up in the extra file extensions.
    Note: any URL without an extension is considered valid, and the base
    `AnyUrl` class accepts everything.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    # the mime type is guessed from the part in front of the first `?`
    without_query = value.split('?')[0]
    extension = cls._get_url_extension(value)
    if extension:
        guessed_type, _encoding = mimetypes.guess_type(without_query)
        if not (guessed_type and guessed_type.startswith(cls.mime_type())):
            # mime type is unknown or does not match: fall back to the extras
            return extension in cls.extra_extensions()
    return True

load(charset='utf-8', timeout=None)

Load the text file into a string.


from docarray import BaseDoc
from docarray.typing import TextUrl


class MyDoc(BaseDoc):
    remote_url: TextUrl


doc = MyDoc(
    remote_url='https://de.wikipedia.org/wiki/Brixen',
)

remote_txt = doc.remote_url.load()

Parameters:

Name Type Description Default
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None
charset str

decoding charset; may be any character set registered with IANA

'utf-8'

Returns:

Type Description
str

the text file content

Source code in docarray/typing/url/text_url.py
def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
    """
    Load the text file into a string.


    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import TextUrl


    class MyDoc(BaseDoc):
        remote_url: TextUrl


    doc = MyDoc(
        remote_url='https://de.wikipedia.org/wiki/Brixen',
    )

    remote_txt = doc.remote_url.load()
    ```

    ---


    :param charset: decoding charset; may be any character set registered with IANA
    :param timeout: timeout (sec) for urlopen network request.
        Only relevant if URL is not local
    :return: the text file content
    """
    # fetch the raw bytes first, then decode them with the requested charset
    return self.load_bytes(timeout=timeout).decode(charset)

load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Return the content behind this url as bytes.
    `http`, `https` and `data` urls are opened with urlopen; anything else is
    read as a local file.
    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the url is neither remote nor an existing path
    """
    scheme = urllib.parse.urlparse(self).scheme
    if scheme not in {'http', 'https', 'data'}:
        # not a remote url: it has to be an existing local path
        if not os.path.exists(self):
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')
        with open(self, 'rb') as local_file:
            return local_file.read()

    request = urllib.request.Request(self, headers={'User-Agent': 'Mozilla/5.0'})
    # only pass `timeout` on when the caller supplied one
    extra_kwargs = {} if timeout is None else {'timeout': timeout}
    with urllib.request.urlopen(request, **extra_kwargs) as response:  # type: ignore
        return response.read()

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the parts of a URL.
    Our URLs should be able to function both in local and remote settings.
    Therefore, a missing `scheme` is accepted, making it possible to pass a
    file path without prefix.
    If `scheme` is missing, we assume it is a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are inspected
    :param validate_port: whether the port should be validated as well
    :return: the unchanged `parts`
    """
    scheme = parts['scheme']
    # a missing scheme is allowed, unlike pydantic; only a present one is checked
    scheme_not_permitted = (
        scheme is not None
        and cls.allowed_schemes
        and scheme.lower() not in cls.allowed_schemes
    )
    if scheme_not_permitted:
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user = parts['user']
    if user is None and cls.user_required:
        raise errors.UrlUserInfoError()

    return parts

VideoUrl

Bases: AnyUrl

URL to a video file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/video_url.py
@_register_proto(proto_type_name='video_url')
class VideoUrl(AnyUrl):
    """
    URL to a video file.
    Can be remote (web) URL, or a local file path.
    """

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with the class."""
        return VIDEO_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns a list of additional file extensions that are valid for this class
        but cannot be identified by the mimetypes library.
        """
        return []

    def load(self: T, **kwargs) -> VideoLoadResult:
        """
        Load the data from the url into a `NamedTuple` of
        [`VideoNdArray`][docarray.typing.VideoNdArray],
        [`AudioNdArray`][docarray.typing.AudioNdArray]
        and [`NdArray`][docarray.typing.NdArray].

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc

        from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


        class MyDoc(BaseDoc):
            video_url: VideoUrl
            video: Optional[VideoNdArray] = None
            audio: Optional[AudioNdArray] = None
            key_frame_indices: Optional[NdArray] = None


        doc = MyDoc(
            video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
        )
        doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

        assert isinstance(doc.video, VideoNdArray)
        assert isinstance(doc.audio, AudioNdArray)
        assert isinstance(doc.key_frame_indices, NdArray)
        ```

        ---

        You can load only the key frames (or video, audio respectively):

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray.typing import NdArray, VideoUrl


        url = parse_obj_as(
            VideoUrl,
            'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
        )
        key_frame_indices = url.load().key_frame_indices
        assert isinstance(key_frame_indices, NdArray)
        ```

        ---

        :param kwargs: `timeout` is passed to `load_bytes`; all remaining keyword
            arguments are forwarded to the loader of the downloaded bytes and are
            meant to be the ones supported by av.open() as described
            [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open)

        :return: [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,
            [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
            [`NdArray`][docarray.typing.NdArray] of the key frame indices.
        """
        # `load_bytes` only accepts `timeout`; passing every keyword to it made
        # any other documented keyword fail with a TypeError
        timeout = kwargs.pop('timeout', None)
        buffer = self.load_bytes(timeout=timeout)
        # NOTE(review): assumes `VideoBytes.load` accepts the av.open() keyword
        # arguments - confirm against its signature
        return buffer.load(**kwargs)

    def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes:
        """
        Convert url to [`VideoBytes`][docarray.typing.VideoBytes]. This will either load or download
        the file and save it into a [`VideoBytes`][docarray.typing.VideoBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`VideoBytes`][docarray.typing.VideoBytes] object
        """
        bytes_ = super().load_bytes(timeout=timeout)
        return VideoBytes(bytes_)

    def display(self):
        """
        Play video from url in notebook.
        Remote videos are downloaded and embedded, local videos are referenced by
        their relative path. Outside of a notebook only a warning is issued.
        """
        if is_notebook():
            from IPython.display import display

            # match the full scheme prefix, so that a local file whose name merely
            # starts with "http" is not mistaken for a remote url
            if self.startswith(('http://', 'https://')):
                from IPython.display import Video

                b = self.load_bytes()
                display(Video(data=b, embed=True, mimetype='video/mp4'))
            else:
                import os

                from IPython.display import HTML

                path = os.path.relpath(self)
                src = f'''
                    <body>
                    <video width="320" height="240" autoplay muted controls>
                    <source src="{path}">
                    Your browser does not support the video tag.
                    </video>
                    </body>
                    '''
                display(HTML(src))

        else:
            warnings.warn('Display of video is only possible in a notebook.')

build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: Optional[str],
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Build a URL from its parts.
    The only difference from the pydantic implementation is that we allow
    missing `scheme`, making it possible to pass a file path without prefix.

    :param scheme: scheme of the URL; `None` if the URL has no scheme
    :param user: forwarded unchanged to the parent `build`
    :param password: forwarded unchanged to the parent `build`
    :param host: forwarded unchanged to the parent `build`
    :param port: forwarded unchanged to the parent `build`
    :param path: forwarded unchanged to the parent `build`
    :param query: forwarded unchanged to the parent `build`
    :param fragment: forwarded unchanged to the parent `build`
    :param _kwargs: forwarded unchanged to the parent `build`
    :return: the built URL; a leading `://` is stripped if `scheme` is `None`
    """

    # allow missing scheme, unlike pydantic: delegate with an empty scheme instead
    url = super().build(
        scheme='' if scheme is None else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )
    if scheme is None and url.startswith('://'):
        # remove the `://` prefix, since scheme is missing
        url = url[3:]
    return url

display()

Play video from url in notebook.

Source code in docarray/typing/url/video_url.py
def display(self):
    """
    Play video from url in notebook.
    Remote videos are downloaded and embedded, local videos are referenced by
    their relative path. Outside of a notebook only a warning is issued.
    """
    if is_notebook():
        from IPython.display import display

        # match the full scheme prefix, so that a local file whose name merely
        # starts with "http" is not mistaken for a remote url
        if self.startswith(('http://', 'https://')):
            from IPython.display import Video

            b = self.load_bytes()
            display(Video(data=b, embed=True, mimetype='video/mp4'))
        else:
            import os

            from IPython.display import HTML

            path = os.path.relpath(self)
            src = f'''
                <body>
                <video width="320" height="240" autoplay muted controls>
                <source src="{path}">
                Your browser does not support the video tag.
                </video>
                </body>
                '''
            display(HTML(src))

    else:
        warnings.warn('Display of video is only possible in a notebook.')

extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/video_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns the file extensions that are accepted for this class even though
    the mimetypes library cannot identify them. There are none for this class.
    """
    return list()

from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Read url from a proto msg.

    :param pb_msg: the string that is parsed into an instance of this class
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url

is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Tell whether the file extension of a URL is acceptable for this class.
    The mime type guessed from the URL is checked first; if that does not
    match, the extension is looked up in the extra file extensions.
    Note: any URL without an extension is considered valid, and the base
    `AnyUrl` class accepts everything.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    # the mime type is guessed from the part in front of the first `?`
    without_query = value.split('?')[0]
    extension = cls._get_url_extension(value)
    if extension:
        guessed_type, _encoding = mimetypes.guess_type(without_query)
        if not (guessed_type and guessed_type.startswith(cls.mime_type())):
            # mime type is unknown or does not match: fall back to the extras
            return extension in cls.extra_extensions()
    return True

load(**kwargs)

Load the data from the url into a NamedTuple of VideoNdArray, AudioNdArray and NdArray.


from typing import Optional

from docarray import BaseDoc

from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


class MyDoc(BaseDoc):
    video_url: VideoUrl
    video: Optional[VideoNdArray] = None
    audio: Optional[AudioNdArray] = None
    key_frame_indices: Optional[NdArray] = None


doc = MyDoc(
    video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
)
doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

assert isinstance(doc.video, VideoNdArray)
assert isinstance(doc.audio, AudioNdArray)
assert isinstance(doc.key_frame_indices, NdArray)

You can load only the key frames (or video, audio respectively):


from pydantic import parse_obj_as

from docarray.typing import NdArray, VideoUrl


url = parse_obj_as(
    VideoUrl,
    'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
)
key_frame_indices = url.load().key_frame_indices
assert isinstance(key_frame_indices, NdArray)

Parameters:

Name Type Description Default
kwargs

supports all keyword arguments that are being supported by av.open() as described here

{}

Returns:

Type Description
VideoLoadResult

VideoNdArray representing the images of the video, AudioNdArray representing the audio content, NdArray of the key frame indices.

Source code in docarray/typing/url/video_url.py
def load(self: T, **kwargs) -> VideoLoadResult:
    """
    Load the data from the url into a `NamedTuple` of
    [`VideoNdArray`][docarray.typing.VideoNdArray],
    [`AudioNdArray`][docarray.typing.AudioNdArray]
    and [`NdArray`][docarray.typing.NdArray].

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc

    from docarray.typing import VideoUrl, VideoNdArray, AudioNdArray, NdArray


    class MyDoc(BaseDoc):
        video_url: VideoUrl
        video: Optional[VideoNdArray] = None
        audio: Optional[AudioNdArray] = None
        key_frame_indices: Optional[NdArray] = None


    doc = MyDoc(
        video_url='https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true'
    )
    doc.video, doc.audio, doc.key_frame_indices = doc.video_url.load()

    assert isinstance(doc.video, VideoNdArray)
    assert isinstance(doc.audio, AudioNdArray)
    assert isinstance(doc.key_frame_indices, NdArray)
    ```

    ---

    You can load only the key frames (or video, audio respectively):

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray.typing import NdArray, VideoUrl


    url = parse_obj_as(
        VideoUrl,
        'https://github.com/docarray/docarray/blob/main/tests/toydata/mov_bbb.mp4?raw=true',
    )
    key_frame_indices = url.load().key_frame_indices
    assert isinstance(key_frame_indices, NdArray)
    ```

    ---

    :param kwargs: `timeout` is passed to `load_bytes`; all remaining keyword
        arguments are forwarded to the loader of the downloaded bytes and are
        meant to be the ones supported by av.open() as described
        [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open)

    :return: [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,
        [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
        [`NdArray`][docarray.typing.NdArray] of the key frame indices.
    """
    # `load_bytes` only accepts `timeout`; passing every keyword to it made
    # any other documented keyword fail with a TypeError
    timeout = kwargs.pop('timeout', None)
    buffer = self.load_bytes(timeout=timeout)
    # NOTE(review): assumes `VideoBytes.load` accepts the av.open() keyword
    # arguments - confirm against its signature
    return buffer.load(**kwargs)

load_bytes(timeout=None)

Convert url to VideoBytes. This will either load or download the file and save it into a VideoBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
VideoBytes

VideoBytes object

Source code in docarray/typing/url/video_url.py
def load_bytes(self, timeout: Optional[float] = None) -> VideoBytes:
    """
    Convert url to [`VideoBytes`][docarray.typing.VideoBytes]. The file is loaded
    or downloaded by the parent class and the resulting bytes are wrapped in a
    [`VideoBytes`][docarray.typing.VideoBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`VideoBytes`][docarray.typing.VideoBytes] object
    """
    return VideoBytes(super().load_bytes(timeout=timeout))

validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the parts of a URL.
    Our URLs should be able to function both in local and remote settings.
    Therefore, a missing `scheme` is accepted, making it possible to pass a
    file path without prefix.
    If `scheme` is missing, we assume it is a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are inspected
    :param validate_port: whether the port should be validated as well
    :return: the unchanged `parts`
    """
    scheme = parts['scheme']
    # a missing scheme is allowed, unlike pydantic; only a present one is checked
    scheme_not_permitted = (
        scheme is not None
        and cls.allowed_schemes
        and scheme.lower() not in cls.allowed_schemes
    )
    if scheme_not_permitted:
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user = parts['user']
    if user is None and cls.user_required:
        raise errors.UrlUserInfoError()

    return parts

any_url

AnyUrl

Bases: AnyUrl, AbstractType

Source code in docarray/typing/url/any_url.py
@_register_proto(proto_type_name='any_url')
class AnyUrl(BaseAnyUrl, AbstractType):
    """
    URL type that can also hold a local file path.
    Subclasses provide `mime_type` and `extra_extensions`, which are used to
    check the file extension of a value during validation.
    """

    host_required = (
        False  # turn off host requirement to allow passing of local paths as URL
    )

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with the class."""
        raise NotImplementedError

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """Returns a list of allowed file extensions for the class
        that are not covered by the mimetypes library."""
        raise NotImplementedError

    def _to_node_protobuf(self) -> 'NodeProto':
        """Convert the url into a NodeProto protobuf message. This function should
        be called when the url is nested into a Document that needs to
        be converted into a protobuf

        :return: the nested item protobuf message
        """
        from docarray.proto import NodeProto

        return NodeProto(text=str(self), type=self._proto_type_name)

    @staticmethod
    def _get_url_extension(url: str) -> str:
        """
        Extracts and returns the file extension from a given URL.
        If no file extension is present, the function returns an empty string.


        :param url: The URL to extract the file extension from.
        :return: The file extension without the period, if one exists,
            otherwise an empty string.
        """

        # only the path component is inspected, so query and fragment are ignored
        parsed_url = urllib.parse.urlparse(url)
        ext = os.path.splitext(parsed_url.path)[1]
        ext = ext[1:] if ext.startswith('.') else ext
        return ext

    @classmethod
    def is_extension_allowed(cls, value: Any) -> bool:
        """
        Check if the file extension of the URL is allowed for this class.
        First, it guesses the mime type of the file. If it fails to detect the
        mime type, it then checks the extra file extensions.
        Note: This method assumes that any URL without an extension is valid.

        :param value: The URL or file path.
        :return: True if the extension is allowed, False otherwise
        """
        if cls is AnyUrl:
            return True

        url_parts = value.split('?')
        extension = cls._get_url_extension(value)
        if not extension:
            return True

        mimetype, _ = mimetypes.guess_type(url_parts[0])
        if mimetype and mimetype.startswith(cls.mime_type()):
            return True

        return extension in cls.extra_extensions()

    @classmethod
    def validate(
        cls: Type[T],
        value: Union[T, np.ndarray, Any],
        field: 'ModelField',
        config: 'BaseConfig',
    ) -> T:
        """
        Validate a value as a URL or local file path and check its extension.

        :param value: the URL or file path to validate
        :param field: passed on to the parent validation
        :param config: passed on to the parent validation
        :return: an instance of this class; a relative path is kept as it was
            passed in
        :raises ValueError: if the file extension is not allowed for this class
        """
        abs_path: Union[T, np.ndarray, Any]
        if (
            isinstance(value, str)
            and not value.startswith('http')
            and not os.path.isabs(value)
        ):
            # the basic validation below is run on the absolute form of the path
            input_is_relative_path = True
            abs_path = os.path.abspath(value)
        else:
            input_is_relative_path = False
            abs_path = value

        url = super().validate(abs_path, field, config)  # basic url validation

        if not cls.is_extension_allowed(value):
            raise ValueError(
                f"The file '{value}' is not in a valid format for class '{cls.__name__}'."
            )

        # hand back the relative path untouched instead of its absolute form
        return cls(str(value if input_is_relative_path else url), scheme=None)

    @classmethod
    def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
        """
        A method used to validate parts of a URL.
        Our URLs should be able to function both in local and remote settings.
        Therefore, we allow missing `scheme`, making it possible to pass a file
        path without prefix.
        If `scheme` is missing, we assume it is a local file path.

        :param parts: the URL parts; `scheme`, `port` and `user` are inspected
        :param validate_port: whether the port should be validated as well
        :return: the unchanged `parts`
        """
        scheme = parts['scheme']
        if scheme is None:
            # allow missing scheme, unlike pydantic
            pass

        elif cls.allowed_schemes and scheme.lower() not in cls.allowed_schemes:
            raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

        if validate_port:
            cls._validate_port(parts['port'])

        user = parts['user']
        if cls.user_required and user is None:
            raise errors.UrlUserInfoError()

        return parts

    @classmethod
    def build(
        cls,
        *,
        scheme: Optional[str],
        user: Optional[str] = None,
        password: Optional[str] = None,
        host: str,
        port: Optional[str] = None,
        path: Optional[str] = None,
        query: Optional[str] = None,
        fragment: Optional[str] = None,
        **_kwargs: str,
    ) -> str:
        """
        Build a URL from its parts.
        The only difference from the pydantic implementation is that we allow
        missing `scheme`, making it possible to pass a file path without prefix.

        :param scheme: scheme of the URL; `None` if the URL has no scheme
        :return: the built URL; a leading `://` is stripped if `scheme` is `None`
        """

        # allow missing scheme, unlike pydantic
        scheme_ = scheme if scheme is not None else ''
        url = super().build(
            scheme=scheme_,
            user=user,
            password=password,
            host=host,
            port=port,
            path=path,
            query=query,
            fragment=fragment,
            **_kwargs,
        )
        if scheme is None and url.startswith('://'):
            # remove the `://` prefix, since scheme is missing
            url = url[3:]
        return url

    @classmethod
    def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
        """
        Read url from a proto msg.
        :param pb_msg: the string that is parsed into an instance of this class
        :return: url
        """
        return parse_obj_as(cls, pb_msg)

    def load_bytes(self, timeout: Optional[float] = None) -> bytes:
        """Convert url to bytes. This will either load or download the file and save
        it into a bytes object.
        :param timeout: timeout for urlopen. Only relevant if URI is not local
        :return: bytes.
        :raises FileNotFoundError: if the url is neither remote nor an existing path
        """
        if urllib.parse.urlparse(self).scheme in {'http', 'https', 'data'}:
            req = urllib.request.Request(
                self, headers={'User-Agent': 'Mozilla/5.0'}
            )
            # only pass `timeout` on when the caller supplied one
            urlopen_kwargs = {'timeout': timeout} if timeout is not None else {}
            with urllib.request.urlopen(req, **urlopen_kwargs) as fp:  # type: ignore
                return fp.read()
        elif os.path.exists(self):
            with open(self, 'rb') as fp:
                return fp.read()
        else:
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')
build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: Optional[str],
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Build a URL from its parts.
    The only difference from the pydantic implementation is that we allow
    missing `scheme`, making it possible to pass a file path without prefix.

    :param scheme: scheme of the URL; `None` if the URL has no scheme
    :param user: forwarded unchanged to the parent `build`
    :param password: forwarded unchanged to the parent `build`
    :param host: forwarded unchanged to the parent `build`
    :param port: forwarded unchanged to the parent `build`
    :param path: forwarded unchanged to the parent `build`
    :param query: forwarded unchanged to the parent `build`
    :param fragment: forwarded unchanged to the parent `build`
    :param _kwargs: forwarded unchanged to the parent `build`
    :return: the built URL; a leading `://` is stripped if `scheme` is `None`
    """

    # allow missing scheme, unlike pydantic: delegate with an empty scheme instead
    url = super().build(
        scheme='' if scheme is None else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )
    if scheme is None and url.startswith('://'):
        # remove the `://` prefix, since scheme is missing
        url = url[3:]
    return url
extra_extensions() classmethod

Returns a list of allowed file extensions for the class that are not covered by the mimetypes library.

Source code in docarray/typing/url/any_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    Returns the file extensions that are allowed for the class although the
    mimetypes library does not cover them. Not implemented on this class.
    """
    raise NotImplementedError()
from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Read url from a proto msg.

    :param pb_msg: the string that is parsed into an instance of this class
    :return: url
    """
    url = parse_obj_as(cls, pb_msg)
    return url
is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Tell whether the file extension of a URL is acceptable for this class.
    The mime type guessed from the URL is checked first; if that does not
    match, the extension is looked up in the extra file extensions.
    Note: any URL without an extension is considered valid, and the base
    `AnyUrl` class accepts everything.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    # the mime type is guessed from the part in front of the first `?`
    without_query = value.split('?')[0]
    extension = cls._get_url_extension(value)
    if extension:
        guessed_type, _encoding = mimetypes.guess_type(without_query)
        if not (guessed_type and guessed_type.startswith(cls.mime_type())):
            # mime type is unknown or does not match: fall back to the extras
            return extension in cls.extra_extensions()
    return True
load_bytes(timeout=None)

Convert url to bytes. This will either load or download the file and save it into a bytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if URI is not local

None

Returns:

Type Description
bytes

bytes.

Source code in docarray/typing/url/any_url.py
def load_bytes(self, timeout: Optional[float] = None) -> bytes:
    """Read the content behind this url into a bytes object.

    Urls with an `http`, `https` or `data` scheme are opened with urlopen;
    anything else is treated as a local path and read from disk.

    :param timeout: timeout for urlopen. Only relevant if URI is not local
    :return: bytes.
    :raises FileNotFoundError: if the value has none of the schemes above and
        does not exist as a local path
    """
    remote_schemes = {'http', 'https', 'data'}
    if urllib.parse.urlparse(self).scheme not in remote_schemes:
        # Local branch: the path has to exist before it is opened.
        if not os.path.exists(self):
            raise FileNotFoundError(f'`{self}` is not a URL or a valid local path')
        with open(self, 'rb') as local_file:
            return local_file.read()

    request = urllib.request.Request(self, headers={'User-Agent': 'Mozilla/5.0'})
    # Only forward `timeout` when the caller supplied one.
    if timeout is None:
        response_ctx = urllib.request.urlopen(request)
    else:
        response_ctx = urllib.request.urlopen(request, timeout=timeout)
    with response_ctx as response:
        return response.read()
mime_type() classmethod

Returns the mime type associated with the class.

Source code in docarray/typing/url/any_url.py
@classmethod
def mime_type(cls) -> str:
    """
    Return the mime type associated with the class.

    Not implemented here: calling this version raises `NotImplementedError`.
    """
    raise NotImplementedError()
validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs of this type must work for remote resources as well as for local
    files, so a missing `scheme` is accepted: a value without a scheme is
    assumed to be a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are read
    :param validate_port: whether `parts['port']` is passed to
        `cls._validate_port`
    :return: the same `parts` object that was passed in
    """
    url_scheme = parts['scheme']
    # allow missing scheme, unlike pydantic: only a scheme that is present
    # is checked against the allowed ones
    scheme_given = url_scheme is not None
    if (
        scheme_given
        and cls.allowed_schemes
        and url_scheme.lower() not in cls.allowed_schemes
    ):
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user_info = parts['user']
    if cls.user_required and user_info is None:
        raise errors.UrlUserInfoError()

    return parts

audio_url

AudioUrl

Bases: AnyUrl

URL to an audio file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/audio_url.py
@_register_proto(proto_type_name='audio_url')
class AudioUrl(AnyUrl):
    """
    URL to an audio file.
    Can be remote (web) URL, or a local file path.
    """

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with the class (`AUDIO_MIMETYPE`)."""
        return AUDIO_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns a list of additional file extensions that are valid for this class
        but cannot be identified by the mimetypes library.
        """
        return []

    def load(self: T) -> Tuple[AudioNdArray, int]:
        """
        Load the data from the url into an [`AudioNdArray`][docarray.typing.AudioNdArray]
        and the frame rate.

        ---

        ```python
        from typing import Optional

        from docarray import BaseDoc
        from docarray.typing import AudioNdArray, AudioUrl


        class MyDoc(BaseDoc):
            audio_url: AudioUrl
            audio_tensor: Optional[AudioNdArray] = None


        doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
        doc.audio_tensor, _ = doc.audio_url.load()
        assert isinstance(doc.audio_tensor, AudioNdArray)
        ```

        ---

        :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
            the audio file content, and an integer representing the frame rate.

        """
        bytes_ = self.load_bytes()
        return bytes_.load()

    def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
        """
        Convert url to [`AudioBytes`][docarray.typing.AudioBytes]. This will either load or
        download the file and save it into an [`AudioBytes`][docarray.typing.AudioBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`AudioBytes`][docarray.typing.AudioBytes] object
        """
        bytes_ = super().load_bytes(timeout=timeout)
        return AudioBytes(bytes_)

    def display(self):
        """
        Play the audio sound from url in notebook.

        Outside of a notebook nothing is played and a warning is emitted instead.
        """
        if not is_notebook():
            warnings.warn('Display of audio is only possible in a notebook.')
            return

        from IPython.display import Audio, display

        # Require a full `http://` / `https://` prefix: a bare
        # `startswith('http')` also matches local paths such as
        # `http_dump/sound.wav` and would send them down the remote branch.
        if self.startswith(('http://', 'https://')):
            display(Audio(data=self))
        else:
            display(Audio(filename=self))
build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its parts.

    Behaves like the pydantic implementation it delegates to, except that a
    missing `scheme` is allowed, so a file path without prefix can be passed.

    :return: the built URL; when `scheme` is None a leading `://` is removed
    """
    no_scheme = scheme is None

    # allow missing scheme, unlike pydantic: hand the parent an empty one
    built = super().build(
        scheme='' if no_scheme else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )

    if no_scheme and built.startswith('://'):
        # remove the `://` prefix, since scheme is missing
        return built[3:]
    return built
display()

Play the audio sound from url in notebook.

Source code in docarray/typing/url/audio_url.py
def display(self):
    """
    Play the audio sound from url in notebook.

    Outside of a notebook nothing is played and a warning is emitted instead.
    """
    if not is_notebook():
        warnings.warn('Display of audio is only possible in a notebook.')
        return

    from IPython.display import Audio, display

    # Require a full `http://` / `https://` prefix: a bare
    # `startswith('http')` also matches local paths such as
    # `http_dump/sound.wav` and would send them down the remote branch.
    if self.startswith(('http://', 'https://')):
        display(Audio(data=self))
    else:
        display(Audio(filename=self))
extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/audio_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    List the file extensions that are valid for this class although the
    mimetypes library cannot identify them.

    :return: an empty list
    """
    no_extra_extensions: List[str] = []
    return no_extra_extensions
from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Build an instance of this url type from its protobuf representation.

    :param pb_msg: the value read from the proto message; it is passed to
        `parse_obj_as` together with `cls`
    :return: url
    """
    parsed_url = parse_obj_as(cls, pb_msg)
    return parsed_url
is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Check if the file extension of the URL is allowed for this class.
    First, it guesses the mime type of the file. If it fails to detect the
    mime type, it then checks the extra file extensions.
    Note: This method assumes that any URL without an extension is valid.
    The base `AnyUrl` class itself accepts every value.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    extension = cls._get_url_extension(value)
    if not extension:
        return True

    # Guess from the path component only. Cutting the value at '?' leaves a
    # trailing '#fragment' or ';params' in place, and depending on the Python
    # version `mimetypes.guess_type` may treat it as part of the extension,
    # which made valid URLs such as 'https://host/a.png#top' fail the check.
    url_path = urllib.parse.urlparse(value).path
    mimetype, _ = mimetypes.guess_type(url_path)
    if mimetype and mimetype.startswith(cls.mime_type()):
        return True

    # Fall back to the extensions the mimetypes library does not know about.
    return extension in cls.extra_extensions()
load()

Load the data from the url into an AudioNdArray and the frame rate.


from typing import Optional

from docarray import BaseDoc
from docarray.typing import AudioNdArray, AudioUrl


class MyDoc(BaseDoc):
    audio_url: AudioUrl
    audio_tensor: Optional[AudioNdArray] = None


doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
doc.audio_tensor, _ = doc.audio_url.load()
assert isinstance(doc.audio_tensor, AudioNdArray)

Returns:

Type Description
Tuple[AudioNdArray, int]

tuple of an AudioNdArray representing the audio file content, and an integer representing the frame rate.

Source code in docarray/typing/url/audio_url.py
def load(self: T) -> Tuple[AudioNdArray, int]:
    """
    Read the audio behind this url and return it as an
    [`AudioNdArray`][docarray.typing.AudioNdArray] together with the frame rate.

    ---

    ```python
    from typing import Optional

    from docarray import BaseDoc
    from docarray.typing import AudioNdArray, AudioUrl


    class MyDoc(BaseDoc):
        audio_url: AudioUrl
        audio_tensor: Optional[AudioNdArray] = None


    doc = MyDoc(audio_url='https://www.kozco.com/tech/piano2.wav')
    doc.audio_tensor, _ = doc.audio_url.load()
    assert isinstance(doc.audio_tensor, AudioNdArray)
    ```

    ---

    :return: tuple of an [`AudioNdArray`][docarray.typing.AudioNdArray] representing
        the audio file content, and an integer representing the frame rate.

    """
    # The bytes object returned by `load_bytes` does the actual decoding.
    return self.load_bytes().load()
load_bytes(timeout=None)

Convert url to AudioBytes. This will either load or download the file and save it into an AudioBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
AudioBytes

AudioBytes object

Source code in docarray/typing/url/audio_url.py
def load_bytes(self, timeout: Optional[float] = None) -> AudioBytes:
    """
    Load or download the file behind this url and wrap its content in an
    [`AudioBytes`][docarray.typing.AudioBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`AudioBytes`][docarray.typing.AudioBytes] object
    """
    return AudioBytes(super().load_bytes(timeout=timeout))
validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs of this type must work for remote resources as well as for local
    files, so a missing `scheme` is accepted: a value without a scheme is
    assumed to be a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are read
    :param validate_port: whether `parts['port']` is passed to
        `cls._validate_port`
    :return: the same `parts` object that was passed in
    """
    url_scheme = parts['scheme']
    # allow missing scheme, unlike pydantic: only a scheme that is present
    # is checked against the allowed ones
    scheme_given = url_scheme is not None
    if (
        scheme_given
        and cls.allowed_schemes
        and url_scheme.lower() not in cls.allowed_schemes
    ):
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user_info = parts['user']
    if cls.user_required and user_info is None:
        raise errors.UrlUserInfoError()

    return parts

image_url

ImageUrl

Bases: AnyUrl

URL to an image file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/image_url.py
@_register_proto(proto_type_name='image_url')
class ImageUrl(AnyUrl):
    """
    URL to an image file.
    Can be remote (web) URL, or a local file path.
    """

    @classmethod
    def mime_type(cls) -> str:
        """Returns the mime type associated with the class (`IMAGE_MIMETYPE`)."""
        return IMAGE_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns a list of additional file extensions that are valid for this class
        but cannot be identified by the mimetypes library.
        """
        return []

    def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
        """
        Load the image from the bytes into a `PIL.Image.Image` instance

        ---

        ```python
        from pydantic import parse_obj_as

        from docarray import BaseDoc
        from docarray.typing import ImageUrl

        img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

        img_url = parse_obj_as(ImageUrl, img_url)
        img = img_url.load_pil()

        from PIL.Image import Image

        assert isinstance(img, Image)
        ```

        ---
        :param timeout: timeout forwarded to `load_bytes`
        :return: a Pillow image
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        return ImageBytes(self.load_bytes(timeout=timeout)).load_pil()

    def load(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
        timeout: Optional[float] = None,
    ) -> ImageNdArray:
        """
        Load the data from the url into an [`ImageNdArray`][docarray.typing.ImageNdArray]

        ---

        ```python
        from docarray import BaseDoc
        from docarray.typing import ImageUrl, ImageNdArray


        class MyDoc(BaseDoc):
            img_url: ImageUrl


        doc = MyDoc(
            img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
            "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
        )

        img_tensor = doc.img_url.load()
        assert isinstance(img_tensor, ImageNdArray)

        img_tensor = doc.img_url.load(height=224, width=224)
        assert img_tensor.shape == (224, 224, 3)

        layout = ('C', 'W', 'H')
        img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
        assert img_tensor.shape == (3, 200, 100)
        ```

        ---

        :param width: width of the image tensor.
        :param height: height of the image tensor.
        :param axis_layout: ordering of the different image axes.
            'H' = height, 'W' = width, 'C' = color channel
        :param timeout: timeout (sec) for urlopen network request.
            Only relevant if URL is not local
        :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
        """
        from docarray.typing.bytes.image_bytes import ImageBytes

        buffer = ImageBytes(self.load_bytes(timeout=timeout))
        return buffer.load(width, height, axis_layout)

    def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
        """
        Convert url to [`ImageBytes`][docarray.typing.ImageBytes]. This will either load or
        download the file and save it into an [`ImageBytes`][docarray.typing.ImageBytes] object.

        :param timeout: timeout for urlopen. Only relevant if url is not local
        :return: [`ImageBytes`][docarray.typing.ImageBytes] object
        """
        bytes_ = super().load_bytes(timeout=timeout)
        return ImageBytes(bytes_)

    def display(self) -> None:
        """
        Display image data from url in notebook.

        Outside of a notebook nothing is shown and a warning is emitted instead.
        """
        if not is_notebook():
            warnings.warn('Display of image is only possible in a notebook.')
            return

        from IPython.display import Image, display

        # Require a full `http://` / `https://` prefix: a bare
        # `startswith('http')` also matches local paths such as
        # `http_dump/picture.png` and would send them down the remote branch.
        if self.startswith(('http://', 'https://')):
            display(Image(url=self))
        else:
            display(Image(filename=self))
build(*, scheme, user=None, password=None, host, port=None, path=None, query=None, fragment=None, **_kwargs) classmethod

Build a URL from its parts. The only difference from the pydantic implementation is that we allow missing scheme, making it possible to pass a file path without prefix.

Source code in docarray/typing/url/any_url.py
@classmethod
def build(
    cls,
    *,
    scheme: str,
    user: Optional[str] = None,
    password: Optional[str] = None,
    host: str,
    port: Optional[str] = None,
    path: Optional[str] = None,
    query: Optional[str] = None,
    fragment: Optional[str] = None,
    **_kwargs: str,
) -> str:
    """
    Assemble a URL string from its parts.

    Behaves like the pydantic implementation it delegates to, except that a
    missing `scheme` is allowed, so a file path without prefix can be passed.

    :return: the built URL; when `scheme` is None a leading `://` is removed
    """
    no_scheme = scheme is None

    # allow missing scheme, unlike pydantic: hand the parent an empty one
    built = super().build(
        scheme='' if no_scheme else scheme,
        user=user,
        password=password,
        host=host,
        port=port,
        path=path,
        query=query,
        fragment=fragment,
        **_kwargs,
    )

    if no_scheme and built.startswith('://'):
        # remove the `://` prefix, since scheme is missing
        return built[3:]
    return built
display()

Display image data from url in notebook.

Source code in docarray/typing/url/image_url.py
def display(self) -> None:
    """
    Display image data from url in notebook.

    Outside of a notebook nothing is shown and a warning is emitted instead.
    """
    if not is_notebook():
        warnings.warn('Display of image is only possible in a notebook.')
        return

    from IPython.display import Image, display

    # Require a full `http://` / `https://` prefix: a bare
    # `startswith('http')` also matches local paths such as
    # `http_dump/picture.png` and would send them down the remote branch.
    if self.startswith(('http://', 'https://')):
        display(Image(url=self))
    else:
        display(Image(filename=self))
extra_extensions() classmethod

Returns a list of additional file extensions that are valid for this class but cannot be identified by the mimetypes library.

Source code in docarray/typing/url/image_url.py
@classmethod
def extra_extensions(cls) -> List[str]:
    """
    List the file extensions that are valid for this class although the
    mimetypes library cannot identify them.

    :return: an empty list
    """
    no_extra_extensions: List[str] = []
    return no_extra_extensions
from_protobuf(pb_msg) classmethod

Read url from a proto msg.

Parameters:

Name Type Description Default
pb_msg str
required

Returns:

Type Description
T

url

Source code in docarray/typing/url/any_url.py
@classmethod
def from_protobuf(cls: Type[T], pb_msg: 'str') -> T:
    """
    Build an instance of this url type from its protobuf representation.

    :param pb_msg: the value read from the proto message; it is passed to
        `parse_obj_as` together with `cls`
    :return: url
    """
    parsed_url = parse_obj_as(cls, pb_msg)
    return parsed_url
is_extension_allowed(value) classmethod

Check if the file extension of the URL is allowed for this class. First, it guesses the mime type of the file. If it fails to detect the mime type, it then checks the extra file extensions. Note: This method assumes that any URL without an extension is valid.

Parameters:

Name Type Description Default
value Any

The URL or file path.

required

Returns:

Type Description
bool

True if the extension is allowed, False otherwise

Source code in docarray/typing/url/any_url.py
@classmethod
def is_extension_allowed(cls, value: Any) -> bool:
    """
    Check if the file extension of the URL is allowed for this class.
    First, it guesses the mime type of the file. If it fails to detect the
    mime type, it then checks the extra file extensions.
    Note: This method assumes that any URL without an extension is valid.
    The base `AnyUrl` class itself accepts every value.

    :param value: The URL or file path.
    :return: True if the extension is allowed, False otherwise
    """
    if cls is AnyUrl:
        return True

    extension = cls._get_url_extension(value)
    if not extension:
        return True

    # Guess from the path component only. Cutting the value at '?' leaves a
    # trailing '#fragment' or ';params' in place, and depending on the Python
    # version `mimetypes.guess_type` may treat it as part of the extension,
    # which made valid URLs such as 'https://host/a.png#top' fail the check.
    url_path = urllib.parse.urlparse(value).path
    mimetype, _ = mimetypes.guess_type(url_path)
    if mimetype and mimetype.startswith(cls.mime_type()):
        return True

    # Fall back to the extensions the mimetypes library does not know about.
    return extension in cls.extra_extensions()
load(width=None, height=None, axis_layout=('H', 'W', 'C'), timeout=None)

Load the data from the url into an ImageNdArray


from docarray import BaseDoc
from docarray.typing import ImageUrl, ImageNdArray


class MyDoc(BaseDoc):
    img_url: ImageUrl


doc = MyDoc(
    img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
    "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
)

img_tensor = doc.img_url.load()
assert isinstance(img_tensor, ImageNdArray)

img_tensor = doc.img_url.load(height=224, width=224)
assert img_tensor.shape == (224, 224, 3)

layout = ('C', 'W', 'H')
img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
assert img_tensor.shape == (3, 200, 100)

Parameters:

Name Type Description Default
width Optional[int]

width of the image tensor.

None
height Optional[int]

height of the image tensor.

None
axis_layout Tuple[str, str, str]

ordering of the different image axes. 'H' = height, 'W' = width, 'C' = color channel

('H', 'W', 'C')
timeout Optional[float]

timeout (sec) for urlopen network request. Only relevant if URL is not local

None

Returns:

Type Description
ImageNdArray

ImageNdArray representing the image as RGB values

Source code in docarray/typing/url/image_url.py
def load(
    self,
    width: Optional[int] = None,
    height: Optional[int] = None,
    axis_layout: Tuple[str, str, str] = ('H', 'W', 'C'),
    timeout: Optional[float] = None,
) -> ImageNdArray:
    """
    Read the image behind this url and return it as an
    [`ImageNdArray`][docarray.typing.ImageNdArray]

    ---

    ```python
    from docarray import BaseDoc
    from docarray.typing import ImageUrl, ImageNdArray


    class MyDoc(BaseDoc):
        img_url: ImageUrl


    doc = MyDoc(
        img_url="https://upload.wikimedia.org/wikipedia/commons/8/80/"
        "Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"
    )

    img_tensor = doc.img_url.load()
    assert isinstance(img_tensor, ImageNdArray)

    img_tensor = doc.img_url.load(height=224, width=224)
    assert img_tensor.shape == (224, 224, 3)

    layout = ('C', 'W', 'H')
    img_tensor = doc.img_url.load(height=100, width=200, axis_layout=layout)
    assert img_tensor.shape == (3, 200, 100)
    ```

    ---

    :param width: width of the image tensor.
    :param height: height of the image tensor.
    :param axis_layout: ordering of the different image axes.
        'H' = height, 'W' = width, 'C' = color channel
    :param timeout: timeout (sec) for urlopen network request.
        Only relevant if URL is not local
    :return: [`ImageNdArray`][docarray.typing.ImageNdArray] representing the image as RGB values
    """
    from docarray.typing.bytes.image_bytes import ImageBytes

    raw_content = self.load_bytes(timeout=timeout)
    image_bytes = ImageBytes(raw_content)
    # `width`, `height` and `axis_layout` are forwarded positionally, in this order.
    return image_bytes.load(width, height, axis_layout)
load_bytes(timeout=None)

Convert url to ImageBytes. This will either load or download the file and save it into an ImageBytes object.

Parameters:

Name Type Description Default
timeout Optional[float]

timeout for urlopen. Only relevant if url is not local

None

Returns:

Type Description
ImageBytes

ImageBytes object

Source code in docarray/typing/url/image_url.py
def load_bytes(self, timeout: Optional[float] = None) -> ImageBytes:
    """
    Load or download the file behind this url and wrap its content in an
    [`ImageBytes`][docarray.typing.ImageBytes] object.

    :param timeout: timeout for urlopen. Only relevant if url is not local
    :return: [`ImageBytes`][docarray.typing.ImageBytes] object
    """
    return ImageBytes(super().load_bytes(timeout=timeout))
load_pil(timeout=None)

Load the image from the bytes into a PIL.Image.Image instance


from pydantic import parse_obj_as

from docarray import BaseDoc
from docarray.typing import ImageUrl

img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

img_url = parse_obj_as(ImageUrl, img_url)
img = img_url.load_pil()

from PIL.Image import Image

assert isinstance(img, Image)

Returns:

Type Description
Image

a Pillow image

Source code in docarray/typing/url/image_url.py
def load_pil(self, timeout: Optional[float] = None) -> 'PILImage.Image':
    """
    Read the image behind this url and return it as a `PIL.Image.Image` instance

    ---

    ```python
    from pydantic import parse_obj_as

    from docarray import BaseDoc
    from docarray.typing import ImageUrl

    img_url = "https://upload.wikimedia.org/wikipedia/commons/8/80/Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg"

    img_url = parse_obj_as(ImageUrl, img_url)
    img = img_url.load_pil()

    from PIL.Image import Image

    assert isinstance(img, Image)
    ```

    ---
    :param timeout: timeout forwarded to `load_bytes`
    :return: a Pillow image
    """
    from docarray.typing.bytes.image_bytes import ImageBytes

    raw_content = self.load_bytes(timeout=timeout)
    image_bytes = ImageBytes(raw_content)
    return image_bytes.load_pil()
validate_parts(parts, validate_port=True) classmethod

A method used to validate parts of a URL. Our URLs should be able to function both in local and remote settings. Therefore, we allow missing scheme, making it possible to pass a file path without prefix. If scheme is missing, we assume it is a local file path.

Source code in docarray/typing/url/any_url.py
@classmethod
def validate_parts(cls, parts: 'Parts', validate_port: bool = True) -> 'Parts':
    """
    Validate the individual parts of a URL.

    URLs of this type must work for remote resources as well as for local
    files, so a missing `scheme` is accepted: a value without a scheme is
    assumed to be a local file path.

    :param parts: the URL parts; `scheme`, `port` and `user` are read
    :param validate_port: whether `parts['port']` is passed to
        `cls._validate_port`
    :return: the same `parts` object that was passed in
    """
    url_scheme = parts['scheme']
    # allow missing scheme, unlike pydantic: only a scheme that is present
    # is checked against the allowed ones
    scheme_given = url_scheme is not None
    if (
        scheme_given
        and cls.allowed_schemes
        and url_scheme.lower() not in cls.allowed_schemes
    ):
        raise errors.UrlSchemePermittedError(set(cls.allowed_schemes))

    if validate_port:
        cls._validate_port(parts['port'])

    user_info = parts['user']
    if cls.user_required and user_info is None:
        raise errors.UrlUserInfoError()

    return parts

text_url

TextUrl

Bases: AnyUrl

URL to a text file. Can be remote (web) URL, or a local file path.

Source code in docarray/typing/url/text_url.py
@_register_proto(proto_type_name='text_url')
class TextUrl(AnyUrl):
    """
    URL to a text file.
    Can be remote (web) URL, or a local file path.
    """

    @classmethod
    def mime_type(cls) -> str:
        return TEXT_MIMETYPE

    @classmethod
    def extra_extensions(cls) -> List[str]:
        """
        Returns a list of additional file extensions that are valid for this class
        but cannot be identified by the mimetypes library.
        """
        return TEXT_EXTRA_EXTENSIONS

    def load(self, charset: str = 'utf-8', timeout: Optional[float] = None) -> str:
        """
        Load the text file into a string.


        ---