Bases: BaseDocWithoutId

BaseDoc is the base class for all Documents. This class should be subclassed to create new Document types with a specific schema.

The schema of a Document is defined by the fields of the class.


from docarray import BaseDoc
from docarray.typing import NdArray, ImageUrl
import numpy as np

# Example schema: each annotated class attribute declares one field of the Document.
class MyDoc(BaseDoc):
    embedding: NdArray[512]  # array field parameterized with 512
    image: ImageUrl  # field typed as an image URL

# Field values are passed as keyword arguments when creating the Document.
doc = MyDoc(embedding=np.zeros(512), image='')

BaseDoc is a subclass of pydantic.BaseModel and can be used in a similar way.

    id: Optional[ID] = Field(
        description='The ID of the BaseDoc. This is useful for indexing in vector stores. If not set by user, it will automatically be assigned a random value',
        default_factory=lambda: ID(os.urandom(16).hex()),

dict(*, include=None, exclude=None, by_alias=False, skip_defaults=None, exclude_unset=False, exclude_defaults=False, exclude_none=False)

Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.

Source code in docarray/base_doc/
def dict(
    include: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None,
    exclude: ExcludeType = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False,
) -> 'DictStrAny':
    Generate a dictionary representation of the model, optionally specifying
    which fields to include or exclude.

    exclude, original_exclude, doclist_exclude_fields = self._exclude_doclist(

    data = super().dict(

    for field in doclist_exclude_fields:
        # we need to do this because pydantic will not recognize DocList correctly
        original_exclude = original_exclude or {}
        if field not in original_exclude:
            val = getattr(self, field)
            data[field] = (
                [doc.dict() for doc in val] if val is not None else None

    return data

from_base64(data, protocol='pickle', compress=None) classmethod

Build a Document object from a base64-encoded string


Name Type Description Default
data str

a base64 encoded string

protocol Literal['pickle', 'protobuf']

protocol to use. It can be 'pickle' or 'protobuf'

compress Optional[str]

compress method to use



Type Description

a Document object

Source code in docarray/base_doc/mixins/
def from_base64(
    cls: Type[T],
    data: str,
    protocol: Literal['pickle', 'protobuf'] = 'pickle',
    compress: Optional[str] = None,
) -> T:
    """Build Document object from binary bytes

    :param data: a base64 encoded string
    :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
    :param compress: compress method to use
    :return: a Document object
    return cls.from_bytes(base64.b64decode(data), protocol, compress)

from_bytes(data, protocol='protobuf', compress=None) classmethod

Build Document object from binary bytes


Name Type Description Default
data bytes

binary bytes

protocol ProtocolType

protocol to use. It can be 'pickle' or 'protobuf'

compress Optional[str]

compress method to use



Type Description

a Document object

Source code in docarray/base_doc/mixins/
def from_bytes(
    cls: Type[T],
    data: bytes,
    protocol: ProtocolType = 'protobuf',
    compress: Optional[str] = None,
) -> T:
    """Build Document object from binary bytes

    :param data: binary bytes
    :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
    :param compress: compress method to use
    :return: a Document object
    bstr = _decompress_bytes(data, algorithm=compress)
    if protocol == 'pickle':
        return pickle.loads(bstr)
    elif protocol == 'protobuf':
        from docarray.proto import DocProto

        pb_msg = DocProto()
        return cls.from_protobuf(pb_msg)
        raise ValueError(
            f'protocol={protocol} is not supported. Can be only `protobuf` or '
            f'pickle protocols 0-5.'

from_json(data) classmethod

Build a Document object from JSON data


Type Description

a Document object

Source code in docarray/base_doc/mixins/
def from_json(
    cls: Type[T],
    data: str,
) -> T:
    """Build Document object from json data
    :return: a Document object
    # TODO: add tests

    if is_pydantic_v2:
        return cls.model_validate_json(data)
        return cls.parse_raw(data)

from_protobuf(pb_msg) classmethod

Create a Document from a protobuf message


Name Type Description Default
pb_msg DocProto

the proto message of the Document



Type Description

a Document initialized with the proto data

Source code in docarray/base_doc/mixins/
def from_protobuf(cls: Type[T], pb_msg: 'DocProto') -> T:
    """create a Document from a protobuf message

    :param pb_msg: the proto message of the Document
    :return: a Document initialize with the proto data

    fields: Dict[str, Any] = {}
    load_extra_field = (
        if is_pydantic_v2
        else cls.Config._load_extra_fields_from_protobuf
    for field_name in
        if (
            not (load_extra_field)
            and field_name not in cls._docarray_fields().keys()
            continue  # optimization we don't even load the data if the key does not
            # match any field in the cls or in the mapping

        fields[field_name] = cls._get_content_from_node_proto(
  [field_name], field_name

    return cls(**fields)

json(*, include=None, exclude=None, by_alias=False, skip_defaults=None, exclude_unset=False, exclude_defaults=False, exclude_none=False, encoder=None, models_as_dict=True, **dumps_kwargs)

Generate a JSON representation of the model, include and exclude arguments as per dict().

encoder is an optional function to supply as default to json.dumps(), other arguments as per json.dumps().

Source code in docarray/base_doc/
def json(
    include: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None,
    exclude: ExcludeType = None,
    by_alias: bool = False,
    skip_defaults: Optional[bool] = None,
    exclude_unset: bool = False,
    exclude_defaults: bool = False,
    exclude_none: bool = False,
    encoder: Optional[Callable[[Any], Any]] = None,
    models_as_dict: bool = True,
    **dumps_kwargs: Any,
) -> str:
    Generate a JSON representation of the model, `include` and `exclude`
    arguments as per `dict()`.

    `encoder` is an optional function to supply as `default` to json.dumps(),
    other arguments as per `json.dumps()`.
    exclude, original_exclude, doclist_exclude_fields = self._exclude_docarray(

    # this is copy from pydantic code
    if skip_defaults is not None:
            f'{self.__class__.__name__}.json(): "skip_defaults" is deprecated and replaced by "exclude_unset"',
        exclude_unset = skip_defaults
    encoder = cast(Callable[[Any], Any], encoder or self.__json_encoder__)

    # We don't directly call `self.dict()`, which does exactly this with `to_dict=True`
    # because we want to be able to keep raw `BaseModel` instances and not as `dict`.
    # This allows users to write custom JSON encoders for given `BaseModel` classes.
    data = dict(

    # this is the custom part to deal with DocList
    for field in doclist_exclude_fields:
        # we need to do this because pydantic will not recognize DocList correctly
        original_exclude = original_exclude or {}
        if field not in original_exclude:
            data[field] = getattr(
                self, field
            )  # here we need to keep doclist as doclist otherwise if a user want to have a special json config it will not work

    # this is copy from pydantic code
    if self.__custom_root_type__:
        data = data[ROOT_KEY]
    return self.__config__.json_dumps(data, default=encoder, **dumps_kwargs)

parse_raw(b, *, content_type=None, encoding='utf8', proto=None, allow_pickle=False) classmethod

Parse a raw string or bytes into a base doc


Name Type Description Default
b StrBytes
content_type str
encoding str

the encoding to use when parsing a string, defaults to 'utf8'

proto Protocol

protocol to use.

allow_pickle bool

allow pickle protocol



Type Description

a document

Source code in docarray/base_doc/
def parse_raw(
    cls: Type[T],
    b: 'StrBytes',
    content_type: str = None,
    encoding: str = 'utf8',
    proto: 'Protocol' = None,
    allow_pickle: bool = False,
) -> T:
    Parse a raw string or bytes into a base doc
    :param b:
    :param content_type:
    :param encoding: the encoding to use when parsing a string, defaults to 'utf8'
    :param proto: protocol to use.
    :param allow_pickle: allow pickle protocol
    :return: a document
    return super(BaseDocWithoutId, cls).parse_raw(

schema_summary() classmethod

Print a summary of the Document's schema.

Source code in docarray/base_doc/
def schema_summary(cls) -> None:
    """Print a summary of the Documents schema."""
    from docarray.display.document_summary import DocumentSummary



summary()

Print non-empty fields and nested structure of this Document object.

Source code in docarray/base_doc/
def summary(self) -> None:
    """Print non-empty fields and nested structure of this Document object."""
    from docarray.display.document_summary import DocumentSummary


to_base64(protocol='protobuf', compress=None)

Serialize a Document object into a base64 string


Name Type Description Default
protocol ProtocolType

protocol to use. It can be 'pickle' or 'protobuf'

compress Optional[str]

compress method to use



Type Description

a base64 encoded string

Source code in docarray/base_doc/mixins/
def to_base64(
    self, protocol: ProtocolType = 'protobuf', compress: Optional[str] = None
) -> str:
    """Serialize a Document object into as base64 string

    :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
    :param compress: compress method to use
    :return: a base64 encoded string
    return base64.b64encode(self.to_bytes(protocol, compress)).decode('utf-8')

to_bytes(protocol='protobuf', compress=None)

Serialize itself into bytes.

For more Pythonic code, please use bytes(...).


Name Type Description Default
protocol ProtocolType

protocol to use. It can be 'pickle' or 'protobuf'

compress Optional[str]

compression algorithm to use



Type Description

the binary serialization in bytes

Source code in docarray/base_doc/mixins/
def to_bytes(
    self, protocol: ProtocolType = 'protobuf', compress: Optional[str] = None
) -> bytes:
    """Serialize itself into bytes.

    For more Pythonic code, please use ``bytes(...)``.

    :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
    :param compress: compression algorithm to use
    :return: the binary serialization in bytes
    import pickle

    if protocol == 'pickle':
        bstr = pickle.dumps(self)
    elif protocol == 'protobuf':
        bstr = self.to_protobuf().SerializePartialToString()
        raise ValueError(
            f'protocol={protocol} is not supported. Can be only `protobuf` or '
            f'pickle protocols 0-5.'
    return _compress_bytes(bstr, algorithm=compress)


to_protobuf()

Convert Document into a Protobuf message.


Type Description

the protobuf message

Source code in docarray/base_doc/mixins/
def to_protobuf(self: T) -> 'DocProto':
    """Convert Document into a Protobuf message.

    :return: the protobuf message
    from docarray.proto import DocProto

    data = {}
    for field, value in self:
            data[field] = _type_to_protobuf(value)
        except RecursionError as ex:
            if len(ex.args) >= 1:
                ex.args = (
                        f'Field `{field}` contains cyclic reference in memory. '
                        'Could it be your Document is referring to itself?'
            raise ex
        except Exception as ex:
            if len(ex.args) >= 1:
                ex.args = (f'Field `{field}` is problematic',) + ex.args
            raise ex

    return DocProto(data=data)


update(other)

Updates self with the content of other. Changes are applied to self. Updating one Document with another consists of the following:

  • Setting the data properties of the first Document to the values of the second Document where those values are not None
  • Concatenating lists and updating sets
  • Updating recursively Documents and DocLists
  • Updating the dictionaries of the first Document with those of the second

It behaves as an update operation for Dictionaries, except that since it is applied to a static schema type, the presence of the field is given by the field not having a None value and that DocLists, lists and sets are concatenated. It is worth mentioning that Tuples are not merged together since they are meant to be immutable, so they behave as regular types and the value of self is updated with the value of other.

from typing import List, Optional

from docarray import BaseDoc

# Example schema used to demonstrate `update`.
class MyDocument(BaseDoc):
    content: str
    title: Optional[str] = None
    tags_: List


doc1 = MyDocument(
    content='Core content of the document', title='Title', tags_=['python', 'AI']
)
doc2 = MyDocument(content='Core content updated', tags_=['docarray'])

# Apply doc2 onto doc1; the changes are made to doc1 itself.
doc1.update(doc2)

assert doc1.content == 'Core content updated'  # doc2's non-None value replaces doc1's
assert doc1.title == 'Title'  # doc2.title is None, so doc1's title is kept
assert doc1.tags_ == ['python', 'AI', 'docarray']  # lists are concatenated


Name Type Description Default
other T

The Document with which to update the contents of this Document

Source code in docarray/base_doc/mixins/
def update(self, other: T):
    Updates self with the content of other. Changes are applied to self.
    Updating one Document with another consists in the following:

     - Setting data properties of the second Document to the first Document
     if they are not None
     - Concatenating lists and updating sets
     - Updating recursively Documents and DocLists
     - Updating Dictionaries of the left with the right

    It behaves as an update operation for Dictionaries, except that since
    it is applied to a static schema type, the presence of the field is
    given by the field not having a None value and that DocLists,
    lists and sets are concatenated. It is worth mentioning that Tuples
    are not merged together since they are meant to be immutable,
    so they behave as regular types and the value of `self` is updated
    with the value of `other`.


    from typing import List, Optional

    from docarray import BaseDoc

    class MyDocument(BaseDoc):
        content: str
        title: Optional[str] = None
        tags_: List

    doc1 = MyDocument(
        content='Core content of the document', title='Title', tags_=['python', 'AI']
    doc2 = MyDocument(content='Core content updated', tags_=['docarray'])

    assert doc1.content == 'Core content updated'
    assert doc1.title == 'Title'
    assert doc1.tags_ == ['python', 'AI', 'docarray']

    :param other: The Document with which to update the contents of this
    if not _similar_schemas(self, other):
        raise Exception(
            f'Update operation can only be applied to '
            f'Documents of the same schema. '
            f'Trying to update Document of type '
            f'{type(self)} with Document of type '
    from collections import namedtuple

    from docarray import DocList
    from docarray.utils.reduce import reduce

    # Declaring namedtuple()
    _FieldGroups = namedtuple(


    def _group_fields(doc: 'UpdateMixin') -> _FieldGroups:
        simple_non_empty_fields: List[str] = []
        list_fields: List[str] = []
        set_fields: List[str] = []
        dict_fields: List[str] = []
        nested_docs_fields: List[str] = []
        nested_docarray_fields: List[str] = []

        for field_name, field in doc._docarray_fields().items():
            if field_name not in FORBIDDEN_FIELDS_TO_UPDATE:
                field_type = doc._get_field_annotation(field_name)

                if isinstance(field_type, type) and safe_issubclass(
                    field_type, DocList
                    origin = get_origin(field_type)
                    if origin is list:
                    elif origin is set:
                    elif origin is dict:
                        v = getattr(doc, field_name)
                        if v is not None:
                            if isinstance(v, UpdateMixin):
        return _FieldGroups(

    doc1_fields = _group_fields(self)
    doc2_fields = _group_fields(other)

    for field in doc2_fields.simple_non_empty_fields:
        setattr(self, field, getattr(other, field))

    for field in set(
        doc1_fields.nested_docs_fields + doc2_fields.nested_docs_fields
        sub_doc_1: T = getattr(self, field)
        sub_doc_2: T = getattr(other, field)
        setattr(self, field, sub_doc_1)

    for field in set(doc1_fields.list_fields + doc2_fields.list_fields):
        array1 = getattr(self, field)
        array2 = getattr(other, field)
        if array1 is None and array2 is not None:
            setattr(self, field, array2)
        elif array1 is not None and array2 is not None:
            setattr(self, field, array1)

    for field in set(doc1_fields.set_fields + doc2_fields.set_fields):
        array1 = getattr(self, field)
        array2 = getattr(other, field)
        if array1 is None and array2 is not None:
            setattr(self, field, array2)
        elif array1 is not None and array2 is not None:
            setattr(self, field, array1)

    for field in set(
        doc1_fields.nested_docarray_fields + doc2_fields.nested_docarray_fields
        array1 = getattr(self, field)
        array2 = getattr(other, field)
        if array1 is None and array2 is not None:
            setattr(self, field, array2)
        elif array1 is not None and array2 is not None:
            array1 = reduce(array1, array2)
            setattr(self, field, array1)

    for field in set(doc1_fields.dict_fields + doc2_fields.dict_fields):
        dict1 = getattr(self, field)
        dict2 = getattr(other, field)
        if dict1 is None and dict2 is not None:
            setattr(self, field, dict2)
        elif dict1 is not None and dict2 is not None:
            setattr(self, field, dict1)

Bases: Iterable[Tuple[str, Any]]

IOMixin defines all the bytes/protobuf/JSON-related parts of BaseDoc

Source code in docarray/base_doc/mixins/
class IOMixin(Iterable[Tuple[str, Any]]):
    IOMixin to define all the bytes/protobuf/json related part of BaseDoc

    _docarray_fields: Dict[str, 'FieldInfo']

    class Config:
        _load_extra_fields_from_protobuf: bool

    def _get_field_annotation(cls, field: str) -> Type:

    def _get_field_annotation_array(cls, field: str) -> Type:
        return cls._get_field_annotation(field)

    def __bytes__(self) -> bytes:
        return self.to_bytes()

    def to_bytes(
        self, protocol: ProtocolType = 'protobuf', compress: Optional[str] = None
    ) -> bytes:
        """Serialize itself into bytes.

        For more Pythonic code, please use ``bytes(...)``.

        :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
        :param compress: compression algorithm to use
        :return: the binary serialization in bytes
        import pickle

        if protocol == 'pickle':
            bstr = pickle.dumps(self)
        elif protocol == 'protobuf':
            bstr = self.to_protobuf().SerializePartialToString()
            raise ValueError(
                f'protocol={protocol} is not supported. Can be only `protobuf` or '
                f'pickle protocols 0-5.'
        return _compress_bytes(bstr, algorithm=compress)

    def from_bytes(
        cls: Type[T],
        data: bytes,
        protocol: ProtocolType = 'protobuf',
        compress: Optional[str] = None,
    ) -> T:
        """Build Document object from binary bytes

        :param data: binary bytes
        :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
        :param compress: compress method to use
        :return: a Document object
        bstr = _decompress_bytes(data, algorithm=compress)
        if protocol == 'pickle':
            return pickle.loads(bstr)
        elif protocol == 'protobuf':
            from docarray.proto import DocProto

            pb_msg = DocProto()
            return cls.from_protobuf(pb_msg)
            raise ValueError(
                f'protocol={protocol} is not supported. Can be only `protobuf` or '
                f'pickle protocols 0-5.'

    def to_base64(
        self, protocol: ProtocolType = 'protobuf', compress: Optional[str] = None
    ) -> str:
        """Serialize a Document object into as base64 string

        :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
        :param compress: compress method to use
        :return: a base64 encoded string
        return base64.b64encode(self.to_bytes(protocol, compress)).decode('utf-8')

    def from_base64(
        cls: Type[T],
        data: str,
        protocol: Literal['pickle', 'protobuf'] = 'pickle',
        compress: Optional[str] = None,
    ) -> T:
        """Build Document object from binary bytes

        :param data: a base64 encoded string
        :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
        :param compress: compress method to use
        :return: a Document object
        return cls.from_bytes(base64.b64decode(data), protocol, compress)

    def from_protobuf(cls: Type[T], pb_msg: 'DocProto') -> T:
        """create a Document from a protobuf message

        :param pb_msg: the proto message of the Document
        :return: a Document initialize with the proto data

        fields: Dict[str, Any] = {}
        load_extra_field = (
            if is_pydantic_v2
            else cls.Config._load_extra_fields_from_protobuf
        for field_name in
            if (
                not (load_extra_field)
                and field_name not in cls._docarray_fields().keys()
                continue  # optimization we don't even load the data if the key does not
                # match any field in the cls or in the mapping

            fields[field_name] = cls._get_content_from_node_proto(
      [field_name], field_name

        return cls(**fields)

    def _get_content_from_node_proto(
        value: 'NodeProto',
        field_name: Optional[str] = None,
        field_type: Optional[Type] = None,
    ) -> Any:
        load the proto data from a node proto

        :param value: the proto node value
        :param field_name: the name of the field
        :return: the loaded field
        if field_name is not None and field_type is not None:
            raise ValueError("field_type and field_name cannot be both passed")

        field_type = field_type or (
            cls._get_field_annotation(field_name) if field_name else None

        content_type_dict = _PROTO_TYPE_NAME_TO_CLASS

        content_key = value.WhichOneof('content')
        docarray_type = (
            value.type if value.WhichOneof('docarray_type') is not None else None

        return_field: Any
        if docarray_type in content_type_dict:
            return_field = content_type_dict[docarray_type].from_protobuf(
                getattr(value, content_key)
        elif content_key == 'doc':
            if field_type is None:
                raise ValueError(
                    'field_type cannot be None when trying to deserialize a BaseDoc'
                return_field = field_type.from_protobuf(
                    getattr(value, content_key)
                )  # we get to the parent class
            except Exception:
                if get_origin(field_type) is Union:
                    raise ValueError(
                        'Union type is not supported for proto deserialization. Please use JSON serialization instead'
                raise ValueError(
                    f'{field_type} is not supported for proto deserialization'
        elif content_key == 'doc_array':
            if field_type is not None and field_name is None:
                return_field = field_type.from_protobuf(getattr(value, content_key))
            elif field_name is not None:
                return_field = cls._get_field_annotation_array(
                    getattr(value, content_key)
                )  # we get to the parent class
                raise ValueError(
                    'field_name and field_type cannot be None when trying to deserialize a DocArray'
        elif content_key is None:
            return_field = None
        elif docarray_type is None:
            arg_to_container: Dict[str, Callable] = {
                'list': list,
                'set': set,
                'tuple': tuple,

            if content_key in ['text', 'blob', 'integer', 'float', 'boolean']:
                return_field = getattr(value, content_key)

            elif content_key in arg_to_container.keys():
                if field_name and field_name in cls._docarray_fields():
                    field_type = cls._get_field_inner_type(field_name)

                if isinstance(field_type, GenericAlias):
                    field_type = get_args(field_type)[0]

                return_field = arg_to_container[content_key](
                    cls._get_content_from_node_proto(node, field_type=field_type)
                    for node in getattr(value, content_key).data

            elif content_key == 'dict':
                deser_dict: Dict[str, Any] = dict()

                if field_name and field_name in cls._docarray_fields():
                    if is_pydantic_v2:
                        dict_args = get_args(
                        if len(dict_args) < 2:
                            field_type = Any
                            field_type = dict_args[1]
                        field_type = cls._docarray_fields()[field_name].type_

                    field_type = None

                for key_name, node in
                    deser_dict[key_name] = cls._get_content_from_node_proto(
                        node, field_type=field_type
                return_field = deser_dict
                raise ValueError(
                    f'key {content_key} is not supported for deserialization'

            raise ValueError(
                f'type {docarray_type}, with key {content_key} is not supported for'
                f' deserialization'

        return return_field

    def to_protobuf(self: T) -> 'DocProto':
        """Convert Document into a Protobuf message.

        :return: the protobuf message
        from docarray.proto import DocProto

        data = {}
        for field, value in self:
                data[field] = _type_to_protobuf(value)
            except RecursionError as ex:
                if len(ex.args) >= 1:
                    ex.args = (
                            f'Field `{field}` contains cyclic reference in memory. '
                            'Could it be your Document is referring to itself?'
                raise ex
            except Exception as ex:
                if len(ex.args) >= 1:
                    ex.args = (f'Field `{field}` is problematic',) + ex.args
                raise ex

        return DocProto(data=data)

    def _to_node_protobuf(self) -> 'NodeProto':
        """Convert Document into a NodeProto protobuf message. This function should be
        called when the Document is nest into another Document that need to be
        converted into a protobuf

        :return: the nested item protobuf message
        from docarray.proto import NodeProto

        return NodeProto(doc=self.to_protobuf())

    def _get_access_paths(cls) -> List[str]:
        Get "__"-separated access paths of all fields, including nested ones.

        :return: list of all access paths
        from docarray import BaseDoc

        paths = []
        for field in cls._docarray_fields().keys():
            field_type = cls._get_field_annotation(field)
            if not is_union_type(field_type) and safe_issubclass(field_type, BaseDoc):
                sub_paths = field_type._get_access_paths()
                for path in sub_paths:
        return paths

    def from_json(
        cls: Type[T],
        data: str,
    ) -> T:
        """Build Document object from json data
        :return: a Document object
        # TODO: add tests

        if is_pydantic_v2:
            return cls.model_validate_json(data)
            return cls.parse_raw(data)

Source code in docarray/base_doc/mixins/
class UpdateMixin:
    _docarray_fields: Dict[str, 'ModelField']

    def _get_string_for_regex_filter(self):
        return str(self)

    def _get_field_annotation(cls, field: str) -> Type['UpdateMixin']:

    def update(self, other: T):
        Updates self with the content of other. Changes are applied to self.
        Updating one Document with another consists in the following:

         - Setting data properties of the second Document to the first Document
         if they are not None
         - Concatenating lists and updating sets
         - Updating recursively Documents and DocLists
         - Updating Dictionaries of the left with the right

        It behaves as an update operation for Dictionaries, except that since
        it is applied to a static schema type, the presence of the field is
        given by the field not having a None value and that DocLists,
        lists and sets are concatenated. It is worth mentioning that Tuples
        are not merged together since they are meant to be immutable,
        so they behave as regular types and the value of `self` is updated
        with the value of `other`.


        from typing import List, Optional

        from docarray import BaseDoc

        class MyDocument(BaseDoc):
            content: str
            title: Optional[str] = None
            tags_: List

        doc1 = MyDocument(
            content='Core content of the document', title='Title', tags_=['python', 'AI']
        doc2 = MyDocument(content='Core content updated', tags_=['docarray'])

        assert doc1.content == 'Core content updated'
        assert doc1.title == 'Title'
        assert doc1.tags_ == ['python', 'AI', 'docarray']

        :param other: The Document with which to update the contents of this
        if not _similar_schemas(self, other):
            raise Exception(
                f'Update operation can only be applied to '
                f'Documents of the same schema. '
                f'Trying to update Document of type '
                f'{type(self)} with Document of type '
        from collections import namedtuple

        from docarray import DocList
        from docarray.utils.reduce import reduce

        # Declaring namedtuple()
        _FieldGroups = namedtuple(


        def _group_fields(doc: 'UpdateMixin') -> _FieldGroups:
            simple_non_empty_fields: List[str] = []
            list_fields: List[str] = []
            set_fields: List[str] = []
            dict_fields: List[str] = []
            nested_docs_fields: List[str] = []
            nested_docarray_fields: List[str] = []

            for field_name, field in doc._docarray_fields().items():
                if field_name not in FORBIDDEN_FIELDS_TO_UPDATE:
                    field_type = doc._get_field_annotation(field_name)

                    if isinstance(field_type, type) and safe_issubclass(
                        field_type, DocList
                        origin = get_origin(field_type)
                        if origin is list:
                        elif origin is set:
                        elif origin is dict:
                            v = getattr(doc, field_name)
                            if v is not None:
                                if isinstance(v, UpdateMixin):
            return _FieldGroups(

        doc1_fields = _group_fields(self)
        doc2_fields = _group_fields(other)

        for field in doc2_fields.simple_non_empty_fields:
            setattr(self, field, getattr(other, field))

        for field in set(
            doc1_fields.nested_docs_fields + doc2_fields.nested_docs_fields
            sub_doc_1: T = getattr(self, field)
            sub_doc_2: T = getattr(other, field)
            setattr(self, field, sub_doc_1)

        for field in set(doc1_fields.list_fields + doc2_fields.list_fields):
            array1 = getattr(self, field)
            array2 = getattr(other, field)
            if array1 is None and array2 is not None:
                setattr(self, field, array2)
            elif array1 is not None and array2 is not None:
                setattr(self, field, array1)

        for field in set(doc1_fields.set_fields + doc2_fields.set_fields):
            array1 = getattr(self, field)
            array2 = getattr(other, field)
            if array1 is None and array2 is not None:
                setattr(self, field, array2)
            elif array1 is not None and array2 is not None:
                setattr(self, field, array1)

        for field in set(
            doc1_fields.nested_docarray_fields + doc2_fields.nested_docarray_fields
            array1 = getattr(self, field)
            array2 = getattr(other, field)
            if array1 is None and array2 is not None:
                setattr(self, field, array2)
            elif array1 is not None and array2 is not None:
                array1 = reduce(array1, array2)
                setattr(self, field, array1)

        for field in set(doc1_fields.dict_fields + doc2_fields.dict_fields):
            dict1 = getattr(self, field)
            dict2 = getattr(other, field)
            if dict1 is None and dict2 is not None:
                setattr(self, field, dict2)
            elif dict1 is not None and dict2 is not None:
                setattr(self, field, dict1)


