ElasticDocIndex

`docarray.index.backends.elastic.ElasticDocIndex`

Bases: BaseDocIndex, Generic[TSchema]

Source code in docarray/index/backends/elastic.py

class ElasticDocIndex(BaseDocIndex, Generic[TSchema]):
    _index_vector_params: Optional[Tuple[str]] = ('dims', 'similarity', 'index')
    _index_vector_options: Optional[Tuple[str]] = ('m', 'ef_construction')

    def __init__(self, db_config=None, **kwargs):
        """Connect to Elasticsearch and make sure the backing index is set up.

        Builds the index mappings from the schema columns, then either extends
        the mapping of an already existing index or creates a new index. Any
        configured index settings are applied afterwards and the index is
        refreshed.

        :param db_config: configuration object forwarded to the base class
        :param kwargs: further keyword arguments forwarded to the base class
        """
        super().__init__(db_config=db_config, **kwargs)
        self._db_config = cast(ElasticDocIndex.DBConfig, self._db_config)
        config = self._db_config
        log = self._logger

        log.debug('Elastic Search index is being initialized')

        # Elasticsearch client creation
        self._client = Elasticsearch(hosts=config.hosts, **config.es_config)
        log.debug('ElasticSearch client has been created')

        # Elasticsearch index setup: defaults first, user mappings on top
        mappings: Dict[str, Any] = {
            'dynamic': True,
            '_source': {'enabled': 'true'},
            'properties': {},
        }
        mappings.update(config.index_mappings)
        log.debug('Mappings have been updated with db_config.index_mappings')

        for col_name, col in self._column_infos.items():
            if safe_issubclass(col.docarray_type, AnyDocArray):
                # document-array columns get no field mapping here
                continue

            # a dense vector can only be mapped when its dimensionality is known
            if (
                col.db_type == 'dense_vector'
                and not col.n_dim
                and col.config['dims'] < 0
            ):
                log.info(
                    f'Not indexing column {col_name}, the dimensionality is not specified'
                )
                continue

            mappings['properties'][col_name] = self._create_index_mapping(col)
            log.debug(f'Index mapping created for column {col_name}')

        index_exists = self._client.indices.exists(index=self.index_name)
        if not index_exists:
            self._client_create(mappings)
            log.debug(f'Created new index {self.index_name} with mappings')
        else:
            self._client_put_mapping(mappings)
            log.debug(f'Put mapping for index {self.index_name}')

        if len(config.index_settings) > 0:
            self._client_put_settings(config.index_settings)
            log.debug('Updated index settings')

        self._refresh(self.index_name)
        log.debug(f'Refreshed index {self.index_name}')

    @property
    def index_name(self):
        """Name of the Elasticsearch index used by this document index.

        The name configured in ``db_config.index_name`` wins; when it is not
        set, the lower-cased name of the schema class is used.

        :return: the index name
        :raises ValueError: if the index has no schema
        """
        if self._schema is None:
            err_msg = (
                'A ElasticDocIndex must be typed with a Document type.To do so, use the syntax: '
                'ElasticDocIndex[DocumentType] '
            )
            self._logger.error(err_msg)
            raise ValueError(err_msg)

        schema_based_name = self._schema.__name__.lower()
        return self._db_config.index_name or schema_based_name

    ###############################################
    # Inner classes for query builder and configs #
    ###############################################
    class QueryBuilder(BaseDocIndex.QueryBuilder):
        """Composable builder for Elasticsearch search requests.

        ``find``, ``filter`` and ``text_search`` each record their part of the
        request and return the builder, so calls can be chained. ``build``
        turns the recorded state into the request dict.
        """

        def __init__(self, outer_instance, **kwargs):
            """
            :param outer_instance: the index this builder was created by
            :param kwargs: accepted for interface compatibility; not used here
            """
            super().__init__()
            self._outer_instance = outer_instance
            # nested defaultdicts let callers append to
            # self._query['query']['bool'][<clause>] without creating the levels
            self._query: Dict[str, Any] = {
                'query': defaultdict(lambda: defaultdict(list))
            }

        def build(self, *args, **kwargs) -> Any:
            """Build the elastic search query object.

            The builder's own state is left untouched, so ``build`` may be
            called repeatedly and the builder can still be extended afterwards.

            :return: request dict; recorded query clauses end up under
                ``'knn'['filter']`` when a knn search was recorded, otherwise
                under ``'query'``; an empty query is omitted
            """
            self._outer_instance._logger.debug(
                'Building the Elastic Search query object'
            )

            # work on a shallow copy: deleting keys from self._query would make
            # a second build() (or a later filter()/text_search()) fail
            built = dict(self._query)
            clauses = built.pop('query')

            if len(clauses) > 0:
                if 'knn' in built:
                    # copy the knn body as well so the stored one is not modified
                    built['knn'] = {**built['knn'], 'filter': clauses}
                else:
                    built['query'] = clauses

            return built

        def find(
            self,
            query: Union[AnyTensor, BaseDoc],
            search_field: str = 'embedding',
            limit: int = 10,
            num_candidates: Optional[int] = None,
        ):
            """
            Find k-nearest neighbors of the query.

            :param query: query vector for KNN/ANN search. Has single axis.
            :param search_field: name of the field to search on
            :param limit: maximum number of documents to return per query
            :param num_candidates: number of candidates
            :return: self
            """
            self._outer_instance._logger.debug('Executing find query')

            self._outer_instance._validate_search_field(search_field)
            if isinstance(query, BaseDoc):
                query_vec = BaseDocIndex._get_values_by_column([query], search_field)[0]
            else:
                query_vec = query
            query_vec_np = BaseDocIndex._to_numpy(self._outer_instance, query_vec)
            # only the 'knn' part of the search body is kept
            self._query['knn'] = self._outer_instance._form_search_body(
                query_vec_np,
                limit,
                search_field,
                num_candidates,
            )['knn']

            return self

        # filter accepts Leaf/Compound query clauses
        # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
        def filter(self, query: Dict[str, Any], limit: int = 10):
            """Find documents in the index based on a filter query

            :param query: the query to execute
            :param limit: maximum number of documents to return
            :return: self
            """
            self._outer_instance._logger.debug('Executing filter query')

            self._query['size'] = limit
            self._query['query']['bool']['filter'].append(query)
            return self

        def text_search(self, query: str, search_field: str = 'text', limit: int = 10):
            """Find documents in the index based on a text search query

            :param query: The text to search for
            :param search_field: name of the field to search on
            :param limit: maximum number of documents to find
            :return: self
            """
            self._outer_instance._logger.debug('Executing text search query')

            self._outer_instance._validate_search_field(search_field)
            self._query['size'] = limit
            self._query['query']['bool']['must'].append(
                {'match': {search_field: query}}
            )
            return self

        find_batched = _raise_not_composable('find_batched')
        filter_batched = _raise_not_composable('filter_batched')
        text_search_batched = _raise_not_composable('text_search_batched')

    def build_query(self, **kwargs) -> QueryBuilder:
        """
        Create a new query builder bound to this ElasticDocIndex.

        :param kwargs: parameters to forward to QueryBuilder initialization
        :return: QueryBuilder object
        """
        builder = self.QueryBuilder(self, **kwargs)
        return builder

    @dataclass
    class DBConfig(BaseDocIndex.DBConfig):
        """Dataclass that contains all "static" configurations of ElasticDocIndex."""

        hosts: Union[
            str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]], None
        ] = 'http://localhost:9200'
        index_name: Optional[str] = None
        es_config: Dict[str, Any] = field(default_factory=dict)
        index_settings: Dict[str, Any] = field(default_factory=dict)
        index_mappings: Dict[str, Any] = field(default_factory=dict)
        default_column_config: Dict[Any, Dict[str, Any]] = field(default_factory=dict)

        def __post_init__(self):
            """Fill ``default_column_config`` with the per-type defaults.

            Entries passed in by the caller are merged on top of the defaults
            (per database type, key by key) instead of being discarded.
            """
            plain_types = (
                'binary',
                'boolean',
                'keyword',
                'long',
                'integer',
                'short',
                'byte',
                'double',
                'float',
                'half_float',
                'scaled_float',
                'unsigned_long',
                # 'dates' is kept for backward compatibility; the Elasticsearch
                # field types themselves are 'date' and 'date_nanos'
                'dates',
                'date',
                'date_nanos',
                'alias',
                'object',
                'flattened',
                'nested',
                'join',
                'integer_range',
                'float_range',
                'long_range',
                'double_range',
                'date_range',
                'ip_range',
                'ip',
                'version',
                'histogram',
                'text',
                'annotated_text',
                'completion',
                'search_as_you_type',
                'token_count',
                'sparse_vector',
                'rank_feature',
                'rank_features',
                'geo_point',
                'geo_shape',
                'point',
                'shape',
                'percolator',
                # `None` is not a Type, but we allow it here anyway
                None,
            )
            overrides = self.default_column_config or {}

            merged: Dict[Any, Dict[str, Any]] = {
                db_type: {} for db_type in plain_types
            }
            merged['dense_vector'] = self.dense_vector_config()

            # caller-supplied values win, but keys they did not mention keep
            # their defaults (e.g. overriding only 'similarity' keeps 'dims')
            for db_type, column_config in overrides.items():
                merged[db_type] = {**merged.get(db_type, {}), **column_config}

            self.default_column_config = merged

        def dense_vector_config(self):
            """Get the dense vector config.

            :return: a new dict with the default ``dense_vector`` column settings
            """

            config = {
                'dims': -1,
                'index': True,
                'similarity': 'cosine',  # 'l2_norm', 'dot_product', 'cosine'
                'm': 16,
                'ef_construction': 100,
                'num_candidates': 10000,
            }

            return config

    @dataclass
    class RuntimeConfig(BaseDocIndex.RuntimeConfig):
        """Settings of ElasticDocIndex that are "dynamic" rather than fixed in DBConfig."""

        # fallback chunk size for bulk requests when the caller passes none
        chunk_size: int = 500

    ###############################################
    # Implementation of abstract methods          #
    ###############################################

    def python_type_to_db_type(self, python_type: Type) -> Any:
        """Map python type to database type.
        Takes any python type and returns the corresponding database column type.

        :param python_type: a python type.
        :return: the corresponding database column type.
        :raises ValueError: if ``python_type`` is not supported.
        """
        self._logger.debug(f'Mapping Python type {python_type} to database type')

        for allowed_type in ELASTIC_PY_VEC_TYPES:
            if safe_issubclass(python_type, allowed_type):
                self._logger.info(
                    f'Mapped Python type {python_type} to database type "dense_vector"'
                )
                return 'dense_vector'

        # order matters: the first matching entry wins (bool before int,
        # because bool is a subclass of int)
        elastic_py_types = {
            docarray.typing.ID: 'keyword',
            docarray.typing.AnyUrl: 'keyword',
            bool: 'boolean',
            int: 'integer',
            float: 'float',
            str: 'text',
            bytes: 'binary',
            dict: 'object',
        }

        # the loop variable must not be called `type`: that would shadow the
        # builtin and break the `type(self)` call in the error message below
        for py_type, db_type in elastic_py_types.items():
            if safe_issubclass(python_type, py_type):
                self._logger.info(
                    f'Mapped Python type {python_type} to database type "{db_type}"'
                )
                return db_type

        err_msg = f'Unsupported column type for {type(self)}: {python_type}'
        self._logger.error(err_msg)
        raise ValueError(err_msg)

    def _index(
        self,
        column_to_data: Mapping[str, Generator[Any, None, None]],
        refresh: bool = True,
        chunk_size: Optional[int] = None,
    ):
        """Index the given column data as documents in Elasticsearch.

        :param column_to_data: mapping from column name to that column's values
        :param refresh: whether to refresh the index after sending the requests
        :param chunk_size: chunk size for the bulk requests; passed on to
            ``_send_requests``
        """
        self._index_subindex(column_to_data)

        rows = self._transpose_col_value_dict(column_to_data)

        # document-array columns are not written to this index
        plain_columns = [
            (name, info)
            for name, info in self._column_infos.items()
            if not safe_issubclass(info.docarray_type, AnyDocArray)
        ]

        requests = []
        for row in rows:
            request = {'_index': self.index_name, '_id': row['id']}
            for name, info in plain_columns:
                if info.db_type == 'dense_vector' and np.all(row[name] == 0):
                    # shift all-zero vectors by a tiny epsilon before indexing
                    row[name] = row[name] + 1.0e-9
                value = row[name]
                if value is not None:
                    request[name] = value
            requests.append(request)

        _, failures = self._send_requests(requests, chunk_size)
        for failure in failures:
            text = str(failure)
            warnings.warn(text)
            self._logger.warning('Warning: %s', text)

        if refresh:
            self._logger.debug('Refreshing the index')
            self._refresh(self.index_name)

    def num_docs(self) -> int:
        """
        Return the document count reported by Elasticsearch for this index.
        """
        self._logger.debug('Getting the number of documents in the index')
        count_response = self._client.count(index=self.index_name)
        return count_response['count']

    def _del_items(
        self,
        doc_ids: Sequence[str],
        chunk_size: Optional[int] = None,
    ):
        """Delete the documents with the given ids via bulk requests.

        A warning is issued for the ids whose delete request did not succeed.

        :param doc_ids: ids of the documents to delete
        :param chunk_size: chunk size for the bulk requests; passed on to
            ``_send_requests``
        """
        requests = [
            {'_op_type': 'delete', '_index': self.index_name, '_id': _id}
            for _id in doc_ids
        ]

        _, failures = self._send_requests(requests, chunk_size)

        # raise warning if some ids are not found
        if failures:
            ids = [failure['delete']['_id'] for failure in failures]
            warnings.warn(f'No document with id {ids} found')

        self._refresh(self.index_name)

    def _get_items(self, doc_ids: Sequence[str]) -> Sequence[Dict[str, Any]]:
        """Fetch the stored documents for the given ids.

        Ids that are not found are left out of the result and reported in a
        single warning.

        :param doc_ids: ids of the documents to fetch
        :return: the ``_source`` dicts of the documents that were found
        """
        found_docs = []
        missing_ids = []

        for row in self._client_mget(doc_ids)['docs']:
            if not row['found']:
                missing_ids.append(row['_id'])
                continue
            found_docs.append(row['_source'])

        # raise warning if some ids are not found
        if missing_ids:
            warnings.warn(f'No document with id {missing_ids} found')

        return found_docs

    def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
        """
        Execute a query on the ElasticDocIndex.

        Can take two kinds of inputs:

        1. A native query of the underlying database. This is meant as a passthrough so that you
        can enjoy any functionality that is not available through the Document index API.
        2. The output of this Document index' `QueryBuilder.build()` method.

        :param query: the query to execute; its items are passed as keyword
            arguments to the client's search call
        :param args: not supported; must be empty
        :param kwargs: not supported; must be empty
        :return: the result of the query
        :raises ValueError: if any positional or keyword arguments are given
        """
        self._logger.debug(f'Executing query: {query}')

        extra_arguments_given = bool(args or kwargs)
        if extra_arguments_given:
            err_msg = (
                f'args and kwargs not supported for `execute_query` on {type(self)}'
            )
            self._logger.error(err_msg)
            raise ValueError(err_msg)

        response = self._client.search(index=self.index_name, **query)
        docs, scores = self._format_response(response)
        score_array = parse_obj_as(NdArray, scores)

        return _FindResult(documents=docs, scores=score_array)

    def _find(
        self, query: np.ndarray, limit: int, search_field: str = ''
    ) -> _FindResult:
        """Run a knn search for a single query vector.

        :param query: the query vector
        :param limit: maximum number of documents to return
        :param search_field: name of the vector field to search on
        :return: the matching documents and their scores
        """
        search_body = self._form_search_body(query, limit, search_field)
        response = self._client_search(**search_body)
        docs, scores = self._format_response(response)
        score_array = parse_obj_as(NdArray, scores)
        return _FindResult(documents=docs, scores=score_array)

    def _find_batched(
        self,
        queries: np.ndarray,
        limit: int,
        search_field: str = '',
    ) -> _FindResultBatched:
        """Run one knn search per query vector in a single multi-search call.

        :param queries: the query vectors; iterated one query at a time
        :param limit: maximum number of documents to return per query
        :param search_field: name of the vector field to search on
        :return: per-query documents and scores
        """
        # a multi-search request alternates header and body entries
        request = []
        for query in queries:
            request.append({'index': self.index_name})
            request.append(self._form_search_body(query, limit, search_field))

        responses = self._client_msearch(request)

        formatted = [self._format_response(resp) for resp in responses['responses']]
        das, scores = zip(*formatted)
        return _FindResultBatched(documents=list(das), scores=scores)

    def _filter(
        self,
        filter_query: Dict[str, Any],
        limit: int,
    ) -> List[Dict]:
        """Return the documents matching a filter query.

        :param filter_query: query passed as ``query`` to the search call
        :param limit: maximum number of documents to return
        :return: the matching documents; scores are dropped
        """
        response = self._client_search(query=filter_query, size=limit)
        matched_docs, _ = self._format_response(response)
        return matched_docs

    def _filter_batched(
        self,
        filter_queries: Any,
        limit: int,
    ) -> List[List[Dict]]:
        """Run several filter queries in a single multi-search call.

        :param filter_queries: iterable of filter queries
        :param limit: maximum number of documents to return per query
        :return: one list of matching documents per query
        """
        # a multi-search request alternates header and body entries
        request = []
        for query in filter_queries:
            request.append({'index': self.index_name})
            request.append({'query': query, 'size': limit})

        responses = self._client_msearch(request)
        formatted = [self._format_response(resp) for resp in responses['responses']]
        das, _ = zip(*formatted)

        return list(das)

    def _text_search(
        self,
        query: str,
        limit: int,
        search_field: str = '',
    ) -> _FindResult:
        """Run a text match search for a single query string.

        :param query: the text to search for
        :param limit: maximum number of documents to return
        :param search_field: name of the field to search on
        :return: the matching documents and their scores
        """
        search_body = self._form_text_search_body(query, limit, search_field)
        response = self._client_search(**search_body)
        docs, scores = self._format_response(response)
        score_array = np.array(scores)

        return _FindResult(documents=docs, scores=score_array)  # type: ignore

    def _text_search_batched(
        self,
        queries: Sequence[str],
        limit: int,
        search_field: str = '',
    ) -> _FindResultBatched:
        """Run one text match search per query string in a single multi-search call.

        :param queries: the texts to search for
        :param limit: maximum number of documents to return per query
        :param search_field: name of the field to search on
        :return: per-query documents and scores
        """
        # a multi-search request alternates header and body entries
        request = []
        for query in queries:
            request.append({'index': self.index_name})
            request.append(self._form_text_search_body(query, limit, search_field))

        responses = self._client_msearch(request)
        formatted = [self._format_response(resp) for resp in responses['responses']]
        das, scores = zip(*formatted)
        return _FindResultBatched(documents=list(das), scores=scores)

    def _filter_by_parent_id(self, id: str) -> List[str]:
        """Return the ids of the documents whose ``parent_id`` equals ``id``.

        :param id: the parent id to match with a ``term`` query
        :return: the ``id`` field of every hit in the response
        """
        # NOTE(review): no `size` is passed, so the number of hits is presumably
        # capped by the server's default page size -- confirm this is intended
        response = self._client_search(
            query={'term': {'parent_id': id}}, fields=['id'], _source=False
        )
        hits = response['hits']['hits']
        return [hit['fields']['id'][0] for hit in hits]

    ###############################################
    # Helpers                                     #
    ###############################################

    @classmethod
    def _create_index_mapping(cls, col: '_ColumnInfo') -> Dict[str, Any]:
        """Build the field mapping dict for one column.

        For ``dense_vector`` columns the vector parameters and the HNSW index
        options are added from the column config.

        :param col: the column to build the mapping for
        :return: the mapping dict for the column
        """
        mapping = {'type': col.config.get('type', col.db_type)}

        if col.db_type != 'dense_vector':
            return mapping

        if cls._index_vector_params is not None:
            for param in cls._index_vector_params:
                mapping[param] = col.config[param]

        # a dimensionality known from the column overrides the configured 'dims'
        if col.n_dim:
            mapping['dims'] = col.n_dim

        if cls._index_vector_options is not None:
            options = {
                option: col.config[option] for option in cls._index_vector_options
            }
            options['type'] = 'hnsw'
            mapping['index_options'] = options

        return mapping

    def _send_requests(
        self,
        request: Iterable[Dict[str, Any]],
        chunk_size: Optional[int] = None,
        **kwargs,
    ) -> Tuple[List[Dict], List[Any]]:
        """Send bulk requests to Elastic and sort the results by outcome.

        :param request: the bulk actions to send
        :param chunk_size: chunk size for ``parallel_bulk``; when falsy, the
            runtime config's ``chunk_size`` is used
        :param kwargs: further keyword arguments passed to ``parallel_bulk``
        :return: tuple of (info of successful actions, info of failed actions)
        """
        effective_chunk_size = (
            chunk_size if chunk_size else self._runtime_config.chunk_size  # type: ignore
        )

        succeeded: List[Dict] = []
        failed: List[Any] = []

        # errors are collected instead of raised
        outcomes = parallel_bulk(
            self._client,
            request,
            raise_on_error=False,
            raise_on_exception=False,
            chunk_size=effective_chunk_size,
            **kwargs,
        )
        for success, info in outcomes:
            bucket = succeeded if success else failed
            bucket.append(info)

        return succeeded, failed

    def _form_search_body(
        self,
        query: np.ndarray,
        limit: int,
        search_field: str = '',
        num_candidates: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Assemble the request body of a knn search.

        :param query: the query vector
        :param limit: used both as ``size`` and as the knn ``k``
        :param search_field: name of the vector field to search on
        :param num_candidates: number of candidates; when falsy, the
            ``num_candidates`` of the default ``dense_vector`` config is used
        :return: dict with the keys ``'size'`` and ``'knn'``
        """
        dense_vector_defaults = self._db_config.default_column_config
        candidates = (
            num_candidates or dense_vector_defaults['dense_vector']['num_candidates']
        )

        knn_clause = {
            'field': search_field,
            'query_vector': query,
            'k': limit,
            'num_candidates': candidates,
        }
        return {'size': limit, 'knn': knn_clause}

    def _form_text_search_body(
        self, query: str, limit: int, search_field: str = ''
    ) -> Dict[str, Any]:
        """Assemble the request body of a text match search.

        :param query: the text to search for
        :param limit: maximum number of documents to return
        :param search_field: name of the field to match on
        :return: dict with the keys ``'size'`` and ``'query'``
        """
        match_clause = {'match': {search_field: query}}
        bool_query = {'bool': {'must': match_clause}}
        return {'size': limit, 'query': bool_query}

    def _format_response(self, response: Any) -> Tuple[List[Dict], List[Any]]:
        """Extract documents and scores from a search response.

        :param response: a search response with hits under ``['hits']['hits']``
        :return: tuple of (document dicts with ``'id'`` set to the hit's
            ``_id``, scores converted to ``NdArray``)
        """
        docs = []
        raw_scores = []
        for hit in response['hits']['hits']:
            if not isinstance(hit, dict):
                hit = hit.to_dict()

            # fall back to 'fields' when the hit has no (non-empty) '_source'
            doc_dict = hit['_source'] if hit.get('_source', None) else hit['fields']
            doc_dict['id'] = hit['_id']

            docs.append(doc_dict)
            raw_scores.append(hit['_score'])

        converted_scores = [
            parse_obj_as(NdArray, np.array(score)) for score in raw_scores
        ]
        return docs, converted_scores

    def _refresh(self, index_name: str):
        """Refresh the index called ``index_name`` through the client.

        :param index_name: name of the index to refresh
        """
        indices_client = self._client.indices
        indices_client.refresh(index=index_name)

    def _doc_exists(self, doc_id: str) -> bool:
        """Check whether a document with the given id is stored in the index.

        :param doc_id: id of the document to look up
        :return: False for an empty id, otherwise the ``found`` flag of the lookup
        """
        if len(doc_id) == 0:
            return False
        lookup = self._client_mget([doc_id])
        first_doc = lookup["docs"][0]
        return first_doc["found"]

    ###############################################
    # API Wrappers                                #
    ###############################################

    def _client_put_mapping(self, mappings: Dict[str, Any]):
        """Send the ``properties`` of ``mappings`` to the index as a mapping update.

        :param mappings: mapping dict; only its ``'properties'`` entry is sent
        """
        indices_client = self._client.indices
        indices_client.put_mapping(
            index=self.index_name,
            properties=mappings['properties'],
        )

    def _client_create(self, mappings: Dict[str, Any]):
        """Create the index with the given mappings.

        :param mappings: the complete mapping dict for the new index
        """
        indices_client = self._client.indices
        indices_client.create(index=self.index_name, mappings=mappings)

    def _client_put_settings(self, settings: Dict[str, Any]):
        """Apply the given settings to the index.

        :param settings: the index settings to send
        """
        indices_client = self._client.indices
        indices_client.put_settings(index=self.index_name, settings=settings)

    def _client_mget(self, ids: Sequence[str]):
        """Fetch several documents of the index by id in one call.

        :param ids: ids of the documents to fetch
        :return: the client's multi-get response
        """
        response = self._client.mget(index=self.index_name, ids=ids)
        return response

    def _client_search(self, **kwargs):
        """Run a search against the index.

        :param kwargs: keyword arguments passed on to the client's search call
        :return: the client's search response
        """
        response = self._client.search(index=self.index_name, **kwargs)
        return response

    def _client_msearch(self, request: List[Dict[str, Any]]):
        """Run several searches against the index in one call.

        :param request: the multi-search entries, passed as ``searches``
        :return: the client's multi-search response
        """
        response = self._client.msearch(index=self.index_name, searches=request)
        return response

`DBConfig` `dataclass`

Bases: DBConfig

Dataclass that contains all "static" configurations of ElasticDocIndex.

Source code in docarray/index/backends/elastic.py

@dataclass
class DBConfig(BaseDocIndex.DBConfig):
    """Dataclass that contains all "static" configurations of ElasticDocIndex."""

    hosts: Union[
        str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]], None
    ] = 'http://localhost:9200'
    index_name: Optional[str] = None
    es_config: Dict[str, Any] = field(default_factory=dict)
    index_settings: Dict[str, Any] = field(default_factory=dict)
    index_mappings: Dict[str, Any] = field(default_factory=dict)
    default_column_config: Dict[Any, Dict[str, Any]] = field(default_factory=dict)

    def __post_init__(self):
        """Fill ``default_column_config`` with the per-type defaults.

        Entries passed in by the caller are merged on top of the defaults
        (per database type, key by key) instead of being discarded.
        """
        plain_types = (
            'binary',
            'boolean',
            'keyword',
            'long',
            'integer',
            'short',
            'byte',
            'double',
            'float',
            'half_float',
            'scaled_float',
            'unsigned_long',
            # 'dates' is kept for backward compatibility; the Elasticsearch
            # field types themselves are 'date' and 'date_nanos'
            'dates',
            'date',
            'date_nanos',
            'alias',
            'object',
            'flattened',
            'nested',
            'join',
            'integer_range',
            'float_range',
            'long_range',
            'double_range',
            'date_range',
            'ip_range',
            'ip',
            'version',
            'histogram',
            'text',
            'annotated_text',
            'completion',
            'search_as_you_type',
            'token_count',
            'sparse_vector',
            'rank_feature',
            'rank_features',
            'geo_point',
            'geo_shape',
            'point',
            'shape',
            'percolator',
            # `None` is not a Type, but we allow it here anyway
            None,
        )
        overrides = self.default_column_config or {}

        merged: Dict[Any, Dict[str, Any]] = {db_type: {} for db_type in plain_types}
        merged['dense_vector'] = self.dense_vector_config()

        # caller-supplied values win, but keys they did not mention keep
        # their defaults (e.g. overriding only 'similarity' keeps 'dims')
        for db_type, column_config in overrides.items():
            merged[db_type] = {**merged.get(db_type, {}), **column_config}

        self.default_column_config = merged

    def dense_vector_config(self):
        """Get the dense vector config.

        :return: a new dict with the default ``dense_vector`` column settings
        """

        config = {
            'dims': -1,
            'index': True,
            'similarity': 'cosine',  # 'l2_norm', 'dot_product', 'cosine'
            'm': 16,
            'ef_construction': 100,
            'num_candidates': 10000,
        }

        return config

`dense_vector_config()`

Get the dense vector config.

Source code in docarray/index/backends/elastic.py

def dense_vector_config(self):
    """Return a new dict holding the default ``dense_vector`` column settings."""
    return dict(
        dims=-1,
        index=True,
        similarity='cosine',  # 'l2_norm', 'dot_product', 'cosine'
        m=16,
        ef_construction=100,
        num_candidates=10000,
    )

`QueryBuilder`

Bases: QueryBuilder

Source code in docarray/index/backends/elastic.py

class QueryBuilder(BaseDocIndex.QueryBuilder):
    """Composable builder for Elasticsearch search requests.

    ``find``, ``filter`` and ``text_search`` each record their part of the
    request and return the builder, so calls can be chained. ``build``
    turns the recorded state into the request dict.
    """

    def __init__(self, outer_instance, **kwargs):
        """
        :param outer_instance: the index this builder was created by
        :param kwargs: accepted for interface compatibility; not used here
        """
        super().__init__()
        self._outer_instance = outer_instance
        # nested defaultdicts let callers append to
        # self._query['query']['bool'][<clause>] without creating the levels
        self._query: Dict[str, Any] = {
            'query': defaultdict(lambda: defaultdict(list))
        }

    def build(self, *args, **kwargs) -> Any:
        """Build the elastic search query object.

        The builder's own state is left untouched, so ``build`` may be
        called repeatedly and the builder can still be extended afterwards.

        :return: request dict; recorded query clauses end up under
            ``'knn'['filter']`` when a knn search was recorded, otherwise
            under ``'query'``; an empty query is omitted
        """
        self._outer_instance._logger.debug(
            'Building the Elastic Search query object'
        )

        # work on a shallow copy: deleting keys from self._query would make
        # a second build() (or a later filter()/text_search()) fail
        built = dict(self._query)
        clauses = built.pop('query')

        if len(clauses) > 0:
            if 'knn' in built:
                # copy the knn body as well so the stored one is not modified
                built['knn'] = {**built['knn'], 'filter': clauses}
            else:
                built['query'] = clauses

        return built

    def find(
        self,
        query: Union[AnyTensor, BaseDoc],
        search_field: str = 'embedding',
        limit: int = 10,
        num_candidates: Optional[int] = None,
    ):
        """
        Find k-nearest neighbors of the query.

        :param query: query vector for KNN/ANN search. Has single axis.
        :param search_field: name of the field to search on
        :param limit: maximum number of documents to return per query
        :param num_candidates: number of candidates
        :return: self
        """
        self._outer_instance._logger.debug('Executing find query')

        self._outer_instance._validate_search_field(search_field)
        if isinstance(query, BaseDoc):
            query_vec = BaseDocIndex._get_values_by_column([query], search_field)[0]
        else:
            query_vec = query
        query_vec_np = BaseDocIndex._to_numpy(self._outer_instance, query_vec)
        # only the 'knn' part of the search body is kept
        self._query['knn'] = self._outer_instance._form_search_body(
            query_vec_np,
            limit,
            search_field,
            num_candidates,
        )['knn']

        return self

    # filter accepts Leaf/Compound query clauses
    # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
    def filter(self, query: Dict[str, Any], limit: int = 10):
        """Find documents in the index based on a filter query

        :param query: the query to execute
        :param limit: maximum number of documents to return
        :return: self
        """
        self._outer_instance._logger.debug('Executing filter query')

        self._query['size'] = limit
        self._query['query']['bool']['filter'].append(query)
        return self

    def text_search(self, query: str, search_field: str = 'text', limit: int = 10):
        """Find documents in the index based on a text search query

        :param query: The text to search for
        :param search_field: name of the field to search on
        :param limit: maximum number of documents to find
        :return: self
        """
        self._outer_instance._logger.debug('Executing text search query')

        self._outer_instance._validate_search_field(search_field)
        self._query['size'] = limit
        self._query['query']['bool']['must'].append(
            {'match': {search_field: query}}
        )
        return self

    find_batched = _raise_not_composable('find_batched')
    filter_batched = _raise_not_composable('filter_batched')
    text_search_batched = _raise_not_composable('text_search_batched')

`build(*args, **kwargs)`

Build the elastic search query object.

Source code in docarray/index/backends/elastic.py

def build(self, *args, **kwargs) -> Any:
    """Build the elastic search query object.

    The builder's own state is left untouched, so ``build`` may be called
    repeatedly and the builder can still be extended afterwards.

    :return: request dict; recorded query clauses end up under
        ``'knn'['filter']`` when a knn search was recorded, otherwise under
        ``'query'``; an empty query is omitted
    """
    self._outer_instance._logger.debug(
        'Building the Elastic Search query object'
    )

    # work on a shallow copy: deleting keys from self._query would make a
    # second build() (or a later filter()/text_search()) fail
    built = dict(self._query)
    clauses = built.pop('query')

    if len(clauses) > 0:
        if 'knn' in built:
            # copy the knn body as well so the stored one is not modified
            built['knn'] = {**built['knn'], 'filter': clauses}
        else:
            built['query'] = clauses

    return built

`filter(query, limit=10)`

Find documents in the index based on a filter query

Parameters:

Name	Type	Description	Default
`query`	`Dict[str, Any]`	the query to execute	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
	self

Source code in docarray/index/backends/elastic.py

def filter(self, query: Dict[str, Any], limit: int = 10):
    """Record a filter clause and the result size on the builder.

    :param query: the filter clause to add to the query
    :param limit: maximum number of documents to return
    :return: self
    """
    self._outer_instance._logger.debug('Executing filter query')

    filter_clauses = self._query['query']['bool']['filter']
    self._query['size'] = limit
    filter_clauses.append(query)
    return self

`find(query, search_field='embedding', limit=10, num_candidates=None)`

Find k-nearest neighbors of the query.

Parameters:

Name	Type	Description	Default
`query`	`Union[AnyTensor, BaseDoc]`	query vector for KNN/ANN search. Has single axis.	required
`search_field`	`str`	name of the field to search on	`'embedding'`
`limit`	`int`	maximum number of documents to return per query	`10`
`num_candidates`	`Optional[int]`	number of candidates	`None`

Returns:

Type	Description
	self

Source code in docarray/index/backends/elastic.py

def find(
    self,
    query: Union[AnyTensor, BaseDoc],
    search_field: str = 'embedding',
    limit: int = 10,
    num_candidates: Optional[int] = None,
):
    """
    Record a k-nearest-neighbors search on the builder.

    :param query: query vector for KNN/ANN search, or a document from which
        the value of ``search_field`` is taken. Has single axis.
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return per query
    :param num_candidates: number of candidates
    :return: self
    """
    index = self._outer_instance
    index._logger.debug('Executing find query')

    index._validate_search_field(search_field)
    query_vec = (
        BaseDocIndex._get_values_by_column([query], search_field)[0]
        if isinstance(query, BaseDoc)
        else query
    )
    query_vec_np = BaseDocIndex._to_numpy(index, query_vec)

    # only the 'knn' part of the search body is kept
    search_body = index._form_search_body(
        query_vec_np,
        limit,
        search_field,
        num_candidates,
    )
    self._query['knn'] = search_body['knn']

    return self

`text_search(query, search_field='text', limit=10)`

Find documents in the index based on a text search query

Parameters:

Name	Type	Description	Default
`query`	`str`	The text to search for	required
`search_field`	`str`	name of the field to search on	`'text'`
`limit`	`int`	maximum number of documents to find	`10`

Returns:

Type	Description
	self

Source code in docarray/index/backends/elastic.py

def text_search(self, query: str, search_field: str = 'text', limit: int = 10):
    """Add a text-match clause to the query being built.

    Sets the query's ``size`` to ``limit`` and appends a ``match`` clause on
    ``search_field`` to the ``must`` list of the bool query.

    :param query: The text to search for
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to find
    :return: self
    """
    index = self._outer_instance
    index._logger.debug('Executing text search query')
    index._validate_search_field(search_field)

    self._query['size'] = limit
    match_clause = {'match': {search_field: query}}
    must_clauses = self._query['query']['bool']['must']
    must_clauses.append(match_clause)
    return self

`RuntimeConfig` `dataclass`

Bases: RuntimeConfig

Dataclass that contains all "dynamic" configurations of ElasticDocIndex.

Source code in docarray/index/backends/elastic.py

@dataclass
class RuntimeConfig(BaseDocIndex.RuntimeConfig):
    """Dataclass that contains all "dynamic" configurations of ElasticDocIndex.

    Extends ``BaseDocIndex.RuntimeConfig`` with one additional field.
    """

    # Integer setting, 500 by default.
    # NOTE(review): presumably the batch size for bulk requests - confirm
    # against where the index code reads it.
    chunk_size: int = 500

`contains(item)`

Checks if a given document exists in the index.

Parameters:

Name	Type	Description	Default
`item`	`BaseDoc`	The document to check. It must be an instance of BaseDoc or its subclass.	required

Returns:

Type	Description
`bool`	True if the document exists in the index, False otherwise.

Source code in docarray/index/abstract.py

def __contains__(self, item: BaseDoc) -> bool:
    """
    Check whether a document is present in the index, looked up by its id.

    :param item: The document to check.
        It must be an instance of BaseDoc or its subclass.
    :return: True if the document exists in the index, False otherwise.
    :raises TypeError: if ``item`` is not a BaseDoc (or subclass) instance.
    """
    # Reject anything that is not a document before touching the backend.
    if not safe_issubclass(type(item), BaseDoc):
        raise TypeError(
            f"item must be an instance of BaseDoc or its subclass, not '{type(item).__name__}'"
        )

    doc_id = str(item.id)
    return self._doc_exists(doc_id)

`delitem(key)`

Delete one or multiple Documents from the index, by id. If no document is found, a KeyError is raised.

Parameters:

Name	Type	Description	Default
`key`	`Union[str, Sequence[str]]`	id or ids to delete from the Document index	required

Source code in docarray/index/abstract.py

def __delitem__(self, key: Union[str, Sequence[str]]):
    """Delete one or multiple Documents from the index, by `id`.

    Documents stored in subindices whose parent is one of the given ids are
    deleted first, then the documents themselves.

    :param key: id or ids to delete from the Document index
    """
    self._logger.info(f'Deleting documents with id(s) {key} from the index')
    ids = [key] if isinstance(key, str) else key

    # Remove nested documents held in subindices.
    schema = cast(Type[BaseDoc], self._schema)
    for field_name, field_type, _ in self._flatten_schema(schema):
        if not safe_issubclass(field_type, AnyDocArray):
            continue
        for doc_id in ids:
            subindex = self._subindices[field_name]
            nested_ids = subindex._filter_by_parent_id(doc_id)
            if nested_ids:
                del self._subindices[field_name][nested_ids]

    # Remove the documents themselves.
    self._del_items(ids)

`getitem(key)`

Get one or multiple Documents from the index, by id. If no document is found, a KeyError is raised.

Parameters:

Name	Type	Description	Default
`key`	`Union[str, Sequence[str]]`	id or ids to get from the Document index	required

Source code in docarray/index/abstract.py

def __getitem__(
    self, key: Union[str, Sequence[str]]
) -> Union[TSchema, DocList[TSchema]]:
    """Get one or multiple Documents from the index, by `id`.

    A single id returns a single Document; a sequence of ids returns a DocList.

    :param key: id or ids to get from the Document index
    :raises KeyError: if no document is found
    """
    # A plain string means "one document"; anything else is a sequence of ids.
    return_singleton = isinstance(key, str)
    ids = [key] if return_singleton else key

    doc_sequence = self._get_items(ids)
    if len(doc_sequence) == 0:
        raise KeyError(f'No document with id {ids} found')

    # Attach nested data from subindices when the backend returned dicts.
    schema = cast(Type[BaseDoc], self._schema)
    for field_name, field_type, _ in self._flatten_schema(schema):
        if not safe_issubclass(field_type, AnyDocArray):
            continue
        if not isinstance(doc_sequence[0], Dict):
            continue
        for doc in doc_sequence:
            self._get_subindex_doclist(doc, field_name)  # type: ignore

    # Normalize whatever the backend returned into a DocList.
    out_docs: DocList[TSchema]
    if isinstance(doc_sequence, DocList):
        out_docs = doc_sequence
    elif isinstance(doc_sequence[0], Dict):
        out_docs = self._dict_list_to_docarray(doc_sequence)  # type: ignore
    else:
        docs_cls = DocList.__class_getitem__(cast(Type[BaseDoc], self._schema))
        out_docs = docs_cls(doc_sequence)

    if return_singleton:
        return out_docs[0]
    return out_docs

`init(db_config=None, **kwargs)`

Initialize ElasticDocIndex

Source code in docarray/index/backends/elastic.py

def __init__(self, db_config=None, **kwargs):
    """Initialize ElasticDocIndex.

    Creates the Elasticsearch client, builds the index mappings from the
    column infos, creates the index (or updates the mapping of an existing
    one), applies any configured index settings and refreshes the index.
    """
    super().__init__(db_config=db_config, **kwargs)
    self._db_config = cast(ElasticDocIndex.DBConfig, self._db_config)
    config = self._db_config

    self._logger.debug('Elastic Search index is being initialized')

    # Elasticsearch client creation
    self._client = Elasticsearch(hosts=config.hosts, **config.es_config)
    self._logger.debug('ElasticSearch client has been created')

    # Elasticsearch index setup: defaults first, user-supplied mappings on top
    mappings: Dict[str, Any] = {
        'dynamic': True,
        '_source': {'enabled': 'true'},
        'properties': {},
    }
    mappings.update(config.index_mappings)

    self._logger.debug('Mappings have been updated with db_config.index_mappings')

    for col_name, col in self._column_infos.items():
        # Columns typed as AnyDocArray get no mapping in this index.
        if safe_issubclass(col.docarray_type, AnyDocArray):
            continue

        # Vector columns without a known dimensionality cannot be mapped.
        is_vector = col.db_type == 'dense_vector'
        if is_vector and not col.n_dim and col.config['dims'] < 0:
            self._logger.info(
                f'Not indexing column {col_name}, the dimensionality is not specified'
            )
            continue

        mappings['properties'][col_name] = self._create_index_mapping(col)
        self._logger.debug(f'Index mapping created for column {col_name}')

    index_exists = self._client.indices.exists(index=self.index_name)
    if not index_exists:
        self._client_create(mappings)
        self._logger.debug(f'Created new index {self.index_name} with mappings')
    else:
        self._client_put_mapping(mappings)
        self._logger.debug(f'Put mapping for index {self.index_name}')

    if len(self._db_config.index_settings) > 0:
        self._client_put_settings(self._db_config.index_settings)
        self._logger.debug('Updated index settings')

    self._refresh(self.index_name)
    self._logger.debug(f'Refreshed index {self.index_name}')

`build_query(**kwargs)`

Build a query for ElasticDocIndex.

Parameters:

Name	Type	Description	Default
`kwargs`		parameters to forward to QueryBuilder initialization	`{}`

Returns:

Type	Description
`QueryBuilder`	QueryBuilder object

Source code in docarray/index/backends/elastic.py

def build_query(self, **kwargs) -> QueryBuilder:
    """
    Create a QueryBuilder bound to this ElasticDocIndex.

    :param kwargs: parameters to forward to QueryBuilder initialization
    :return: QueryBuilder object
    """
    builder_cls = self.QueryBuilder
    return builder_cls(self, **kwargs)

`configure(runtime_config=None, **kwargs)`

Configure the DocumentIndex. You can either pass a config object as `runtime_config` or pass individual config parameters as keyword arguments. If a configuration object is passed, it will replace the current configuration. If keyword arguments are passed, they will update the current configuration.

Parameters:

Name	Type	Description	Default
`runtime_config`		the configuration to apply	`None`
`kwargs`		individual configuration parameters	`{}`

Source code in docarray/index/abstract.py

def configure(self, runtime_config=None, **kwargs):
    """
    Configure the DocumentIndex.

    Either pass a complete config object as `runtime_config`, which replaces
    the current configuration, or pass individual config parameters as
    keyword arguments, which update the current configuration.
    Keyword arguments are only used when `runtime_config` is None.

    :param runtime_config: the configuration to apply
    :param kwargs: individual configuration parameters
    :raises ValueError: if `runtime_config` is given but is not an instance
        of this index's ``RuntimeConfig``
    """
    if runtime_config is not None:
        # Full replacement: the object must be of the index's own config type.
        if isinstance(runtime_config, self.RuntimeConfig):
            self._runtime_config = runtime_config
            return
        raise ValueError(f'runtime_config must be of type {self.RuntimeConfig}')

    # Partial update: copy the current config with the given fields replaced.
    self._runtime_config = replace(self._runtime_config, **kwargs)

`execute_query(query, *args, **kwargs)`

Execute a query on the ElasticDocIndex.

Can take two kinds of inputs:

A native query of the underlying database. This is meant as a passthrough so that you can enjoy any functionality that is not available through the Document index API.
The output of this Document index' QueryBuilder.build() method.

Parameters:

Name	Type	Description	Default
`query`	`Dict[str, Any]`	the query to execute	required
`args`		positional arguments to pass to the query	`()`
`kwargs`		keyword arguments to pass to the query	`{}`

Returns:

Type	Description
`Any`	the result of the query

Source code in docarray/index/backends/elastic.py

def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
    """
    Execute a query on the ElasticDocIndex.

    Can take two kinds of inputs:

    1. A native query of the underlying database. This is meant as a passthrough so that you
    can enjoy any functionality that is not available through the Document index API.
    2. The output of this Document index' `QueryBuilder.build()` method.

    The query dict is unpacked as keyword arguments into the client's
    ``search`` call for this index.

    :param query: the query to execute
    :param args: positional arguments to pass to the query (not supported)
    :param kwargs: keyword arguments to pass to the query (not supported)
    :return: the result of the query
    :raises ValueError: if any extra positional or keyword argument is given
    """
    self._logger.debug(f'Executing query: {query}')

    extras_given = bool(args) or bool(kwargs)
    if extras_given:
        err_msg = (
            f'args and kwargs not supported for `execute_query` on {type(self)}'
        )
        self._logger.error(err_msg)
        raise ValueError(err_msg)

    response = self._client.search(index=self.index_name, **query)
    documents, raw_scores = self._format_response(response)

    return _FindResult(
        documents=documents,
        scores=parse_obj_as(NdArray, raw_scores),
    )

`filter(filter_query, limit=10, **kwargs)`

Find documents in the index based on a filter query

Parameters:

Name	Type	Description	Default
`filter_query`	`Any`	the DB specific filter query to execute	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`DocList`	a DocList containing the documents that match the filter query

Source code in docarray/index/abstract.py

def filter(
    self,
    filter_query: Any,
    limit: int = 10,
    **kwargs,
) -> DocList:
    """Find documents in the index based on a filter query

    :param filter_query: the DB specific filter query to execute
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to the backend `_filter`
    :return: a DocList containing the documents that match the filter query
    """
    self._logger.debug(f'Executing `filter` for the query {filter_query}')
    result = self._filter(filter_query, limit=limit, **kwargs)

    # Results that are already a DocList (or not a list at all) pass through.
    if isinstance(result, DocList) or not isinstance(result, List):
        return result

    # A plain list is converted into a DocList.
    return self._dict_list_to_docarray(result)

`filter_batched(filter_queries, limit=10, **kwargs)`

Find documents in the index based on multiple filter queries.

Parameters:

Name	Type	Description	Default
`filter_queries`	`Any`	the DB specific filter queries to execute	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`List[DocList]`	a list of DocLists containing the documents that match the filter queries

Source code in docarray/index/abstract.py

def filter_batched(
    self,
    filter_queries: Any,
    limit: int = 10,
    **kwargs,
) -> List[DocList]:
    """Find documents in the index based on multiple filter queries.

    :param filter_queries: the DB specific filter queries to execute
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to the backend
        `_filter_batched`
    :return: a list of DocLists containing the documents that match the
        filter queries
    """
    self._logger.debug(
        f'Executing `filter_batched` for the queries {filter_queries}'
    )
    da_list = self._filter_batched(filter_queries, limit=limit, **kwargs)

    # Convert only plain lists. Results that are already DocLists must not be
    # passed to `_dict_list_to_docarray`; this is the same guard that `filter`
    # and `find_batched` apply.
    if (
        len(da_list) > 0
        and isinstance(da_list[0], List)
        and not isinstance(da_list[0], DocList)
    ):
        da_list = [self._dict_list_to_docarray(docs) for docs in da_list]

    return da_list  # type: ignore

`filter_subindex(filter_query, subindex, limit=10, **kwargs)`

Find documents in subindex level based on a filter query

Parameters:

Name	Type	Description	Default
`filter_query`	`Any`	the DB specific filter query to execute	required
`subindex`	`str`	name of the subindex to search on	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`DocList`	a DocList containing the subindex level documents that match the filter query

Source code in docarray/index/abstract.py

def filter_subindex(
    self,
    filter_query: Any,
    subindex: str,
    limit: int = 10,
    **kwargs,
) -> DocList:
    """Find documents in subindex level based on a filter query

    A ``subindex`` name containing ``'__'`` is treated as a path: the part
    before the first ``'__'`` selects the subindex, and the remainder is
    handed to that subindex's own `filter_subindex`.

    :param filter_query: the DB specific filter query to execute
    :param subindex: name of the subindex to search on
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to the subindex call
    :return: a DocList containing the subindex level documents that match the filter query
    """
    self._logger.debug(
        f'Executing `filter` for the query {filter_query} in subindex {subindex}'
    )

    head, separator, remainder = subindex.partition('__')
    if not separator:
        # Plain name: filter directly on that subindex.
        return self._subindices[subindex].filter(
            filter_query, limit=limit, **kwargs
        )

    # Nested path: descend one level and let the subindex resolve the rest.
    return self._subindices[head].filter_subindex(
        filter_query, remainder, limit=limit, **kwargs
    )

`find(query, search_field='', limit=10, **kwargs)`

Find documents in the index using nearest neighbor search.

Parameters:

Name	Type	Description	Default
`query`	`Union[AnyTensor, BaseDoc]`	query vector for KNN/ANN search. Can be either a tensor-like (np.array, torch.Tensor, etc.) with a single axis, or a Document	required
`search_field`	`str`	name of the field to search on. Documents in the index are retrieved based on the similarity of this field to the query.	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`FindResult`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def find(
    self,
    query: Union[AnyTensor, BaseDoc],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResult:
    """Find documents in the index using nearest neighbor search.

    :param query: query vector for KNN/ANN search.
        Can be either a tensor-like (np.array, torch.Tensor, etc.)
        with a single axis, or a Document
    :param search_field: name of the field to search on.
        Documents in the index are retrieved based on the similarity
        of this field to the query.
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to the backend `_find`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(f'Executing `find` for search field {search_field}')
    self._validate_search_field(search_field)

    # A Document query is reduced to the value of its `search_field` column.
    query_vec = (
        self._get_values_by_column([query], search_field)[0]
        if isinstance(query, BaseDoc)
        else query
    )
    query_vec_np = self._to_numpy(query_vec)

    docs, scores = self._find(
        query_vec_np, search_field=search_field, limit=limit, **kwargs
    )

    # Plain lists are converted; DocLists are returned as they are.
    needs_conversion = isinstance(docs, List) and not isinstance(docs, DocList)
    if needs_conversion:
        docs = self._dict_list_to_docarray(docs)

    return FindResult(documents=docs, scores=scores)

`find_batched(queries, search_field='', limit=10, **kwargs)`

Find documents in the index using nearest neighbor search.

Parameters:

Name	Type	Description	Default
`queries`	`Union[AnyTensor, DocList]`	query vectors for KNN/ANN search. Can be either a tensor-like (np.array, torch.Tensor, etc.) or a DocList. If a tensor-like is passed, it should have shape (batch_size, vector_dim)	required
`search_field`	`str`	name of the field to search on. Documents in the index are retrieved based on the similarity of this field to the query.	`''`
`limit`	`int`	maximum number of documents to return per query	`10`

Returns:

Type	Description
`FindResultBatched`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def find_batched(
    self,
    queries: Union[AnyTensor, DocList],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResultBatched:
    """Find documents in the index using nearest neighbor search.

    :param queries: query vectors for KNN/ANN search.
        Can be either a tensor-like (np.array, torch.Tensor, etc.)
        or a DocList.
        If a tensor-like is passed, it should have shape (batch_size, vector_dim)
    :param search_field: name of the field to search on.
        Documents in the index are retrieved based on the similarity
        of this field to the query.
    :param limit: maximum number of documents to return per query
    :param kwargs: extra keyword arguments forwarded to the backend
        `_find_batched`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(f'Executing `find_batched` for search field {search_field}')

    # A nested path ('field__subfield') whose first component is annotated as
    # AnyDocArray is delegated to the matching subindex.
    if search_field and '__' in search_field:
        root_field, _, nested_field = search_field.partition('__')
        root_annotation = self._schema._get_field_annotation(root_field)  # type: ignore
        if safe_issubclass(root_annotation, AnyDocArray):
            return self._subindices[root_field].find_batched(
                queries,
                search_field=nested_field,
                limit=limit,
                **kwargs,
            )

    self._validate_search_field(search_field)
    if not isinstance(queries, Sequence):
        query_vec_np = self._to_numpy(queries)
    else:
        # Sequence input: take the `search_field` value of each entry and
        # stack them into one array.
        column_values = self._get_values_by_column(queries, search_field)
        query_vec_np = np.stack(
            tuple(self._to_numpy(value) for value in column_values)
        )

    da_list, scores = self._find_batched(
        query_vec_np, search_field=search_field, limit=limit, **kwargs
    )

    # Convert only plain lists; DocLists are returned as they are.
    has_results = len(da_list) > 0
    if (
        has_results
        and isinstance(da_list[0], List)
        and not isinstance(da_list[0], DocList)
    ):
        da_list = [self._dict_list_to_docarray(docs) for docs in da_list]

    return FindResultBatched(documents=da_list, scores=scores)  # type: ignore

`find_subindex(query, subindex='', search_field='', limit=10, **kwargs)`

Find documents in subindex level.

Parameters:

Name	Type	Description	Default
`query`	`Union[AnyTensor, BaseDoc]`	query vector for KNN/ANN search. Can be either a tensor-like (np.array, torch.Tensor, etc.) with a single axis, or a Document	required
`subindex`	`str`	name of the subindex to search on	`''`
`search_field`	`str`	name of the field to search on	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`SubindexFindResult`	a named tuple containing root docs, subindex docs and scores

Source code in docarray/index/abstract.py

def find_subindex(
    self,
    query: Union[AnyTensor, BaseDoc],
    subindex: str = '',
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> SubindexFindResult:
    """Find documents in subindex level.

    Searches the subindex, then resolves the root document for every
    sub-document hit and fetches it from this index.

    :param query: query vector for KNN/ANN search.
        Can be either a tensor-like (np.array, torch.Tensor, etc.)
        with a single axis, or a Document
    :param subindex: name of the subindex to search on
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to `_find_subdocs`
    :return: a named tuple containing root docs, subindex docs and scores
    """
    self._logger.debug(f'Executing `find_subindex` for search field {search_field}')

    sub_docs, scores = self._find_subdocs(
        query, subindex=subindex, search_field=search_field, limit=limit, **kwargs
    )

    # Split the subindex path into its first component and the remainder.
    root_field, _, nested_path = subindex.partition('__')
    root_ids = [
        self._get_root_doc_id(sub_doc.id, root_field, nested_path)
        for sub_doc in sub_docs
    ]

    # Fetch the root documents in the same order as the sub-document hits.
    root_docs = DocList[self._schema]()  # type: ignore
    for root_id in root_ids:
        root_docs.append(self[root_id])

    return SubindexFindResult(
        root_documents=root_docs, sub_documents=sub_docs, scores=scores  # type: ignore
    )

`index(docs, **kwargs)`

index Documents into the index.

Note

Passing a sequence of Documents that is not a DocList (such as a List of Docs) comes at a performance penalty. This is because the Index needs to check compatibility between itself and the data. With a DocList as input this is a single check; for other inputs compatibility needs to be checked for every Document individually.

Parameters:

Name	Type	Description	Default
`docs`	`Union[BaseDoc, Sequence[BaseDoc]]`	Documents to index.	required

Source code in docarray/index/abstract.py

def index(self, docs: Union[BaseDoc, Sequence[BaseDoc]], **kwargs):
    """Index Documents into the index.

    !!! note
        Passing a sequence of Documents that is not a DocList
        (such as a List of Docs) comes at a performance penalty.
        This is because the Index needs to check compatibility between itself and
        the data. With a DocList as input this is a single check; for other inputs
        compatibility needs to be checked for every Document individually.

    :param docs: Documents to index.
    :param kwargs: extra keyword arguments forwarded to the backend `_index`
    """
    # A single document counts as one; otherwise use the sequence length.
    if isinstance(docs, BaseDoc):
        n_docs = 1
    else:
        n_docs = len(docs)
    self._logger.debug(f'Indexing {n_docs} documents')

    validated_docs = self._validate_docs(docs)
    self._update_subindex_data(validated_docs)
    columns = self._get_col_value_dict(validated_docs)
    self._index(columns, **kwargs)

`num_docs()`

Get the number of documents.

Source code in docarray/index/backends/elastic.py

def num_docs(self) -> int:
    """
    Get the number of documents.

    :return: the ``'count'`` entry of the client's count response for this
        index
    """
    self._logger.debug('Getting the number of documents in the index')
    count_response = self._client.count(index=self.index_name)
    return count_response['count']

`python_type_to_db_type(python_type)`

Map python type to database type. Takes any python type and returns the corresponding database column type.

Parameters:

Name	Type	Description	Default
`python_type`	`Type`	a python type.	required

Returns:

Type	Description
`Any`	the corresponding database column type, or None if `python_type` is not supported.

Source code in docarray/index/backends/elastic.py

def python_type_to_db_type(self, python_type: Type) -> Any:
    """Map python type to database type.
    Takes any python type and returns the corresponding database column type.

    :param python_type: a python type.
    :return: the corresponding database column type.
    :raises ValueError: if ``python_type`` is not supported.
    """
    self._logger.debug(f'Mapping Python type {python_type} to database type')

    for allowed_type in ELASTIC_PY_VEC_TYPES:
        if safe_issubclass(python_type, allowed_type):
            self._logger.info(
                f'Mapped Python type {python_type} to database type "dense_vector"'
            )
            return 'dense_vector'

    # Checked in insertion order, so the first matching entry wins
    # (e.g. `bool` is listed before `int`, of which it is a subclass).
    elastic_py_types = {
        docarray.typing.ID: 'keyword',
        docarray.typing.AnyUrl: 'keyword',
        bool: 'boolean',
        int: 'integer',
        float: 'float',
        str: 'text',
        bytes: 'binary',
        dict: 'object',
    }

    # The loop variable must not be called `type`: that would make `type` a
    # local name and break the `type(self)` call in the error message below.
    for py_type, db_type in elastic_py_types.items():
        if safe_issubclass(python_type, py_type):
            self._logger.info(
                f'Mapped Python type {python_type} to database type "{db_type}"'
            )
            return db_type

    err_msg = f'Unsupported column type for {type(self)}: {python_type}'
    self._logger.error(err_msg)
    raise ValueError(err_msg)

`subindex_contains(item)`

Checks if a given BaseDoc item is contained in the index or any of its subindices.

Parameters:

Name	Type	Description	Default
`item`	`BaseDoc`	the given BaseDoc	required

Returns:

Type	Description
`bool`	if the given BaseDoc item is contained in the index/subindices

Source code in docarray/index/abstract.py

def subindex_contains(self, item: BaseDoc) -> bool:
    """Check whether a BaseDoc is contained in this index or, recursively, in any subindex.

    An empty index short-circuits to False before the type of ``item`` is
    checked.

    :param item: the given BaseDoc
    :return: if the given BaseDoc item is contained in the index/subindices
    :raises TypeError: if ``item`` is not a BaseDoc (or subclass) instance
    """
    if self._is_index_empty:
        return False

    if not safe_issubclass(type(item), BaseDoc):
        raise TypeError(
            f"item must be an instance of BaseDoc or its subclass, not '{type(item).__name__}'"
        )

    # Look in this index first; only descend into subindices on a miss.
    found_here = self.__contains__(item)
    if found_here:
        return found_here
    return any(
        index.subindex_contains(item) for index in self._subindices.values()
    )

`text_search(query, search_field='', limit=10, **kwargs)`

Find documents in the index based on a text search query.

Parameters:

Name	Type	Description	Default
`query`	`Union[str, BaseDoc]`	The text to search for	required
`search_field`	`str`	name of the field to search on	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`FindResult`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def text_search(
    self,
    query: Union[str, BaseDoc],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResult:
    """Find documents in the index based on a text search query.

    :param query: The text to search for, or a Document whose
        ``search_field`` value is used as the text
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to the backend
        `_text_search`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(f'Executing `text_search` for search field {search_field}')
    self._validate_search_field(search_field)

    # A Document query is reduced to the value of its `search_field` column.
    query_text = (
        self._get_values_by_column([query], search_field)[0]
        if isinstance(query, BaseDoc)
        else query
    )

    docs, scores = self._text_search(
        query_text, search_field=search_field, limit=limit, **kwargs
    )

    # Plain lists are converted; DocLists are returned as they are.
    is_plain_list = isinstance(docs, List) and not isinstance(docs, DocList)
    if is_plain_list:
        docs = self._dict_list_to_docarray(docs)

    return FindResult(documents=docs, scores=scores)

`text_search_batched(queries, search_field='', limit=10, **kwargs)`

Find documents in the index based on a text search query.

Parameters:

Name	Type	Description	Default
`queries`	`Union[Sequence[str], Sequence[BaseDoc]]`	The texts to search for	required
`search_field`	`str`	name of the field to search on	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`FindResultBatched`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def text_search_batched(
    self,
    queries: Union[Sequence[str], Sequence[BaseDoc]],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResultBatched:
    """Find documents in the index based on multiple text search queries.

    :param queries: The texts to search for, or Documents whose
        ``search_field`` values are used as the texts. The first element
        decides which of the two the whole sequence is treated as.
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded to the backend
        `_text_search_batched`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(
        f'Executing `text_search_batched` for search field {search_field}'
    )
    self._validate_search_field(search_field)
    if isinstance(queries[0], BaseDoc):
        query_docs: Sequence[BaseDoc] = cast(Sequence[BaseDoc], queries)
        query_texts: Sequence[str] = self._get_values_by_column(
            query_docs, search_field
        )
    else:
        query_texts = cast(Sequence[str], queries)
    da_list, scores = self._text_search_batched(
        query_texts, search_field=search_field, limit=limit, **kwargs
    )

    # Convert only plain lists. Results that are already DocLists must not be
    # passed to `_dict_list_to_docarray`; this is the same guard that
    # `text_search` and `find_batched` apply.
    if (
        len(da_list) > 0
        and isinstance(da_list[0], List)
        and not isinstance(da_list[0], DocList)
    ):
        docs = [self._dict_list_to_docarray(docs) for docs in da_list]
        return FindResultBatched(documents=docs, scores=scores)

    da_list_ = cast(List[DocList], da_list)
    return FindResultBatched(documents=da_list_, scores=scores)

ElasticDocIndex

docarray.index.backends.elastic.ElasticDocIndex

DBConfig dataclass

dense_vector_config()

QueryBuilder

build(*args, **kwargs)

filter(query, limit=10)

find(query, search_field='embedding', limit=10, num_candidates=None)

text_search(query, search_field='text', limit=10)

RuntimeConfig dataclass

__contains__(item)

__delitem__(key)

__getitem__(key)

__init__(db_config=None, **kwargs)

build_query(**kwargs)

configure(runtime_config=None, **kwargs)

execute_query(query, *args, **kwargs)

filter(filter_query, limit=10, **kwargs)

filter_batched(filter_queries, limit=10, **kwargs)

filter_subindex(filter_query, subindex, limit=10, **kwargs)

find(query, search_field='', limit=10, **kwargs)

find_batched(queries, search_field='', limit=10, **kwargs)

find_subindex(query, subindex='', search_field='', limit=10, **kwargs)

index(docs, **kwargs)

num_docs()

python_type_to_db_type(python_type)

subindex_contains(item)

text_search(query, search_field='', limit=10, **kwargs)

text_search_batched(queries, search_field='', limit=10, **kwargs)

`docarray.index.backends.elastic.ElasticDocIndex`

`DBConfig` `dataclass`

`dense_vector_config()`

`QueryBuilder`

`build(*args, **kwargs)`

`filter(query, limit=10)`

`find(query, search_field='embedding', limit=10, num_candidates=None)`

`text_search(query, search_field='text', limit=10)`

`RuntimeConfig` `dataclass`

`contains(item)`

`delitem(key)`

`getitem(key)`

`init(db_config=None, **kwargs)`

`build_query(**kwargs)`

`configure(runtime_config=None, **kwargs)`

`execute_query(query, *args, **kwargs)`

`filter(filter_query, limit=10, **kwargs)`

`filter_batched(filter_queries, limit=10, **kwargs)`

`filter_subindex(filter_query, subindex, limit=10, **kwargs)`

`find(query, search_field='', limit=10, **kwargs)`

`find_batched(queries, search_field='', limit=10, **kwargs)`

`find_subindex(query, subindex='', search_field='', limit=10, **kwargs)`

`index(docs, **kwargs)`

`num_docs()`

`python_type_to_db_type(python_type)`

`subindex_contains(item)`

`text_search(query, search_field='', limit=10, **kwargs)`

`text_search_batched(queries, search_field='', limit=10, **kwargs)`