QdrantDocumentIndex

`docarray.index.backends.qdrant.QdrantDocumentIndex`

Bases: BaseDocIndex, Generic[TSchema]

Source code in docarray/index/backends/qdrant.py

class QdrantDocumentIndex(BaseDocIndex, Generic[TSchema]):
    UUID_NAMESPACE = uuid.UUID('3896d314-1e95-4a3a-b45a-945f9f0b541d')

    def __init__(self, db_config=None, **kwargs):
        """Initialize QdrantDocumentIndex.

        :param db_config: optional configuration object. When it carries a truthy
            ``index_name``, that value is copied into ``collection_name`` before
            the parent initializer runs.
        :param kwargs: forwarded unchanged to the parent initializer.
        """
        # This is needed for subindices, which are configured via ``index_name``.
        # A default is supplied so that a config object lacking the attribute
        # does not raise AttributeError here.
        if db_config is not None and getattr(db_config, 'index_name', None):
            db_config.collection_name = db_config.index_name
        super().__init__(db_config=db_config, **kwargs)
        self._db_config: QdrantDocumentIndex.DBConfig = cast(
            QdrantDocumentIndex.DBConfig, self._db_config
        )
        # All connection options are taken verbatim from the DB config.
        self._client = qdrant_client.QdrantClient(
            location=self._db_config.location,
            url=self._db_config.url,
            port=self._db_config.port,
            grpc_port=self._db_config.grpc_port,
            prefer_grpc=self._db_config.prefer_grpc,
            https=self._db_config.https,
            api_key=self._db_config.api_key,
            prefix=self._db_config.prefix,
            timeout=self._db_config.timeout,
            host=self._db_config.host,
            path=self._db_config.path,
        )
        self._initialize_collection()
        self._logger.info(f'{self.__class__.__name__} has been initialized')

    @property
    def collection_name(self):
        """Name of the Qdrant collection used by this index.

        :return: the configured ``collection_name`` when it is truthy, otherwise
            the lowercased name of the schema class.
        :raises ValueError: if the index has no schema, even when a collection
            name is configured.
        """
        if self._schema is None:
            raise ValueError(
                'A QdrantDocumentIndex must be typed with a Document type. '
                'To do so, use the syntax: QdrantDocumentIndex[DocumentType]'
            )

        return self._db_config.collection_name or self._schema.__name__.lower()

    @property
    def index_name(self):
        """Alias of ``collection_name``; both resolve to the same value."""
        resolved_name = self.collection_name
        return resolved_name

    @dataclass
    class Query:
        """Dataclass describing a query.

        Instances are produced by ``QueryBuilder.build()`` and consumed by
        ``execute_query()``.
        """

        # Name of the vector field to search on; when falsy, ``execute_query``
        # performs a filter-only scroll instead of a vector search.
        vector_field: Optional[str]
        # Query vector; ``build()`` averages all vectors passed to ``find``.
        # ``None`` when no vector was supplied.
        vector_query: Optional[NdArray]
        # Combined payload / text-search filter, or ``None`` when there is none.
        filter: Optional[rest.Filter]
        # Maximum number of results to return.
        limit: int

    class QueryBuilder(BaseDocIndex.QueryBuilder):
        """Composable query builder for QdrantDocumentIndex.

        Every chained call (``find``, ``filter``, ``text_search``) returns a new
        builder and leaves the current one untouched; ``build`` turns the
        accumulated state into a ``QdrantDocumentIndex.Query``.
        """

        def __init__(
            self,
            vector_search_field: Optional[str] = None,
            vector_filters: Optional[List[NdArray]] = None,
            payload_filters: Optional[List[rest.Filter]] = None,
            text_search_filters: Optional[List[Tuple[str, str]]] = None,
        ):
            """
            :param vector_search_field: name of the vector field used by ``find``
            :param vector_filters: query vectors collected from ``find`` calls
            :param payload_filters: filters collected from ``filter`` calls
            :param text_search_filters: ``(search_field, query)`` pairs collected
                from ``text_search`` calls
            """
            self._vector_search_field: Optional[str] = vector_search_field
            self._vector_filters: List[NdArray] = vector_filters or []
            self._payload_filters: List[rest.Filter] = payload_filters or []
            self._text_search_filters: List[Tuple[str, str]] = text_search_filters or []

        def build(self, limit: int) -> 'QdrantDocumentIndex.Query':
            """
            Build a query object for QdrantDocumentIndex.

            :param limit: maximum number of results the query should return
            :return: QdrantDocumentIndex.Query object
            """
            vector_query = None
            if len(self._vector_filters) > 0:
                # If there are multiple vector queries applied, we can average them and
                # perform semantic search on a single vector instead
                vector_query = np.average(self._vector_filters, axis=0)

            # Collect all conditions in a fresh list first, so that neither the
            # builder's own state nor an already constructed Filter model is
            # mutated afterwards.
            conditions: List[Any] = list(self._payload_filters)
            # Text search is just a special case of payload filtering, so the
            # payload conditions are simply extended
            conditions.extend(
                rest.FieldCondition(
                    key=search_field,
                    match=rest.MatchText(text=query),
                )
                for search_field, query in self._text_search_filters
            )
            merged_filter = rest.Filter(must=conditions) if conditions else None

            return QdrantDocumentIndex.Query(
                vector_field=self._vector_search_field,
                vector_query=vector_query,
                filter=merged_filter,
                limit=limit,
            )

        def find(  # type: ignore[override]
            self, query: NdArray, search_field: str = ''
        ) -> 'QdrantDocumentIndex.QueryBuilder':
            """
            Find k-nearest neighbors of the query.

            :param query: query vector for search. Has single axis.
            :param search_field: field to perform search on
            :return: QueryBuilder object
            :raises ValueError: if a different, non-empty search field was used
                by an earlier ``find`` call in the chain
            """
            if self._vector_search_field and self._vector_search_field != search_field:
                raise ValueError(
                    f'Trying to call .find for search_field = {search_field}, but '
                    f'previously {self._vector_search_field} was used. Only a single '
                    f'field might be used in chained calls.'
                )
            return QdrantDocumentIndex.QueryBuilder(
                vector_search_field=search_field,
                vector_filters=self._vector_filters + [query],
                payload_filters=self._payload_filters,
                text_search_filters=self._text_search_filters,
            )

        def filter(  # type: ignore[override]
            self, filter_query: rest.Filter
        ) -> 'QdrantDocumentIndex.QueryBuilder':
            """Find documents in the index based on a filter query

            :param filter_query: a filter
            :return: QueryBuilder object
            """
            return QdrantDocumentIndex.QueryBuilder(
                vector_search_field=self._vector_search_field,
                vector_filters=self._vector_filters,
                payload_filters=self._payload_filters + [filter_query],
                text_search_filters=self._text_search_filters,
            )

        def text_search(  # type: ignore[override]
            self, query: str, search_field: str = ''
        ) -> 'QdrantDocumentIndex.QueryBuilder':
            """Find documents in the index based on a text search query

            :param query: The text to search for
            :param search_field: name of the field to search on
            :return: QueryBuilder object
            """
            return QdrantDocumentIndex.QueryBuilder(
                vector_search_field=self._vector_search_field,
                vector_filters=self._vector_filters,
                payload_filters=self._payload_filters,
                text_search_filters=self._text_search_filters + [(search_field, query)],
            )

        # Batched operations cannot be composed through the query builder.
        find_batched = _raise_not_composable('find_batched')
        filter_batched = _raise_not_composable('filter_batched')
        text_search_batched = _raise_not_composable('text_search_batched')

    @dataclass
    class DBConfig(BaseDocIndex.DBConfig):
        """Dataclass that contains all "static" configurations of QdrantDocumentIndex."""

        # Connection options; ``QdrantDocumentIndex.__init__`` forwards each of
        # them under the same name to ``qdrant_client.QdrantClient``.
        location: Optional[str] = None
        url: Optional[str] = None
        port: Optional[int] = 6333
        grpc_port: int = 6334
        prefer_grpc: bool = True
        https: Optional[bool] = None
        api_key: Optional[str] = None
        prefix: Optional[str] = None
        timeout: Optional[float] = None
        host: Optional[str] = None
        path: Optional[str] = None
        # Name of the collection; kept in sync with ``index_name`` (see
        # ``__post_init__``).
        collection_name: Optional[str] = None
        # Collection options; forwarded under the same name to
        # ``create_collection`` when the collection has to be created.
        shard_number: Optional[int] = None
        replication_factor: Optional[int] = None
        write_consistency_factor: Optional[int] = None
        on_disk_payload: Optional[bool] = None
        hnsw_config: Optional[types.HnswConfigDiff] = None
        optimizers_config: Optional[types.OptimizersConfigDiff] = None
        wal_config: Optional[types.WalConfigDiff] = None
        quantization_config: Optional[types.QuantizationConfig] = None
        # Defaults per db type. The keys are the strings returned by
        # ``python_type_to_db_type``; 'dim' is the fallback vector size used
        # when a column does not declare ``n_dim``.
        default_column_config: Dict[Type, Dict[str, Any]] = field(
            default_factory=lambda: {
                'id': {},  # type: ignore[dict-item]
                'vector': {'dim': 128},  # type: ignore[dict-item]
                'payload': {},  # type: ignore[dict-item]
            }
        )

        def __post_init__(self):
            """Fill whichever of ``collection_name`` / ``index_name`` is unset from the other."""
            # NOTE(review): ``index_name`` is not declared here; presumably it is
            # inherited from ``BaseDocIndex.DBConfig`` - confirm.
            if self.collection_name is None and self.index_name is not None:
                self.collection_name = self.index_name
            if self.index_name is None and self.collection_name is not None:
                self.index_name = self.collection_name

    @dataclass
    class RuntimeConfig(BaseDocIndex.RuntimeConfig):
        """Dataclass that contains all "dynamic" configurations of QdrantDocumentIndex.

        Declares no fields of its own on top of ``BaseDocIndex.RuntimeConfig``.
        """

    def python_type_to_db_type(self, python_type: Type) -> Any:
        """Translate a python type into the name of a database column type.

        :param python_type: a python type.
        :return: ``'vector'`` for subclasses of any type in
            ``QDRANT_PY_VECTOR_TYPES``, ``'id'`` for subclasses of docarray's
            ``ID`` type, and ``'payload'`` for everything else.
        """
        # Vector types take precedence over the ID check.
        for vector_type in QDRANT_PY_VECTOR_TYPES:
            if safe_issubclass(python_type, vector_type):
                return 'vector'

        is_id_type = safe_issubclass(python_type, docarray.typing.id.ID)
        return 'id' if is_id_type else 'payload'

    def _initialize_collection(self):
        """Create the collection and its payload index if the lookup fails.

        The collection is looked up first; only when that lookup raises one of
        the handled errors is the collection created, with one named vector per
        'vector' column, followed by a keyword payload index on
        ``__generated_vectors``.
        """
        try:
            self._client.get_collection(self.collection_name)
        except (UnexpectedResponse, RpcError, ValueError):
            config = self._db_config
            # One named vector configuration per column stored as a vector.
            named_vectors = {
                name: self._to_qdrant_vector_params(info)
                for name, info in self._column_infos.items()
                if info.db_type == 'vector'
            }

            self._client.create_collection(
                collection_name=self.collection_name,
                vectors_config=named_vectors,
                shard_number=config.shard_number,
                replication_factor=config.replication_factor,
                write_consistency_factor=config.write_consistency_factor,
                on_disk_payload=config.on_disk_payload,
                hnsw_config=config.hnsw_config,
                optimizers_config=config.optimizers_config,
                wal_config=config.wal_config,
                quantization_config=config.quantization_config,
            )
            # The search methods filter on this payload key.
            self._client.create_payload_index(
                collection_name=self.collection_name,
                field_name='__generated_vectors',
                field_schema=rest.PayloadSchemaType.KEYWORD,
            )

    def _index(self, column_to_data: Dict[str, Generator[Any, None, None]]):
        """Index the given column data: subindices first, then this collection.

        :param column_to_data: mapping from column name to a generator of values
        """
        self._index_subindex(column_to_data)

        # TODO: add batching the documents to avoid timeouts
        points = list(
            map(
                self._build_point_from_row,
                self._transpose_col_value_dict(column_to_data),
            )
        )
        self._client.upsert(collection_name=self.collection_name, points=points)

    def num_docs(self) -> int:
        """
        Return how many points the client counts in this index's collection.
        """
        count_result = self._client.count(collection_name=self.collection_name)
        return count_result.count

    def _doc_exists(self, doc_id: str) -> bool:
        """Check whether a document with the given id is stored in the collection.

        :param doc_id: external document id
        :return: True if a point with the corresponding Qdrant id exists
        """
        response, _ = self._client.scroll(
            # Same value as ``index_name``; ``collection_name`` is used for
            # consistency with every other client call in this class.
            collection_name=self.collection_name,
            scroll_filter=rest.Filter(
                must=[
                    rest.HasIdCondition(has_id=[self._to_qdrant_id(doc_id)]),
                ],
            ),
            # A single record without payload is enough to prove existence.
            limit=1,
            with_payload=False,
        )
        return len(response) > 0

    def _del_items(self, doc_ids: Sequence[str]):
        """Delete the documents with the given ids.

        :param doc_ids: external ids of the documents to delete
        :raises KeyError: if at least one of the ids is not stored in the index
        """
        if len(doc_ids) == 0:
            # Nothing to delete.
            return

        items = self._get_items(doc_ids)
        found_keys = {item['id'] for item in items}  # type: ignore[index]
        # Compare key sets rather than lengths: a length comparison reports a
        # spurious failure (with an empty key list) when ``doc_ids`` contains
        # the same id more than once.
        missing_keys = set(doc_ids) - found_keys
        if missing_keys:
            raise KeyError(
                'Document keys could not found: %s' % ','.join(sorted(missing_keys))
            )

        self._client.delete(
            collection_name=self.collection_name,
            points_selector=rest.PointIdsList(
                points=[self._to_qdrant_id(doc_id) for doc_id in doc_ids],
            ),
        )

    def _get_items(
        self, doc_ids: Sequence[str]
    ) -> Union[Sequence[TSchema], Sequence[Dict[str, Any]]]:
        """Fetch the documents with the given ids, ordered like ``doc_ids``.

        Ids that are not stored in the collection are silently left out.

        :param doc_ids: external ids of the documents to fetch
        :return: the found documents as dicts, payload and vectors included
        """
        if len(doc_ids) == 0:
            # Avoid issuing a scroll request with ``limit=0``.
            return []

        response, _ = self._client.scroll(
            collection_name=self.collection_name,
            scroll_filter=rest.Filter(
                must=[
                    rest.HasIdCondition(
                        has_id=[self._to_qdrant_id(doc_id) for doc_id in doc_ids],
                    ),
                ],
            ),
            limit=len(doc_ids),
            with_payload=True,
            with_vectors=True,
        )

        # Position of the first occurrence of every id, computed once so that
        # sorting does not rescan ``doc_ids`` for each returned document.
        position: Dict[str, int] = {}
        for idx, doc_id in enumerate(doc_ids):
            position.setdefault(doc_id, idx)

        return sorted(
            [self._convert_to_doc(point) for point in response],
            key=lambda doc: position[doc['id']],
        )

    def execute_query(self, query: Union[Query, RawQuery], *args, **kwargs) -> DocList:
        """
        Execute a query on the QdrantDocumentIndex.

        Can take two kinds of inputs:

        1. A native query of the underlying database. This is meant as a passthrough so that you
        can enjoy any functionality that is not available through the Document index API.
        2. The output of this Document index's `QueryBuilder.build()` method.

        :param query: the query to execute
        :param args: positional arguments to pass to the query
        :param kwargs: keyword arguments to pass to the query
        :return: the result of the query
        """
        if not isinstance(query, QdrantDocumentIndex.Query):
            # Raw queries are consumed destructively, so work on a shallow copy.
            points = self._execute_raw_query(query.copy())
        elif query.vector_field:
            # Semantic search over vectors, so Qdrant's search method is used.
            points = self._client.search(  # type: ignore[assignment]
                collection_name=self.collection_name,
                query_vector=(query.vector_field, query.vector_query),  # type: ignore[arg-type]
                query_filter=rest.Filter(
                    # A built query may carry no filter at all; in that case no
                    # ``must`` clause is sent instead of a list containing None.
                    must=[query.filter] if query.filter is not None else None,
                    # The following filter takes care of using only those points which
                    # do not have the vector generated. Those are excluded from the
                    # search results.
                    must_not=[
                        rest.FieldCondition(
                            key='__generated_vectors',
                            match=rest.MatchValue(value=query.vector_field),
                        )
                    ],
                ),
                limit=query.limit,
                with_payload=True,
                with_vectors=True,
            )
        else:
            # Just filtering, so Qdrant's scroll has to be used instead
            points, _ = self._client.scroll(  # type: ignore[assignment]
                collection_name=self.collection_name,
                scroll_filter=query.filter,
                limit=query.limit,
                with_payload=True,
                with_vectors=True,
            )

        docs = [self._convert_to_doc(point) for point in points]
        return self._dict_list_to_docarray(docs)

    def _execute_raw_query(
        self, query: RawQuery
    ) -> Sequence[Union[rest.ScoredPoint, rest.Record]]:
        """Run a native query given as a dict; the dict is consumed in place.

        The 'filter' entry (if truthy) is parsed into a ``rest.Filter``. When a
        'vector' entry is present a vector search is issued and a truthy
        'params' entry is parsed into ``rest.SearchParams``; otherwise a scroll
        is issued. Remaining entries are passed on as keyword arguments.

        :param query: the raw query; entries are popped from it
        :return: the points returned by the client
        """
        raw_filter = query.pop('filter', None)
        parsed_filter = (
            rest.Filter.parse_obj(raw_filter)  # type: ignore[assignment]
            if raw_filter
            else raw_filter
        )

        if 'vector' not in query:
            # Just filtering, so Qdrant's scroll has to be used instead
            records, _ = self._client.scroll(  # type: ignore[assignment]
                collection_name=self.collection_name,
                scroll_filter=parsed_filter,
                **query,
            )
            return records

        # Semantic search over vectors, so Qdrant's search method is used.
        raw_params = query.pop('params', None)
        parsed_params = (
            rest.SearchParams.parse_obj(raw_params)  # type: ignore[assignment]
            if raw_params
            else raw_params
        )
        collection = self.collection_name
        query_vector = query.pop('vector')
        return self._client.search(  # type: ignore[return-value]
            collection_name=collection,
            query_vector=query_vector,
            query_filter=parsed_filter,
            search_params=parsed_params,
            **query,
        )

    def _find(
        self, query: np.ndarray, limit: int, search_field: str = ''
    ) -> _FindResult:
        """Vector search for a single query, delegated to ``_find_batched``.

        :param query: the query vector
        :param limit: maximum number of results
        :param search_field: name of the vector field to search on
        :return: documents and scores of the single query
        """
        # Wrap the query into a batch of size one and unwrap the first result.
        batched = self._find_batched(
            queries=np.expand_dims(query, axis=0),
            limit=limit,
            search_field=search_field,
        )
        all_docs, all_scores = batched
        return _FindResult(documents=all_docs[0], scores=all_scores[0])  # type: ignore[arg-type]

    def _find_batched(
        self, queries: np.ndarray, limit: int, search_field: str = ''
    ) -> _FindResultBatched:
        """Vector search for several queries in one ``search_batch`` call.

        :param queries: the query vectors, iterated along the first axis
        :param limit: maximum number of results per query
        :param search_field: name of the vector field to search on
        :return: per-query lists of documents and score arrays
        """

        def to_request(vector):
            return rest.SearchRequest(
                vector=rest.NamedVector(
                    name=search_field,
                    vector=vector.tolist(),  # type: ignore
                ),
                # Exclude points that list this field in '__generated_vectors',
                # i.e. points whose vector for the field was generated.
                filter=rest.Filter(
                    must_not=[
                        rest.FieldCondition(
                            key='__generated_vectors',
                            match=rest.MatchValue(value=search_field),
                        )
                    ]
                ),
                limit=limit,
                with_vector=True,
                with_payload=True,
            )

        responses = self._client.search_batch(
            collection_name=self.collection_name,
            requests=[to_request(vector) for vector in queries],
        )

        documents = []
        scores = []
        for hits in responses:
            documents.append([self._convert_to_doc(hit) for hit in hits])
            scores.append(
                NdArray._docarray_from_native(np.array([hit.score for hit in hits]))
            )
        return _FindResultBatched(documents=documents, scores=scores)

    def _filter(
        self, filter_query: rest.Filter, limit: int
    ) -> Union[DocList, List[Dict]]:
        """Filter with a single query, delegated to ``_filter_batched``.

        :param filter_query: the filter to apply
        :param limit: maximum number of results
        :return: the documents matching the filter
        """
        (matches,) = self._filter_batched(filter_queries=[filter_query], limit=limit)
        return matches

    def _filter_batched(
        self, filter_queries: Sequence[rest.Filter], limit: int
    ) -> Union[List[DocList], List[List[Dict]]]:
        """Run every filter as its own scroll request.

        :param filter_queries: the filters to apply
        :param limit: maximum number of results per filter
        :return: one list of documents per filter, in input order
        """
        # There is no batch scroll available in Qdrant client yet, so we need to
        # perform the queries one by one. It will be changed in the future versions.
        pages = []
        for condition in filter_queries:
            records, _next_offset = self._client.scroll(
                collection_name=self.collection_name,
                scroll_filter=condition,
                limit=limit,
                with_payload=True,
                with_vectors=True,
            )
            pages.append(records)

        return [list(map(self._convert_to_doc, records)) for records in pages]

    def _text_search(
        self, query: str, limit: int, search_field: str = ''
    ) -> _FindResult:
        """Text search for a single query, delegated to ``_text_search_batched``.

        :param query: the text to search for
        :param limit: maximum number of results
        :param search_field: name of the field to search on
        :return: documents and scores of the single query
        """
        batched = self._text_search_batched(
            queries=[query], limit=limit, search_field=search_field
        )
        all_docs, all_scores = batched
        return _FindResult(documents=all_docs[0], scores=all_scores[0])  # type: ignore[arg-type]

    def _text_search_batched(
        self, queries: Sequence[str], limit: int, search_field: str = ''
    ) -> _FindResultBatched:
        """Text search for several queries, implemented as payload filtering.

        :param queries: the texts to search for
        :param limit: maximum number of results per query
        :param search_field: name of the field to search on
        :return: per-query lists of documents and score arrays
        """

        def as_text_filter(text):
            return rest.Filter(
                must=[
                    rest.FieldCondition(
                        key=search_field,
                        match=rest.MatchText(text=text),
                    )
                ]
            )

        documents_batched = self._filter_batched(
            filter_queries=[as_text_filter(text) for text in queries],
            limit=limit,
        )

        # Qdrant does not return any scores if we just filter the objects, without using
        # semantic search over vectors. Thus, each document is scored with a value of 1
        unit_scores = []
        for batch in documents_batched:
            unit_scores.append(NdArray._docarray_from_native(np.ones(len(batch))))
        return _FindResultBatched(documents=documents_batched, scores=unit_scores)

    def _filter_by_parent_id(self, id: str) -> Optional[List[str]]:
        """Collect the ids of all points whose ``parent_id`` payload equals ``id``.

        :param id: the parent id to match
        :return: the 'id' payload values of all matching points
        """
        ids: List[str] = []
        offset = None
        # A single scroll call returns only one page of results (the page size
        # defaults to the client's limit), so follow the returned offset until
        # the collection is exhausted instead of truncating the result.
        while True:
            response, offset = self._client.scroll(
                collection_name=self.collection_name,  # type: ignore
                scroll_filter=rest.Filter(
                    must=[
                        rest.FieldCondition(
                            key='parent_id', match=rest.MatchValue(value=id)
                        )
                    ]
                ),
                with_payload=rest.PayloadSelectorInclude(include=['id']),
                offset=offset,
            )
            ids.extend(point.payload['id'] for point in response)  # type: ignore
            if offset is None:
                break
        return ids

    def _build_point_from_row(self, row: Dict[str, Any]) -> rest.PointStruct:
        """Turn one row (column name -> value) into a Qdrant point.

        :param row: the values of a single document, keyed by column name
        :return: a point holding the named vectors and the payload of the row
        :raises ValueError: if a column has a db type other than 'id',
            'payload' or 'vector'
        """
        point_id = self._to_qdrant_id(row.get('id'))
        named_vectors: Dict[str, List[float]] = {}
        payload: Dict[str, Any] = {'__generated_vectors': []}

        for name, info in self._column_infos.items():
            if safe_issubclass(info.docarray_type, AnyDocArray):
                # Nested document arrays are not stored on the point itself.
                continue

            if info.db_type in ['id', 'payload']:
                payload[name] = row.get(name)
            elif info.db_type != 'vector':
                raise ValueError(
                    f'Could not handle the conversion for column {name}. '
                    f'Column info: {info}'
                )
            else:
                value = row.get(name)
                if value is not None:
                    named_vectors[name] = value.tolist()
                else:
                    # In that case vector was not provided. Qdrant does not support
                    # optional vectors - each point needs to have all the vectors
                    # already assigned. Thus, we put a fake embedding with the
                    # correct dimensionality and record the column name in the
                    # payload under '__generated_vectors'.
                    size = info.n_dim or info.config.get('dim')
                    named_vectors[name] = np.ones(size).tolist()  # type: ignore[arg-type]
                    payload['__generated_vectors'].append(name)

        return rest.PointStruct(id=point_id, vector=named_vectors, payload=payload)

    def _to_qdrant_id(self, external_id: Optional[str]) -> str:
        """Map an external document id to the hex form of a UUID.

        :param external_id: the document id, or None
        :return: hex of a random UUID4 when no id is given, otherwise hex of
            the UUID5 of the id within ``UUID_NAMESPACE``
        """
        if external_id is not None:
            derived = uuid.uuid5(QdrantDocumentIndex.UUID_NAMESPACE, external_id)
        else:
            derived = uuid.uuid4()
        return derived.hex

    def _to_qdrant_vector_params(self, column_info: _ColumnInfo) -> rest.VectorParams:
        """Derive the Qdrant vector parameters of a vector column.

        :param column_info: description of the column
        :return: size (``n_dim``, falling back to the configured 'dim') and
            distance (configured 'space', 'cosine' when unset) of the vector
        """
        size = column_info.n_dim or column_info.config.get('dim')
        space = column_info.config.get('space', 'cosine')
        return rest.VectorParams(size=size, distance=QDRANT_SPACE_MAPPING[space])

    def _convert_to_doc(
        self, point: Union[rest.ScoredPoint, rest.Record]
    ) -> Dict[str, Any]:
        """Convert a Qdrant point into a document dict.

        The point's payload dict is reused (and modified) as the document: the
        internal ``__generated_vectors`` entry is removed and the point's
        vectors are added under their names.

        :param point: a point returned by the Qdrant client
        :return: the document as a dict
        """
        document = cast(Dict[str, Any], point.payload)
        generated_vectors = document.pop('__generated_vectors', [])
        vectors = point.vector if point.vector else dict()
        if not isinstance(vectors, dict):
            # A single unnamed vector is exposed under a fixed key.
            vectors = {'__default__': vectors}
        for vector_name, vector in vectors.items():
            # A generated vector is only a placeholder created during the
            # upload because no value was provided. It must not be returned
            # along the other vectors, so the field is restored to None.
            document[vector_name] = (
                None if vector_name in generated_vectors else vector
            )
        return document

`DBConfig` `dataclass`

Bases: DBConfig

Dataclass that contains all "static" configurations of QdrantDocumentIndex.

Source code in docarray/index/backends/qdrant.py

@dataclass
class DBConfig(BaseDocIndex.DBConfig):
    """Dataclass that contains all "static" configurations of QdrantDocumentIndex."""

    # Connection options; ``QdrantDocumentIndex.__init__`` forwards each of
    # them under the same name to ``qdrant_client.QdrantClient``.
    location: Optional[str] = None
    url: Optional[str] = None
    port: Optional[int] = 6333
    grpc_port: int = 6334
    prefer_grpc: bool = True
    https: Optional[bool] = None
    api_key: Optional[str] = None
    prefix: Optional[str] = None
    timeout: Optional[float] = None
    host: Optional[str] = None
    path: Optional[str] = None
    # Name of the collection; kept in sync with ``index_name`` (see
    # ``__post_init__``).
    collection_name: Optional[str] = None
    # Collection options; forwarded under the same name to
    # ``create_collection`` when the collection has to be created.
    shard_number: Optional[int] = None
    replication_factor: Optional[int] = None
    write_consistency_factor: Optional[int] = None
    on_disk_payload: Optional[bool] = None
    hnsw_config: Optional[types.HnswConfigDiff] = None
    optimizers_config: Optional[types.OptimizersConfigDiff] = None
    wal_config: Optional[types.WalConfigDiff] = None
    quantization_config: Optional[types.QuantizationConfig] = None
    # Defaults per db type. The keys are the strings returned by
    # ``python_type_to_db_type``; 'dim' is the fallback vector size used
    # when a column does not declare ``n_dim``.
    default_column_config: Dict[Type, Dict[str, Any]] = field(
        default_factory=lambda: {
            'id': {},  # type: ignore[dict-item]
            'vector': {'dim': 128},  # type: ignore[dict-item]
            'payload': {},  # type: ignore[dict-item]
        }
    )

    def __post_init__(self):
        """Fill whichever of ``collection_name`` / ``index_name`` is unset from the other."""
        # NOTE(review): ``index_name`` is not declared here; presumably it is
        # inherited from ``BaseDocIndex.DBConfig`` - confirm.
        if self.collection_name is None and self.index_name is not None:
            self.collection_name = self.index_name
        if self.index_name is None and self.collection_name is not None:
            self.index_name = self.collection_name

`Query` `dataclass`

Dataclass describing a query.

Source code in docarray/index/backends/qdrant.py

@dataclass
class Query:
    """Dataclass describing a query.

    Instances are produced by ``QueryBuilder.build()`` and consumed by
    ``execute_query()``.
    """

    # Name of the vector field to search on; when falsy, ``execute_query``
    # performs a filter-only scroll instead of a vector search.
    vector_field: Optional[str]
    # Query vector; ``build()`` averages all vectors passed to ``find``.
    # ``None`` when no vector was supplied.
    vector_query: Optional[NdArray]
    # Combined payload / text-search filter, or ``None`` when there is none.
    filter: Optional[rest.Filter]
    # Maximum number of results to return.
    limit: int

`QueryBuilder`

Bases: QueryBuilder

Source code in docarray/index/backends/qdrant.py

class QueryBuilder(BaseDocIndex.QueryBuilder):
    """Composable query builder for QdrantDocumentIndex.

    Every chained call (``find``, ``filter``, ``text_search``) returns a new
    builder and leaves the current one untouched; ``build`` turns the
    accumulated state into a ``QdrantDocumentIndex.Query``.
    """

    def __init__(
        self,
        vector_search_field: Optional[str] = None,
        vector_filters: Optional[List[NdArray]] = None,
        payload_filters: Optional[List[rest.Filter]] = None,
        text_search_filters: Optional[List[Tuple[str, str]]] = None,
    ):
        """
        :param vector_search_field: name of the vector field used by ``find``
        :param vector_filters: query vectors collected from ``find`` calls
        :param payload_filters: filters collected from ``filter`` calls
        :param text_search_filters: ``(search_field, query)`` pairs collected
            from ``text_search`` calls
        """
        self._vector_search_field: Optional[str] = vector_search_field
        self._vector_filters: List[NdArray] = vector_filters or []
        self._payload_filters: List[rest.Filter] = payload_filters or []
        self._text_search_filters: List[Tuple[str, str]] = text_search_filters or []

    def build(self, limit: int) -> 'QdrantDocumentIndex.Query':
        """
        Build a query object for QdrantDocumentIndex.

        :param limit: maximum number of results the query should return
        :return: QdrantDocumentIndex.Query object
        """
        vector_query = None
        if len(self._vector_filters) > 0:
            # If there are multiple vector queries applied, we can average them and
            # perform semantic search on a single vector instead
            vector_query = np.average(self._vector_filters, axis=0)

        # Collect all conditions in a fresh list first, so that neither the
        # builder's own state nor an already constructed Filter model is
        # mutated afterwards.
        conditions: List[Any] = list(self._payload_filters)
        # Text search is just a special case of payload filtering, so the
        # payload conditions are simply extended
        conditions.extend(
            rest.FieldCondition(
                key=search_field,
                match=rest.MatchText(text=query),
            )
            for search_field, query in self._text_search_filters
        )
        merged_filter = rest.Filter(must=conditions) if conditions else None

        return QdrantDocumentIndex.Query(
            vector_field=self._vector_search_field,
            vector_query=vector_query,
            filter=merged_filter,
            limit=limit,
        )

    def find(  # type: ignore[override]
        self, query: NdArray, search_field: str = ''
    ) -> 'QdrantDocumentIndex.QueryBuilder':
        """
        Find k-nearest neighbors of the query.

        :param query: query vector for search. Has single axis.
        :param search_field: field to perform search on
        :return: QueryBuilder object
        :raises ValueError: if a different, non-empty search field was used
            by an earlier ``find`` call in the chain
        """
        if self._vector_search_field and self._vector_search_field != search_field:
            raise ValueError(
                f'Trying to call .find for search_field = {search_field}, but '
                f'previously {self._vector_search_field} was used. Only a single '
                f'field might be used in chained calls.'
            )
        return QdrantDocumentIndex.QueryBuilder(
            vector_search_field=search_field,
            vector_filters=self._vector_filters + [query],
            payload_filters=self._payload_filters,
            text_search_filters=self._text_search_filters,
        )

    def filter(  # type: ignore[override]
        self, filter_query: rest.Filter
    ) -> 'QdrantDocumentIndex.QueryBuilder':
        """Find documents in the index based on a filter query

        :param filter_query: a filter
        :return: QueryBuilder object
        """
        return QdrantDocumentIndex.QueryBuilder(
            vector_search_field=self._vector_search_field,
            vector_filters=self._vector_filters,
            payload_filters=self._payload_filters + [filter_query],
            text_search_filters=self._text_search_filters,
        )

    def text_search(  # type: ignore[override]
        self, query: str, search_field: str = ''
    ) -> 'QdrantDocumentIndex.QueryBuilder':
        """Find documents in the index based on a text search query

        :param query: The text to search for
        :param search_field: name of the field to search on
        :return: QueryBuilder object
        """
        return QdrantDocumentIndex.QueryBuilder(
            vector_search_field=self._vector_search_field,
            vector_filters=self._vector_filters,
            payload_filters=self._payload_filters,
            text_search_filters=self._text_search_filters + [(search_field, query)],
        )

    # Batched operations cannot be composed through the query builder.
    find_batched = _raise_not_composable('find_batched')
    filter_batched = _raise_not_composable('filter_batched')
    text_search_batched = _raise_not_composable('text_search_batched')

`build(limit)`

Build a query object for QdrantDocumentIndex.

Returns:

Type	Description
`Query`	QdrantDocumentIndex.Query object

Source code in docarray/index/backends/qdrant.py

def build(self, limit: int) -> 'QdrantDocumentIndex.Query':
    """
    Assemble the collected search criteria into a single query object.

    :param limit: maximum number of results; stored on the returned query
    :return: QdrantDocumentIndex.Query object
    """
    # Several chained vector queries are collapsed into their element-wise
    # average, so that a single vector is used for the semantic search.
    averaged_vector = (
        np.average(self._vector_filters, axis=0)
        if len(self._vector_filters) > 0
        else None
    )

    combined_filter = (
        rest.Filter(must=self._payload_filters)
        if len(self._payload_filters) > 0
        else None
    )

    if len(self._text_search_filters) > 0:
        # Text searches are expressed as additional `must` conditions on the
        # payload filter, which is created on demand if none exists yet.
        if not combined_filter:
            combined_filter = rest.Filter(must=[])
        combined_filter.must.extend(  # type: ignore[union-attr]
            rest.FieldCondition(
                key=field_name,
                match=rest.MatchText(text=text),
            )
            for field_name, text in self._text_search_filters
        )

    return QdrantDocumentIndex.Query(
        vector_field=self._vector_search_field,
        vector_query=averaged_vector,
        filter=combined_filter,
        limit=limit,
    )

`filter(filter_query)`

Find documents in the index based on a filter query

Parameters:

Name	Type	Description	Default
`filter_query`	`Filter`	a filter	required

Returns:

Type	Description
`QueryBuilder`	QueryBuilder object

Source code in docarray/index/backends/qdrant.py

def filter(  # type: ignore[override]
    self, filter_query: rest.Filter
) -> 'QdrantDocumentIndex.QueryBuilder':
    """Add a payload filter to the query under construction.

    :param filter_query: filter to add to the collected payload filters
    :return: a new QueryBuilder carrying the extended list of payload filters
    """
    # Concatenation creates a new list, so this builder's own list is untouched.
    extended_payload_filters = self._payload_filters + [filter_query]
    return QdrantDocumentIndex.QueryBuilder(
        text_search_filters=self._text_search_filters,
        payload_filters=extended_payload_filters,
        vector_filters=self._vector_filters,
        vector_search_field=self._vector_search_field,
    )

`find(query, search_field='')`

Find k-nearest neighbors of the query.

Parameters:

Name	Type	Description	Default
`query`	`NdArray`	query vector for search. Has single axis.	required
`search_field`	`str`	field to perform search on	`''`

Returns:

Type	Description
`QueryBuilder`	QueryBuilder object

Source code in docarray/index/backends/qdrant.py

def find(  # type: ignore[override]
    self, query: NdArray, search_field: str = ''
) -> 'QdrantDocumentIndex.QueryBuilder':
    """
    Add a vector query to the query under construction.

    All chained `find` calls have to target the same search field.

    :param query: query vector for search. Has single axis.
    :param search_field: field to perform search on
    :return: a new QueryBuilder carrying the extended list of vector queries
    :raises ValueError: if an earlier call in the chain used a different,
        non-empty search field
    """
    field_conflict = (
        self._vector_search_field and self._vector_search_field != search_field
    )
    if field_conflict:
        raise ValueError(
            f'Trying to call .find for search_field = {search_field}, but '
            f'previously {self._vector_search_field} was used. Only a single '
            f'field might be used in chained calls.'
        )

    # Concatenation creates a new list, so this builder's own list is untouched.
    extended_vector_filters = self._vector_filters + [query]
    return QdrantDocumentIndex.QueryBuilder(
        text_search_filters=self._text_search_filters,
        payload_filters=self._payload_filters,
        vector_filters=extended_vector_filters,
        vector_search_field=search_field,
    )

`text_search(query, search_field='')`

Find documents in the index based on a text search query

Parameters:

Name	Type	Description	Default
`query`	`str`	The text to search for	required
`search_field`	`str`	name of the field to search on	`''`

Returns:

Type	Description
`QueryBuilder`	QueryBuilder object

Source code in docarray/index/backends/qdrant.py

def text_search(  # type: ignore[override]
    self, query: str, search_field: str = ''
) -> 'QdrantDocumentIndex.QueryBuilder':
    """Add a text search criterion to the query under construction.

    :param query: The text to search for
    :param search_field: name of the field to search on
    :return: a new QueryBuilder carrying the extended list of text searches
    """
    # Each text search is remembered as a (field, text) pair; concatenation
    # creates a new list, so this builder's own list is untouched.
    extended_text_filters = self._text_search_filters + [(search_field, query)]
    return QdrantDocumentIndex.QueryBuilder(
        text_search_filters=extended_text_filters,
        payload_filters=self._payload_filters,
        vector_filters=self._vector_filters,
        vector_search_field=self._vector_search_field,
    )

`RuntimeConfig` `dataclass`

Bases: RuntimeConfig

Dataclass that contains all "dynamic" configurations of QdrantDocumentIndex.

Source code in docarray/index/backends/qdrant.py

@dataclass
class RuntimeConfig(BaseDocIndex.RuntimeConfig):
    """Runtime ("dynamic") configuration of QdrantDocumentIndex.

    Declares no fields of its own on top of `BaseDocIndex.RuntimeConfig`.
    """

`__contains__(item)`

Checks if a given document exists in the index.

Parameters:

Name	Type	Description	Default
`item`	`BaseDoc`	The document to check. It must be an instance of BaseDoc or its subclass.	required

Returns:

Type	Description
`bool`	True if the document exists in the index, False otherwise.

Source code in docarray/index/abstract.py

def __contains__(self, item: BaseDoc) -> bool:
    """
    Tell whether the given document is present in the index.

    The lookup is done by the document's id, converted to a string.

    :param item: The document to check.
        It must be an instance of BaseDoc or its subclass.
    :return: True if the document exists in the index, False otherwise.
    :raises TypeError: if `item` is not a BaseDoc (or subclass) instance
    """
    # Reject unsupported inputs up front, then do the actual lookup.
    if not safe_issubclass(type(item), BaseDoc):
        raise TypeError(
            f"item must be an instance of BaseDoc or its subclass, not '{type(item).__name__}'"
        )
    return self._doc_exists(str(item.id))

`__delitem__(key)`

Delete one or multiple Documents from the index, by id. If no document is found, a KeyError is raised.

Parameters:

Name	Type	Description	Default
`key`	`Union[str, Sequence[str]]`	id or ids to delete from the Document index	required

Source code in docarray/index/abstract.py

def __delitem__(self, key: Union[str, Sequence[str]]):
    """Delete one or multiple Documents from the index, by `id`.

    Nested documents stored in subindices are deleted before the documents
    themselves.

    :param key: id or ids to delete from the Document index
    """
    self._logger.info(f'Deleting documents with id(s) {key} from the index')
    # A single id is handled as a one-element list.
    ids = [key] if isinstance(key, str) else key

    # First remove nested documents from the subindex of every field that
    # holds a document array.
    schema = cast(Type[BaseDoc], self._schema)
    for field_name, field_type, _ in self._flatten_schema(schema):
        if not safe_issubclass(field_type, AnyDocArray):
            continue
        for parent_id in ids:
            subindex = self._subindices[field_name]
            nested_ids = subindex._filter_by_parent_id(parent_id)
            if nested_ids:
                del subindex[nested_ids]

    # Then remove the documents themselves.
    self._del_items(ids)

`__getitem__(key)`

Get one or multiple Documents from the index, by id. If no document is found, a KeyError is raised.

Parameters:

Name	Type	Description	Default
`key`	`Union[str, Sequence[str]]`	id or ids to get from the Document index	required

Source code in docarray/index/abstract.py

def __getitem__(
    self, key: Union[str, Sequence[str]]
) -> Union[TSchema, DocList[TSchema]]:
    """Get one or multiple Documents from the index, by `id`.

    A single id yields a single Document; a sequence of ids yields a DocList.

    :param key: id or ids to get from the Document index
    :raises KeyError: if no document is found
    """
    # A single id is looked up as a one-element list and unwrapped at the end.
    return_singleton = isinstance(key, str)
    if return_singleton:
        key = [key]

    doc_sequence = self._get_items(key)
    if len(doc_sequence) == 0:
        raise KeyError(f'No document with id {key} found')

    # The sequence is non-empty here, so its first element tells whether the
    # backend returned plain dicts.
    holds_dicts = isinstance(doc_sequence[0], Dict)

    # For dict results, attach the nested data of every document-array field.
    schema = cast(Type[BaseDoc], self._schema)
    for field_name, field_type, _ in self._flatten_schema(schema):
        if safe_issubclass(field_type, AnyDocArray) and holds_dicts:
            for doc in doc_sequence:
                self._get_subindex_doclist(doc, field_name)  # type: ignore

    # Bring the result into DocList form.
    if isinstance(doc_sequence, DocList):
        out_docs: DocList[TSchema] = doc_sequence
    elif holds_dicts:
        out_docs = self._dict_list_to_docarray(doc_sequence)  # type: ignore
    else:
        docs_cls = DocList.__class_getitem__(cast(Type[BaseDoc], self._schema))
        out_docs = docs_cls(doc_sequence)

    if return_singleton:
        return out_docs[0]
    return out_docs

`__init__(db_config=None, **kwargs)`

Initialize QdrantDocumentIndex

Source code in docarray/index/backends/qdrant.py

def __init__(self, db_config=None, **kwargs):
    """Initialize QdrantDocumentIndex.

    Creates the Qdrant client from the database configuration and initializes
    the collection.

    :param db_config: optional database configuration object. If it carries a
        truthy `index_name`, that value is written to its `collection_name`
        (note that the passed object itself is modified).
    :param kwargs: forwarded to the parent class initializer
    """
    # This is needed for subindices. The `None` default makes a config object
    # without an `index_name` attribute behave like one with an empty name
    # instead of raising AttributeError.
    if db_config is not None and getattr(db_config, 'index_name', None):
        db_config.collection_name = db_config.index_name
    super().__init__(db_config=db_config, **kwargs)
    self._db_config: QdrantDocumentIndex.DBConfig = cast(
        QdrantDocumentIndex.DBConfig, self._db_config
    )
    self._client = qdrant_client.QdrantClient(
        location=self._db_config.location,
        url=self._db_config.url,
        port=self._db_config.port,
        grpc_port=self._db_config.grpc_port,
        prefer_grpc=self._db_config.prefer_grpc,
        https=self._db_config.https,
        api_key=self._db_config.api_key,
        prefix=self._db_config.prefix,
        timeout=self._db_config.timeout,
        host=self._db_config.host,
        path=self._db_config.path,
    )
    self._initialize_collection()
    self._logger.info(f'{self.__class__.__name__} has been initialized')

`build_query()`

Build a query for this DocumentIndex.

Returns:

Type	Description
`QueryBuilder`	a new `QueryBuilder` object for this DocumentIndex

Source code in docarray/index/abstract.py

def build_query(self) -> QueryBuilder:
    """
    Create an empty query builder for this DocumentIndex.

    :return: a new `QueryBuilder` object for this DocumentIndex
    """
    # The builder class is looked up on the instance and instantiated without
    # arguments.
    builder_cls = self.QueryBuilder
    return builder_cls()  # type: ignore

`configure(runtime_config=None, **kwargs)`

Configure the DocumentIndex. You can either pass a config object as `runtime_config` or pass individual config parameters as keyword arguments. If a configuration object is passed, it will replace the current configuration. If keyword arguments are passed, they will update the current configuration.

Parameters:

Name	Type	Description	Default
`runtime_config`		the configuration to apply	`None`
`kwargs`		individual configuration parameters	`{}`

Source code in docarray/index/abstract.py

def configure(self, runtime_config=None, **kwargs):
    """
    Change the runtime configuration of the DocumentIndex.

    Either pass a complete config object as `runtime_config`, which replaces
    the current configuration, or pass individual parameters as keyword
    arguments, which update the current configuration. Keyword arguments are
    ignored when a config object is given.

    :param runtime_config: the configuration to apply
    :param kwargs: individual configuration parameters
    :raises ValueError: if `runtime_config` is given but is not an instance of
        this index's `RuntimeConfig`
    """
    if runtime_config is not None:
        # A full config object replaces the current one wholesale.
        if not isinstance(runtime_config, self.RuntimeConfig):
            raise ValueError(f'runtime_config must be of type {self.RuntimeConfig}')
        self._runtime_config = runtime_config
        return

    # No config object: derive an updated copy of the current configuration.
    self._runtime_config = replace(self._runtime_config, **kwargs)

`execute_query(query, *args, **kwargs)`

Execute a query on the QdrantDocumentIndex.

Can take two kinds of inputs:

A native query of the underlying database. This is meant as a passthrough so that you can enjoy any functionality that is not available through the Document index API.
The output of this Document index's QueryBuilder.build() method.

Parameters:

Name	Type	Description	Default
`query`	`Union[Query, RawQuery]`	the query to execute	required
`args`		positional arguments to pass to the query	`()`
`kwargs`		keyword arguments to pass to the query	`{}`

Returns:

Type	Description
`DocList`	the result of the query

Source code in docarray/index/backends/qdrant.py

def execute_query(self, query: Union[Query, RawQuery], *args, **kwargs) -> DocList:
    """
    Execute a query on the QdrantDocumentIndex.

    Can take two kinds of inputs:

    1. A native query of the underlying database. This is meant as a passthrough so that you
    can enjoy any functionality that is not available through the Document index API.
    2. The output of this Document index's `QueryBuilder.build()` method.

    :param query: the query to execute
    :param args: positional arguments to pass to the query
        (accepted, but not forwarded by this implementation)
    :param kwargs: keyword arguments to pass to the query
        (accepted, but not forwarded by this implementation)
    :return: the result of the query
    """
    if not isinstance(query, QdrantDocumentIndex.Query):
        # Anything that is not a built Query is handled as a raw query; a copy
        # of it is handed over to the raw-query execution.
        points = self._execute_raw_query(query.copy())
    elif query.vector_field:
        # A vector field is set, so this is a semantic search and Qdrant's
        # search method has to be used.
        #
        # `query.filter` is None when no payload or text filter was chained in
        # the builder. In that case no `must` clause is sent at all, instead
        # of a list holding None.
        must_conditions = [query.filter] if query.filter is not None else None
        points = self._client.search(  # type: ignore[assignment]
            collection_name=self.collection_name,
            query_vector=(query.vector_field, query.vector_query),  # type: ignore[arg-type]
            query_filter=rest.Filter(
                must=must_conditions,
                # Exclude points whose `__generated_vectors` payload matches
                # the searched vector field (presumably points for which that
                # vector was generated rather than provided), so that they do
                # not show up in the search results.
                must_not=[
                    rest.FieldCondition(
                        key='__generated_vectors',
                        match=rest.MatchValue(value=query.vector_field),
                    )
                ],
            ),
            limit=query.limit,
            with_payload=True,
            with_vectors=True,
        )
    else:
        # Just filtering, so Qdrant's scroll has to be used instead
        points, _ = self._client.scroll(  # type: ignore[assignment]
            collection_name=self.collection_name,
            scroll_filter=query.filter,
            limit=query.limit,
            with_payload=True,
            with_vectors=True,
        )

    docs = [self._convert_to_doc(point) for point in points]
    return self._dict_list_to_docarray(docs)

`filter(filter_query, limit=10, **kwargs)`

Find documents in the index based on a filter query

Parameters:

Name	Type	Description	Default
`filter_query`	`Any`	the DB specific filter query to execute	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`DocList`	a DocList containing the documents that match the filter query

Source code in docarray/index/abstract.py

def filter(
    self,
    filter_query: Any,
    limit: int = 10,
    **kwargs,
) -> DocList:
    """Run a filter query against the index.

    :param filter_query: the DB specific filter query to execute
    :param limit: maximum number of documents to return
    :param kwargs: forwarded to the backend's `_filter`
    :return: a DocList containing the documents that match the filter query
    """
    self._logger.debug(f'Executing `filter` for the query {filter_query}')
    result = self._filter(filter_query, limit=limit, **kwargs)

    # A DocList, or anything that is not a list, is returned as it is; only
    # plain lists are converted.
    if isinstance(result, DocList) or not isinstance(result, List):
        return result
    return self._dict_list_to_docarray(result)

`filter_batched(filter_queries, limit=10, **kwargs)`

Find documents in the index based on multiple filter queries.

Parameters:

Name	Type	Description	Default
`filter_queries`	`Any`	the DB specific filter query to execute	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`List[DocList]`	a list of DocLists containing the documents that match the respective filter queries

Source code in docarray/index/abstract.py

def filter_batched(
    self,
    filter_queries: Any,
    limit: int = 10,
    **kwargs,
) -> List[DocList]:
    """Find documents in the index based on multiple filter queries.

    :param filter_queries: the DB specific filter queries to execute
    :param limit: maximum number of documents to return
    :param kwargs: forwarded to the backend's `_filter_batched`
    :return: a list of DocLists containing the documents that match the
        respective filter queries
    """
    self._logger.debug(
        f'Executing `filter_batched` for the queries {filter_queries}'
    )
    da_list = self._filter_batched(filter_queries, limit=limit, **kwargs)

    # Only plain lists need converting. Results that already are DocLists are
    # passed through untouched, the same way `find_batched` treats them.
    if (
        len(da_list) > 0
        and isinstance(da_list[0], List)
        and not isinstance(da_list[0], DocList)
    ):
        da_list = [self._dict_list_to_docarray(docs) for docs in da_list]

    return da_list  # type: ignore

`filter_subindex(filter_query, subindex, limit=10, **kwargs)`

Find documents in subindex level based on a filter query

Parameters:

Name	Type	Description	Default
`filter_query`	`Any`	the DB specific filter query to execute	required
`subindex`	`str`	name of the subindex to search on	required
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`DocList`	a DocList containing the subindex level documents that match the filter query

Source code in docarray/index/abstract.py

def filter_subindex(
    self,
    filter_query: Any,
    subindex: str,
    limit: int = 10,
    **kwargs,
) -> DocList:
    """Run a filter query against a (possibly nested) subindex.

    Nested subindices are addressed with `__` as separator; the call is
    delegated level by level until the last subindex is reached.

    :param filter_query: the DB specific filter query to execute
    :param subindex: name of the subindex to search on
    :param limit: maximum number of documents to return
    :param kwargs: forwarded to the subindex call
    :return: a DocList containing the subindex level documents that match the filter query
    """
    self._logger.debug(
        f'Executing `filter` for the query {filter_query} in subindex {subindex}'
    )
    # Split off the first path component; `separator` is empty when the name
    # contains no `__`, i.e. when it refers to a direct subindex.
    first_level, separator, remainder = subindex.partition('__')
    if not separator:
        return self._subindices[subindex].filter(
            filter_query, limit=limit, **kwargs
        )
    return self._subindices[first_level].filter_subindex(
        filter_query, remainder, limit=limit, **kwargs
    )

`find(query, search_field='', limit=10, **kwargs)`

Find documents in the index using nearest neighbor search.

Parameters:

Name	Type	Description	Default
`query`	`Union[AnyTensor, BaseDoc]`	query vector for KNN/ANN search. Can be either a tensor-like (np.array, torch.Tensor, etc.) with a single axis, or a Document	required
`search_field`	`str`	name of the field to search on. Documents in the index are retrieved based on the similarity of this field to the query.	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`FindResult`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def find(
    self,
    query: Union[AnyTensor, BaseDoc],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResult:
    """Run a nearest neighbor search against the index.

    :param query: query vector for KNN/ANN search.
        Can be either a tensor-like (np.array, torch.Tensor, etc.)
        with a single axis, or a Document
    :param search_field: name of the field to search on.
        Documents in the index are retrieved based on the similarity
        of this field to the query.
    :param limit: maximum number of documents to return
    :param kwargs: forwarded to the backend's `_find`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(f'Executing `find` for search field {search_field}')
    self._validate_search_field(search_field)

    # For a Document, the value of its search field is the query vector.
    query_vec = (
        self._get_values_by_column([query], search_field)[0]
        if isinstance(query, BaseDoc)
        else query
    )
    docs, scores = self._find(
        self._to_numpy(query_vec), search_field=search_field, limit=limit, **kwargs
    )

    # Plain lists are converted; DocLists are passed through.
    needs_conversion = isinstance(docs, List) and not isinstance(docs, DocList)
    if needs_conversion:
        docs = self._dict_list_to_docarray(docs)
    return FindResult(documents=docs, scores=scores)

`find_batched(queries, search_field='', limit=10, **kwargs)`

Find documents in the index using nearest neighbor search.

Parameters:

Name	Type	Description	Default
`queries`	`Union[AnyTensor, DocList]`	query vectors for KNN/ANN search. Can be either a tensor-like (np.array, torch.Tensor, etc.) or a DocList. If a tensor-like is passed, it should have shape (batch_size, vector_dim)	required
`search_field`	`str`	name of the field to search on. Documents in the index are retrieved based on the similarity of this field to the query.	`''`
`limit`	`int`	maximum number of documents to return per query	`10`

Returns:

Type	Description
`FindResultBatched`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def find_batched(
    self,
    queries: Union[AnyTensor, DocList],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResultBatched:
    """Run a nearest neighbor search for a batch of queries.

    :param queries: query vectors for KNN/ANN search.
        Can be either a tensor-like (np.array, torch.Tensor, etc.)
        or a DocList.
        If a tensor-like is passed, it should have shape (batch_size, vector_dim)
    :param search_field: name of the field to search on.
        Documents in the index are retrieved based on the similarity
        of this field to the query.
    :param limit: maximum number of documents to return per query
    :param kwargs: forwarded to the backend's `_find_batched`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(f'Executing `find_batched` for search field {search_field}')

    # A `__`-separated search field whose first component is a document-array
    # field is delegated to the subindex of that field.
    if search_field and '__' in search_field:
        root_field, _, nested_field = search_field.partition('__')
        if safe_issubclass(self._schema._get_field_annotation(root_field), AnyDocArray):  # type: ignore
            return self._subindices[root_field].find_batched(
                queries,
                search_field=nested_field,
                limit=limit,
                **kwargs,
            )

    self._validate_search_field(search_field)
    if not isinstance(queries, Sequence):
        query_vec_np = self._to_numpy(queries)
    else:
        # Sequence input: take the search field value of every element and
        # stack the resulting arrays into one.
        column_values = self._get_values_by_column(queries, search_field)
        query_vec_np = np.stack(
            tuple(self._to_numpy(value) for value in column_values)
        )

    da_list, scores = self._find_batched(
        query_vec_np, search_field=search_field, limit=limit, **kwargs
    )

    # Plain lists are converted; DocLists are passed through.
    needs_conversion = (
        len(da_list) > 0
        and isinstance(da_list[0], List)
        and not isinstance(da_list[0], DocList)
    )
    if needs_conversion:
        da_list = [self._dict_list_to_docarray(docs) for docs in da_list]

    return FindResultBatched(documents=da_list, scores=scores)  # type: ignore

`find_subindex(query, subindex='', search_field='', limit=10, **kwargs)`

Find documents in subindex level.

Parameters:

Name	Type	Description	Default
`query`	`Union[AnyTensor, BaseDoc]`	query vector for KNN/ANN search. Can be either a tensor-like (np.array, torch.Tensor, etc.) with a single axis, or a Document	required
`subindex`	`str`	name of the subindex to search on	`''`
`search_field`	`str`	name of the field to search on	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`SubindexFindResult`	a named tuple containing root docs, subindex docs and scores

Source code in docarray/index/abstract.py

def find_subindex(
    self,
    query: Union[AnyTensor, BaseDoc],
    subindex: str = '',
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> SubindexFindResult:
    """Run a nearest neighbor search on subindex level.

    Besides the matching subindex documents, the root document belonging to
    each of them is fetched from this index.

    :param query: query vector for KNN/ANN search.
        Can be either a tensor-like (np.array, torch.Tensor, etc.)
        with a single axis, or a Document
    :param subindex: name of the subindex to search on
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return
    :param kwargs: forwarded to `_find_subdocs`
    :return: a named tuple containing root docs, subindex docs and scores
    """
    self._logger.debug(f'Executing `find_subindex` for search field {search_field}')

    sub_docs, scores = self._find_subdocs(
        query, subindex=subindex, search_field=search_field, limit=limit, **kwargs
    )

    # Split the subindex path into its first component and the remainder.
    first_level, _, remainder = subindex.partition('__')
    root_ids = [
        self._get_root_doc_id(sub_doc.id, first_level, remainder)
        for sub_doc in sub_docs
    ]

    # Fetch the root documents one by one, in the order of the sub documents.
    root_docs = DocList[self._schema]()  # type: ignore
    for root_id in root_ids:
        root_docs.append(self[root_id])

    return SubindexFindResult(
        root_documents=root_docs, sub_documents=sub_docs, scores=scores  # type: ignore
    )

`index(docs, **kwargs)`

Index Documents into the index.

Note

Passing a sequence of Documents that is not a DocList (such as a List of Docs) comes at a performance penalty. This is because the Index needs to check compatibility between itself and the data. With a DocList as input this is a single check; for other inputs compatibility needs to be checked for every Document individually.

Parameters:

Name	Type	Description	Default
`docs`	`Union[BaseDoc, Sequence[BaseDoc]]`	Documents to index.	required

Source code in docarray/index/abstract.py

def index(self, docs: Union[BaseDoc, Sequence[BaseDoc]], **kwargs):
    """Index Documents into the index.

    !!! note
        Passing a sequence of Documents that is not a DocList
        (such as a List of Docs) comes at a performance penalty.
        This is because the Index needs to check compatibility between itself and
        the data. With a DocList as input this is a single check; for other inputs
        compatibility needs to be checked for every Document individually.

    :param docs: Documents to index.
    :param kwargs: forwarded to the backend's `_index`
    """
    # A single Document counts as one; otherwise the length of the sequence.
    if isinstance(docs, BaseDoc):
        n_docs = 1
    else:
        n_docs = len(docs)
    self._logger.debug(f'Indexing {n_docs} documents')

    validated_docs = self._validate_docs(docs)
    self._update_subindex_data(validated_docs)
    column_data = self._get_col_value_dict(validated_docs)
    self._index(column_data, **kwargs)

`num_docs()`

Get the number of documents.

Source code in docarray/index/backends/qdrant.py

def num_docs(self) -> int:
    """
    Return the number of documents, as counted by the client for this
    index's collection.
    """
    count_result = self._client.count(collection_name=self.collection_name)
    return count_result.count

`python_type_to_db_type(python_type)`

Map python type to database type. Takes any python type and returns the corresponding database column type.

Parameters:

Name	Type	Description	Default
`python_type`	`Type`	a python type.	required

Returns:

Type	Description
`Any`	the corresponding database column type.

Source code in docarray/index/backends/qdrant.py

def python_type_to_db_type(self, python_type: Type) -> Any:
    """Map python type to database type.

    The result is one of three strings: `'vector'` for subclasses of any type
    in `QDRANT_PY_VECTOR_TYPES`, `'id'` for subclasses of the docarray ID type,
    and `'payload'` for everything else.

    :param python_type: a python type.
    :return: the corresponding database column type.
    """
    # Vector types are checked first, then the ID type.
    for vector_type in QDRANT_PY_VECTOR_TYPES:
        if safe_issubclass(python_type, vector_type):
            return 'vector'

    is_id_type = safe_issubclass(python_type, docarray.typing.id.ID)
    return 'id' if is_id_type else 'payload'

`subindex_contains(item)`

Checks if a given BaseDoc item is contained in the index or any of its subindices.

Parameters:

Name	Type	Description	Default
`item`	`BaseDoc`	the given BaseDoc	required

Returns:

Type	Description
`bool`	if the given BaseDoc item is contained in the index/subindices

Source code in docarray/index/abstract.py

def subindex_contains(self, item: BaseDoc) -> bool:
    """Tell whether a BaseDoc item is held by the index or any of its subindices.

    An empty index yields False before the type of `item` is looked at.

    :param item: the given BaseDoc
    :return: if the given BaseDoc item is contained in the index/subindices
    :raises TypeError: if the index is not empty and `item` is not a BaseDoc
        (or subclass) instance
    """
    if self._is_index_empty:
        return False

    if not safe_issubclass(type(item), BaseDoc):
        raise TypeError(
            f"item must be an instance of BaseDoc or its subclass, not '{type(item).__name__}'"
        )

    # Look in this index first; the subindices are only searched when the
    # item is not found here.
    found_here = self.__contains__(item)
    return found_here or any(
        subindex.subindex_contains(item) for subindex in self._subindices.values()
    )

`text_search(query, search_field='', limit=10, **kwargs)`

Find documents in the index based on a text search query.

Parameters:

Name	Type	Description	Default
`query`	`Union[str, BaseDoc]`	The text to search for	required
`search_field`	`str`	name of the field to search on	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`FindResult`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def text_search(
    self,
    query: Union[str, BaseDoc],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResult:
    """Run a text search against the index.

    :param query: The text to search for
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return
    :param kwargs: forwarded to the backend's `_text_search`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(f'Executing `text_search` for search field {search_field}')
    self._validate_search_field(search_field)

    # For a Document, the value of its search field is the text to search for.
    query_text = (
        self._get_values_by_column([query], search_field)[0]
        if isinstance(query, BaseDoc)
        else query
    )
    docs, scores = self._text_search(
        query_text, search_field=search_field, limit=limit, **kwargs
    )

    # Plain lists are converted; DocLists are passed through.
    needs_conversion = isinstance(docs, List) and not isinstance(docs, DocList)
    if needs_conversion:
        docs = self._dict_list_to_docarray(docs)
    return FindResult(documents=docs, scores=scores)

`text_search_batched(queries, search_field='', limit=10, **kwargs)`

Find documents in the index based on a text search query.

Parameters:

Name	Type	Description	Default
`queries`	`Union[Sequence[str], Sequence[BaseDoc]]`	The texts to search for	required
`search_field`	`str`	name of the field to search on	`''`
`limit`	`int`	maximum number of documents to return	`10`

Returns:

Type	Description
`FindResultBatched`	a named tuple containing `documents` and `scores`

Source code in docarray/index/abstract.py

def text_search_batched(
    self,
    queries: Union[Sequence[str], Sequence[BaseDoc]],
    search_field: str = '',
    limit: int = 10,
    **kwargs,
) -> FindResultBatched:
    """Find documents in the index for a batch of text search queries.

    :param queries: The texts to search for; when documents are given instead,
        the texts are read from their `search_field` column. An empty
        sequence is accepted and forwarded to the backend as-is.
    :param search_field: name of the field to search on
    :param limit: maximum number of documents to return
    :param kwargs: extra keyword arguments forwarded unchanged to
        `_text_search_batched`
    :return: a named tuple containing `documents` and `scores`
    """
    self._logger.debug(
        f'Executing `text_search_batched` for search field {search_field}'
    )
    self._validate_search_field(search_field)

    # Peek at the first element only when there is one: an empty batch must
    # not raise IndexError before the backend is even consulted.
    if len(queries) > 0 and isinstance(queries[0], BaseDoc):
        query_docs: Sequence[BaseDoc] = cast(Sequence[BaseDoc], queries)
        query_texts: Sequence[str] = self._get_values_by_column(
            query_docs, search_field
        )
    else:
        query_texts = cast(Sequence[str], queries)

    da_list, scores = self._text_search_batched(
        query_texts, search_field=search_field, limit=limit, **kwargs
    )

    # Convert plain per-query lists only; results that already are DocLists
    # are passed through, mirroring the check done in `text_search`.
    if (
        len(da_list) > 0
        and isinstance(da_list[0], List)
        and not isinstance(da_list[0], DocList)
    ):
        docs = [self._dict_list_to_docarray(docs) for docs in da_list]
        return FindResultBatched(documents=docs, scores=scores)

    da_list_ = cast(List[DocList], da_list)
    return FindResultBatched(documents=da_list_, scores=scores)

QdrantDocumentIndex

docarray.index.backends.qdrant.QdrantDocumentIndex

DBConfig dataclass

Query dataclass

QueryBuilder

build(limit)

filter(filter_query)

find(query, search_field='')

text_search(query, search_field='')

RuntimeConfig dataclass

__contains__(item)

__delitem__(key)

__getitem__(key)

__init__(db_config=None, **kwargs)

build_query()

configure(runtime_config=None, **kwargs)

execute_query(query, *args, **kwargs)

filter(filter_query, limit=10, **kwargs)

filter_batched(filter_queries, limit=10, **kwargs)

filter_subindex(filter_query, subindex, limit=10, **kwargs)

find(query, search_field='', limit=10, **kwargs)

find_batched(queries, search_field='', limit=10, **kwargs)

find_subindex(query, subindex='', search_field='', limit=10, **kwargs)

index(docs, **kwargs)

num_docs()

python_type_to_db_type(python_type)

subindex_contains(item)

text_search(query, search_field='', limit=10, **kwargs)

text_search_batched(queries, search_field='', limit=10, **kwargs)

`docarray.index.backends.qdrant.QdrantDocumentIndex`

`DBConfig` `dataclass`

`Query` `dataclass`

`QueryBuilder`

`build(limit)`

`filter(filter_query)`

`find(query, search_field='')`

`text_search(query, search_field='')`

`RuntimeConfig` `dataclass`

`__contains__(item)`

`__delitem__(key)`

`__getitem__(key)`

`__init__(db_config=None, **kwargs)`

`build_query()`

`configure(runtime_config=None, **kwargs)`

`execute_query(query, *args, **kwargs)`

`filter(filter_query, limit=10, **kwargs)`

`filter_batched(filter_queries, limit=10, **kwargs)`

`filter_subindex(filter_query, subindex, limit=10, **kwargs)`

`find(query, search_field='', limit=10, **kwargs)`

`find_batched(queries, search_field='', limit=10, **kwargs)`

`find_subindex(query, subindex='', search_field='', limit=10, **kwargs)`

`index(docs, **kwargs)`

`num_docs()`

`python_type_to_db_type(python_type)`

`subindex_contains(item)`

`text_search(query, search_field='', limit=10, **kwargs)`

`text_search_batched(queries, search_field='', limit=10, **kwargs)`