From 9eaf424c6307d9e652eacb24687f6a78088e7888 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Mon, 18 May 2026 12:40:54 +0200 Subject: [PATCH 1/2] Add text2vec-digitalocean vectorizer module Adds `text2vec-digitalocean` to the `Vectorizers` enum and exposes factory methods on `Configure.Vectorizer`, `Configure.NamedVectors`, and `Configure.Vectors`. The module accepts an optional `base_url` (server default `https://inference.do-ai.run`) and a `model` (required by the server, e.g. `qwen3-embedding-0.6b`). The shape mirrors `text2vec-mistral` exactly (model + baseURL + vectorizeClassName), so serialization, URL normalization, and the existing _to_dict baseURL-stripping path are reused unchanged. Closes #2038 Co-Authored-By: Claude Opus 4.7 (1M context) --- test/collection/test_config.py | 42 +++++++++++++++++++ .../classes/config_named_vectors.py | 35 ++++++++++++++++ .../collections/classes/config_vectorizers.py | 37 ++++++++++++++++ .../collections/classes/config_vectors.py | 37 ++++++++++++++++ 4 files changed, 151 insertions(+) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 84bba4a63..301a4c69b 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -329,6 +329,20 @@ def test_basic_config(): } }, ), + ( + Configure.Vectorizer.text2vec_digitalocean( + vectorize_collection_name=False, + model="qwen3-embedding-0.6b", + base_url="https://inference.do-ai.run", + ), + { + "text2vec-digitalocean": { + "vectorizeClassName": False, + "model": "qwen3-embedding-0.6b", + "baseURL": "https://inference.do-ai.run/", + } + }, + ), ( Configure.Vectorizer.text2vec_palm( project_id="project", @@ -1771,6 +1785,20 @@ def test_vector_config_flat_pq() -> None: } }, ), + ( + [Configure.NamedVectors.text2vec_digitalocean(name="test", source_properties=["prop"])], + { + "test": { + "vectorizer": { + "text2vec-digitalocean": { + "vectorizeClassName": True, + "properties": ["prop"], + } + }, + "vectorIndexType": 
"hnsw", + } + }, + ), ( [ Configure.NamedVectors.text2vec_palm( @@ -2373,6 +2401,20 @@ def test_config_with_named_vectors( } }, ), + ( + [Configure.Vectors.text2vec_digitalocean(name="test", source_properties=["prop"])], + { + "test": { + "vectorizer": { + "text2vec-digitalocean": { + "vectorizeClassName": True, + "properties": ["prop"], + } + }, + "vectorIndexType": "hnsw", + } + }, + ), ( [Configure.Vectors.text2vec_morph(name="test", source_properties=["prop"])], { diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 295410b38..1d0b69650 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -48,6 +48,7 @@ _Text2VecCohereConfig, _Text2VecContextionaryConfig, _Text2VecDatabricksConfig, + _Text2VecDigitalOceanConfig, _Text2VecGoogleConfig, _Text2VecGPT4AllConfig, _Text2VecHuggingFaceConfig, @@ -358,6 +359,40 @@ def text2vec_mistral( vector_index_config=vector_index_config, ) + @staticmethod + def text2vec_digitalocean( + name: str, + *, + base_url: Optional[AnyHttpUrl] = None, + model: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _NamedVectorConfigCreate: + """Create a named vector using the `text2vec-digitalocean` model. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings) + for detailed usage. + + Args: + name: The name of the named vector. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`. + model: The model to use, e.g. `qwen3-embedding-0.6b`. This is a required field on the server. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. 
+ vector_index_config: The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _NamedVectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecDigitalOceanConfig( + baseURL=base_url, + model=model, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=vector_index_config, + ) + @staticmethod def text2vec_ollama( name: str, diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 60896b417..66c7953cd 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -119,6 +119,7 @@ class Vectorizers(str, Enum): TEXT2VEC_COHERE = "text2vec-cohere" TEXT2VEC_CONTEXTIONARY = "text2vec-contextionary" TEXT2VEC_DATABRICKS = "text2vec-databricks" + TEXT2VEC_DIGITALOCEAN = "text2vec-digitalocean" TEXT2VEC_GPT4ALL = "text2vec-gpt4all" TEXT2VEC_HUGGINGFACE = "text2vec-huggingface" TEXT2VEC_MISTRAL = "text2vec-mistral" @@ -286,6 +287,21 @@ def _to_dict(self) -> Dict[str, Any]: return ret_dict +class _Text2VecDigitalOceanConfig(_VectorizerConfigCreate): + vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( + default=Vectorizers.TEXT2VEC_DIGITALOCEAN, frozen=True, exclude=True + ) + model: Optional[str] + vectorizeClassName: bool + baseURL: Optional[AnyHttpUrl] + + def _to_dict(self) -> Dict[str, Any]: + ret_dict = super()._to_dict() + if self.baseURL is not None: + ret_dict["baseURL"] = self.baseURL.unicode_string() + return ret_dict + + class _Text2VecMorphConfig(_VectorizerConfigCreate): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( default=Vectorizers.TEXT2VEC_MORPH, frozen=True, exclude=True @@ -1084,6 +1100,27 @@ def text2vec_mistral( baseURL=base_url, model=model, 
vectorizeClassName=vectorize_collection_name ) + @staticmethod + def text2vec_digitalocean( + *, + base_url: Optional[AnyHttpUrl] = None, + model: Optional[str] = None, + vectorize_collection_name: bool = True, + ) -> _VectorizerConfigCreate: + """Create a `_Text2VecDigitalOceanConfig` object for use when vectorizing using the `text2vec-digitalocean` model. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings) + for detailed usage. + + Args: + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`. + model: The model to use, e.g. `qwen3-embedding-0.6b`. This is a required field on the server. + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _Text2VecDigitalOceanConfig( + baseURL=base_url, model=model, vectorizeClassName=vectorize_collection_name + ) + @staticmethod def text2vec_ollama( *, diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index 37ab8a912..2c32e7437 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -59,6 +59,7 @@ _Text2VecCohereConfig, _Text2VecContextionaryConfig, _Text2VecDatabricksConfig, + _Text2VecDigitalOceanConfig, _Text2VecGoogleConfig, _Text2VecGPT4AllConfig, _Text2VecHuggingFaceConfig, @@ -620,6 +621,42 @@ def text2vec_mistral( vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), ) + @staticmethod + def text2vec_digitalocean( + *, + name: Optional[str] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + model: Optional[str] = None, + source_properties: Optional[List[str]] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + vectorize_collection_name: bool = True, + ) -> _VectorConfigCreate: + """Create a vector using 
the `text2vec-digitalocean` module. + + See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings) + for detailed usage. + + Args: + name: The name of the vector. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`. + model: The model to use, e.g. `qwen3-embedding-0.6b`. This is a required field on the server. + source_properties: Which properties should be included when vectorizing. By default all text properties are included. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. None by default + vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. + """ + return _VectorConfigCreate( + name=name, + source_properties=source_properties, + vectorizer=_Text2VecDigitalOceanConfig( + baseURL=base_url, + model=model, + vectorizeClassName=vectorize_collection_name, + ), + vector_index_config=_IndexWrappers.single(vector_index_config, quantizer), + ) + @staticmethod def text2vec_morph( *, From 22aa8fce30bb1416943dab7f4b5b67726a3bcfa0 Mon Sep 17 00:00:00 2001 From: Michelangelo Partipilo Date: Mon, 18 May 2026 13:33:16 +0200 Subject: [PATCH 2/2] Remove outdated constructors for text2vec-digitalocean configuration and make model required --- test/collection/test_config.py | 35 ++++--------------- .../classes/config_named_vectors.py | 35 ------------------- .../collections/classes/config_vectorizers.py | 23 +----------- .../collections/classes/config_vectors.py | 2 +- 4 files changed, 8 insertions(+), 87 deletions(-) diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 301a4c69b..07e89aae2 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py 
@@ -329,20 +329,6 @@ def test_basic_config(): } }, ), - ( - Configure.Vectorizer.text2vec_digitalocean( - vectorize_collection_name=False, - model="qwen3-embedding-0.6b", - base_url="https://inference.do-ai.run", - ), - { - "text2vec-digitalocean": { - "vectorizeClassName": False, - "model": "qwen3-embedding-0.6b", - "baseURL": "https://inference.do-ai.run/", - } - }, - ), ( Configure.Vectorizer.text2vec_palm( project_id="project", @@ -1785,20 +1771,6 @@ def test_vector_config_flat_pq() -> None: } }, ), - ( - [Configure.NamedVectors.text2vec_digitalocean(name="test", source_properties=["prop"])], - { - "test": { - "vectorizer": { - "text2vec-digitalocean": { - "vectorizeClassName": True, - "properties": ["prop"], - } - }, - "vectorIndexType": "hnsw", - } - }, - ), ( [ Configure.NamedVectors.text2vec_palm( @@ -2402,13 +2374,18 @@ def test_config_with_named_vectors( }, ), ( - [Configure.Vectors.text2vec_digitalocean(name="test", source_properties=["prop"])], + [ + Configure.Vectors.text2vec_digitalocean( + name="test", source_properties=["prop"], model="qwen2" + ) + ], { "test": { "vectorizer": { "text2vec-digitalocean": { "vectorizeClassName": True, "properties": ["prop"], + "model": "qwen2", } }, "vectorIndexType": "hnsw", diff --git a/weaviate/collections/classes/config_named_vectors.py b/weaviate/collections/classes/config_named_vectors.py index 1d0b69650..295410b38 100644 --- a/weaviate/collections/classes/config_named_vectors.py +++ b/weaviate/collections/classes/config_named_vectors.py @@ -48,7 +48,6 @@ _Text2VecCohereConfig, _Text2VecContextionaryConfig, _Text2VecDatabricksConfig, - _Text2VecDigitalOceanConfig, _Text2VecGoogleConfig, _Text2VecGPT4AllConfig, _Text2VecHuggingFaceConfig, @@ -359,40 +358,6 @@ def text2vec_mistral( vector_index_config=vector_index_config, ) - @staticmethod - def text2vec_digitalocean( - name: str, - *, - base_url: Optional[AnyHttpUrl] = None, - model: Optional[str] = None, - source_properties: Optional[List[str]] = None, - 
vector_index_config: Optional[_VectorIndexConfigCreate] = None, - vectorize_collection_name: bool = True, - ) -> _NamedVectorConfigCreate: - """Create a named vector using the `text2vec-digitalocean` model. - - See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings) - for detailed usage. - - Args: - name: The name of the named vector. - base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`. - model: The model to use, e.g. `qwen3-embedding-0.6b`. This is a required field on the server. - source_properties: Which properties should be included when vectorizing. By default all text properties are included. - vector_index_config: The configuration for Weaviate's vector index. Use wvc.config.Configure.VectorIndex to create a vector index configuration. None by default - vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
- """ - return _NamedVectorConfigCreate( - name=name, - source_properties=source_properties, - vectorizer=_Text2VecDigitalOceanConfig( - baseURL=base_url, - model=model, - vectorizeClassName=vectorize_collection_name, - ), - vector_index_config=vector_index_config, - ) - @staticmethod def text2vec_ollama( name: str, diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 66c7953cd..d96f88f9a 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -291,7 +291,7 @@ class _Text2VecDigitalOceanConfig(_VectorizerConfigCreate): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( default=Vectorizers.TEXT2VEC_DIGITALOCEAN, frozen=True, exclude=True ) - model: Optional[str] + model: str vectorizeClassName: bool baseURL: Optional[AnyHttpUrl] @@ -1100,27 +1100,6 @@ def text2vec_mistral( baseURL=base_url, model=model, vectorizeClassName=vectorize_collection_name ) - @staticmethod - def text2vec_digitalocean( - *, - base_url: Optional[AnyHttpUrl] = None, - model: Optional[str] = None, - vectorize_collection_name: bool = True, - ) -> _VectorizerConfigCreate: - """Create a `_Text2VecDigitalOceanConfig` object for use when vectorizing using the `text2vec-digitalocean` model. - - See the [documentation](https://weaviate.io/developers/weaviate/model-providers/digitalocean/embeddings) - for detailed usage. - - Args: - base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default of `https://inference.do-ai.run`. - model: The model to use, e.g. `qwen3-embedding-0.6b`. This is a required field on the server. - vectorize_collection_name: Whether to vectorize the collection name. Defaults to `True`. 
- """ - return _Text2VecDigitalOceanConfig( - baseURL=base_url, model=model, vectorizeClassName=vectorize_collection_name - ) - @staticmethod def text2vec_ollama( *, diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index 2c32e7437..0fde62fec 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -627,7 +627,7 @@ def text2vec_digitalocean( name: Optional[str] = None, quantizer: Optional[_QuantizerConfigCreate] = None, base_url: Optional[AnyHttpUrl] = None, - model: Optional[str] = None, + model: str, source_properties: Optional[List[str]] = None, vector_index_config: Optional[_VectorIndexConfigCreate] = None, vectorize_collection_name: bool = True,