From 982dc07d89246ca3e30d7224a3805836d71adc5b Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 9 Sep 2025 09:54:59 -0500 Subject: [PATCH 1/5] CRUD Support in Driver for Prefix/Suffix/Substring Indexes --- pymongo/asynchronous/encryption.py | 40 ++++- pymongo/encryption_options.py | 84 ++++++++- pymongo/synchronous/encryption.py | 40 ++++- pyproject.toml | 2 +- test/__init__.py | 14 ++ test/asynchronous/__init__.py | 14 ++ test/asynchronous/test_encryption.py | 257 ++++++++++++++++++++++++++- test/test_encryption.py | 257 ++++++++++++++++++++++++++- 8 files changed, 702 insertions(+), 6 deletions(-) diff --git a/pymongo/asynchronous/encryption.py b/pymongo/asynchronous/encryption.py index f4d66cb956..b302631108 100644 --- a/pymongo/asynchronous/encryption.py +++ b/pymongo/asynchronous/encryption.py @@ -67,7 +67,7 @@ from pymongo.asynchronous.pool import AsyncBaseConnection from pymongo.common import CONNECT_TIMEOUT from pymongo.daemon import _spawn_daemon -from pymongo.encryption_options import AutoEncryptionOpts, RangeOpts +from pymongo.encryption_options import AutoEncryptionOpts, RangeOpts, TextOpts from pymongo.errors import ( ConfigurationError, EncryptedCollectionError, @@ -516,6 +516,11 @@ class Algorithm(str, enum.Enum): .. versionadded:: 4.4 """ + TEXTPREVIEW = "TextPreview" + """**BETA** - TextPreview. + + .. versionadded:: 4.15 + """ class QueryType(str, enum.Enum): @@ -541,6 +546,24 @@ class QueryType(str, enum.Enum): .. versionadded:: 4.4 """ + PREFIXPREVIEW = "prefixPreview" + """**BETA** - Used to encrypt a value for a prefixPreview query. + + .. versionadded:: 4.15 + """ + + SUFFIXPREVIEW = "suffixPreview" + """**BETA** - Used to encrypt a value for a suffixPreview query. + + .. versionadded:: 4.15 + """ + + SUBSTRINGPREVIEW = "substringPreview" + """**BETA** - Used to encrypt a value for a substringPreview query. + + .. 
versionadded:: 4.15 + """ + def _create_mongocrypt_options(**kwargs: Any) -> MongoCryptOptions: # For compat with pymongocrypt <1.13, avoid setting the default key_expiration_ms. @@ -876,6 +899,7 @@ async def _encrypt_helper( contention_factor: Optional[int] = None, range_opts: Optional[RangeOpts] = None, is_expression: bool = False, + text_opts: Optional[TextOpts] = None, ) -> Any: self._check_closed() if isinstance(key_id, uuid.UUID): @@ -895,6 +919,12 @@ async def _encrypt_helper( range_opts.document, codec_options=self._codec_options, ) + text_opts_bytes = None + if text_opts: + text_opts_bytes = encode( + text_opts.document, + codec_options=self._codec_options, + ) with _wrap_encryption_errors(): encrypted_doc = await self._encryption.encrypt( value=doc, @@ -905,6 +935,7 @@ async def _encrypt_helper( contention_factor=contention_factor, range_opts=range_opts_bytes, is_expression=is_expression, + text_opts=text_opts_bytes, ) return decode(encrypted_doc)["v"] @@ -917,6 +948,7 @@ async def encrypt( query_type: Optional[str] = None, contention_factor: Optional[int] = None, range_opts: Optional[RangeOpts] = None, + text_opts: Optional[TextOpts] = None, ) -> Binary: """Encrypt a BSON value with a given key and algorithm. @@ -937,9 +969,14 @@ async def encrypt( used. :param range_opts: Index options for `range` queries. See :class:`RangeOpts` for some valid options. + :param text_opts: Index options for `textPreview` queries. See + :class:`TextOpts` for some valid options. :return: The encrypted value, a :class:`~bson.binary.Binary` with subtype 6. + .. versionchanged:: 4.15 + Added the `text_opts` parameter. + .. versionchanged:: 4.9 Added the `range_opts` parameter. 
@@ -960,6 +997,7 @@ async def encrypt( contention_factor=contention_factor, range_opts=range_opts, is_expression=False, + text_opts=text_opts, ), ) diff --git a/pymongo/encryption_options.py b/pymongo/encryption_options.py index bbc736d1c0..da34a3be52 100644 --- a/pymongo/encryption_options.py +++ b/pymongo/encryption_options.py @@ -18,7 +18,7 @@ """ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Mapping, Optional +from typing import TYPE_CHECKING, Any, Mapping, Optional, TypedDict from pymongo.uri_parser_shared import _parse_kms_tls_options @@ -295,3 +295,85 @@ def document(self) -> dict[str, Any]: if v is not None: doc[k] = v return doc + + +class TextOpts: + """**BETA** Options to configure encrypted queries using the text algorithm. + + TextOpts is currently unstable API and subject to backwards breaking changes.""" + + def __init__( + self, + substring: Optional[SubstringOpts] = None, + prefix: Optional[PrefixOpts] = None, + suffix: Optional[SuffixOpts] = None, + case_sensitive: Optional[bool] = None, + diacritic_sensitive: Optional[bool] = None, + ) -> None: + """Options to configure encrypted queries using the text algorithm. + + :param substring: Further options to support substring queries. + :param prefix: Further options to support prefix queries. + :param suffix: Further options to support suffix queries. + :param case_sensitive: Whether text indexes for this field are case sensitive. + :param diacritic_sensitive: Whether text indexes for this field are diacritic sensitive. + + .. 
versionadded:: 4.15 + """ + self.substring = substring + self.prefix = prefix + self.suffix = suffix + self.case_sensitive = case_sensitive + self.diacritic_sensitive = diacritic_sensitive + + @property + def document(self) -> dict[str, Any]: + doc = {} + for k, v in [ + ("substring", self.substring), + ("prefix", self.prefix), + ("suffix", self.suffix), + ("caseSensitive", self.case_sensitive), + ("diacriticSensitive", self.diacritic_sensitive), + ]: + if v is not None: + doc[k] = v + return doc + + +class SubstringOpts(TypedDict): + """**BETA** Options for substring text queries. + + SubstringOpts is currently unstable API and subject to backwards breaking changes. + """ + + # strMaxLength is the maximum allowed length to insert. Inserting longer strings will error. + strMaxLength: int + # strMinQueryLength is the minimum allowed query length. Querying with a shorter string will error. + strMinQueryLength: int + # strMaxQueryLength is the maximum allowed query length. Querying with a longer string will error. + strMaxQueryLength: int + + +class PrefixOpts(TypedDict): + """**BETA** Options for prefix text queries. + + PrefixOpts is currently unstable API and subject to backwards breaking changes. + """ + + # strMinQueryLength is the minimum allowed query length. Querying with a shorter string will error. + strMinQueryLength: int + # strMaxQueryLength is the maximum allowed query length. Querying with a longer string will error. + strMaxQueryLength: int + + +class SuffixOpts(TypedDict): + """**BETA** Options for suffix text queries. + + SuffixOpts is currently unstable API and subject to backwards breaking changes. + """ + + # strMinQueryLength is the minimum allowed query length. Querying with a shorter string will error. + strMinQueryLength: int + # strMaxQueryLength is the maximum allowed query length. Querying with a longer string will error. 
+ strMaxQueryLength: int diff --git a/pymongo/synchronous/encryption.py b/pymongo/synchronous/encryption.py index 7b98243528..752026af84 100644 --- a/pymongo/synchronous/encryption.py +++ b/pymongo/synchronous/encryption.py @@ -61,7 +61,7 @@ from pymongo import _csot from pymongo.common import CONNECT_TIMEOUT from pymongo.daemon import _spawn_daemon -from pymongo.encryption_options import AutoEncryptionOpts, RangeOpts +from pymongo.encryption_options import AutoEncryptionOpts, RangeOpts, TextOpts from pymongo.errors import ( ConfigurationError, EncryptedCollectionError, @@ -513,6 +513,11 @@ class Algorithm(str, enum.Enum): .. versionadded:: 4.4 """ + TEXTPREVIEW = "TextPreview" + """**BETA** - TextPreview. + + .. versionadded:: 4.15 + """ class QueryType(str, enum.Enum): @@ -538,6 +543,24 @@ class QueryType(str, enum.Enum): .. versionadded:: 4.4 """ + PREFIXPREVIEW = "prefixPreview" + """**BETA** - Used to encrypt a value for a prefixPreview query. + + .. versionadded:: 4.15 + """ + + SUFFIXPREVIEW = "suffixPreview" + """**BETA** - Used to encrypt a value for a suffixPreview query. + + .. versionadded:: 4.15 + """ + + SUBSTRINGPREVIEW = "substringPreview" + """**BETA** - Used to encrypt a value for a substringPreview query. + + .. versionadded:: 4.15 + """ + def _create_mongocrypt_options(**kwargs: Any) -> MongoCryptOptions: # For compat with pymongocrypt <1.13, avoid setting the default key_expiration_ms. 
@@ -869,6 +892,7 @@ def _encrypt_helper( contention_factor: Optional[int] = None, range_opts: Optional[RangeOpts] = None, is_expression: bool = False, + text_opts: Optional[TextOpts] = None, ) -> Any: self._check_closed() if isinstance(key_id, uuid.UUID): @@ -888,6 +912,12 @@ def _encrypt_helper( range_opts.document, codec_options=self._codec_options, ) + text_opts_bytes = None + if text_opts: + text_opts_bytes = encode( + text_opts.document, + codec_options=self._codec_options, + ) with _wrap_encryption_errors(): encrypted_doc = self._encryption.encrypt( value=doc, @@ -898,6 +928,7 @@ def _encrypt_helper( contention_factor=contention_factor, range_opts=range_opts_bytes, is_expression=is_expression, + text_opts=text_opts_bytes, ) return decode(encrypted_doc)["v"] @@ -910,6 +941,7 @@ def encrypt( query_type: Optional[str] = None, contention_factor: Optional[int] = None, range_opts: Optional[RangeOpts] = None, + text_opts: Optional[TextOpts] = None, ) -> Binary: """Encrypt a BSON value with a given key and algorithm. @@ -930,9 +962,14 @@ def encrypt( used. :param range_opts: Index options for `range` queries. See :class:`RangeOpts` for some valid options. + :param text_opts: Index options for `textPreview` queries. See + :class:`TextOpts` for some valid options. :return: The encrypted value, a :class:`~bson.binary.Binary` with subtype 6. + .. versionchanged:: 4.15 + Added the `text_opts` parameter. + .. versionchanged:: 4.9 Added the `range_opts` parameter. 
@@ -953,6 +990,7 @@ def encrypt( contention_factor=contention_factor, range_opts=range_opts, is_expression=False, + text_opts=text_opts, ), ) diff --git a/pyproject.toml b/pyproject.toml index 890244b688..5f68e9ebf6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ mockupdb = [ "mockupdb@git+https://github.com/mongodb-labs/mongo-mockup-db@master" ] pymongocrypt_source = [ - "pymongocrypt@git+https://github.com/mongodb/libmongocrypt@master#subdirectory=bindings/python" + "pymongocrypt@git+https://github.com/blink1073/libmongocrypt@PYTHON-5531#subdirectory=bindings/python" ] perf = ["simplejson"] typing = [ diff --git a/test/__init__.py b/test/__init__.py index 12660e3a4a..d583a72f0f 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -32,6 +32,7 @@ import warnings from inspect import iscoroutinefunction +from pymongo.encryption_options import _HAVE_PYMONGOCRYPT from pymongo.errors import AutoReconnect from pymongo.synchronous.uri_parser import parse_uri @@ -524,6 +525,19 @@ def require_version_max(self, *ver): "Server version must be at most %s" % str(other_version), ) + def require_libmongocrypt_min(self, *ver): + other_version = Version(*ver) + if not _HAVE_PYMONGOCRYPT: + version = Version.from_string("0.0.0") + else: + from pymongocrypt import libmongocrypt_version + + version = Version.from_string(libmongocrypt_version()) + return self._require( + lambda: version>= other_version, + "Libmongocrypt version must be at least %s" % str(other_version), + ) + def require_auth(self, func): """Run a test only if the server is running with auth enabled.""" return self._require( diff --git a/test/asynchronous/__init__.py b/test/asynchronous/__init__.py index 7b594b184d..8ab7ff7219 100644 --- a/test/asynchronous/__init__.py +++ b/test/asynchronous/__init__.py @@ -33,6 +33,7 @@ from inspect import iscoroutinefunction from pymongo.asynchronous.uri_parser import parse_uri +from pymongo.encryption_options import _HAVE_PYMONGOCRYPT from pymongo.errors 
import AutoReconnect try: @@ -524,6 +525,19 @@ def require_version_max(self, *ver): "Server version must be at most %s" % str(other_version), ) + def require_libmongocrypt_min(self, *ver): + other_version = Version(*ver) + if not _HAVE_PYMONGOCRYPT: + version = Version.from_string("0.0.0") + else: + from pymongocrypt import libmongocrypt_version + + version = Version.from_string(libmongocrypt_version()) + return self._require( + lambda: version>= other_version, + "Libmongocrypt version must be at least %s" % str(other_version), + ) + def require_auth(self, func): """Run a test only if the server is running with auth enabled.""" return self._require( diff --git a/test/asynchronous/test_encryption.py b/test/asynchronous/test_encryption.py index 241cb15668..e510de5631 100644 --- a/test/asynchronous/test_encryption.py +++ b/test/asynchronous/test_encryption.py @@ -89,7 +89,7 @@ from pymongo.asynchronous.encryption import Algorithm, AsyncClientEncryption, QueryType from pymongo.asynchronous.mongo_client import AsyncMongoClient from pymongo.cursor_shared import CursorType -from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts, RangeOpts +from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts, RangeOpts, TextOpts from pymongo.errors import ( AutoReconnect, BulkWriteError, @@ -3443,6 +3443,261 @@ async def test_collection_name_collision(self): self.assertIsInstance(exc.exception.encrypted_fields["fields"][0]["keyId"], Binary) +# https://github.com/mongodb/specifications/blob/master/source/client-side-encryption/tests/README.md#27-text-explicit-encryption +class TestExplicitTextEncryptionProse(AsyncEncryptionIntegrationTest): + @async_client_context.require_no_standalone + @async_client_context.require_version_min(8, 2, -1) + @async_client_context.require_libmongocrypt_min(1, 15, 1) + async def asyncSetUp(self): + await super().asyncSetUp() + # Load the file key1-document.json as key1Document. 
+ self.key1_document = json_data("etc", "data", "keys", "key1-document.json") + # Read the "_id" field of key1Document as key1ID. + self.key1_id = self.key1_document["_id"] + # Drop and create the collection keyvault.datakeys. + # Insert key1Document in keyvault.datakeys with majority write concern. + self.key_vault = await create_key_vault(self.client.keyvault.datakeys, self.key1_document) + self.addAsyncCleanup(self.key_vault.drop) + # Create a ClientEncryption object named clientEncryption with these options. + self.kms_providers = {"local": {"key": LOCAL_MASTER_KEY}} + self.client_encryption = self.create_client_encryption( + self.kms_providers, + self.key_vault.full_name, + self.client, + OPTS, + ) + # Create a MongoClient named encryptedClient with these AutoEncryptionOpts. + opts = AutoEncryptionOpts( + self.kms_providers, + "keyvault.datakeys", + bypass_query_analysis=True, + ) + self.client_encrypted = await self.async_rs_or_single_client(auto_encryption_opts=opts) + + # Using QE CreateCollection() and Collection.Drop(), drop and create the following collections with majority write concern: + # db.prefix-suffix using the encryptedFields option set to the contents of encryptedFields-prefix-suffix.json. + db = self.client_encrypted.db + await db.drop_collection("prefix-suffix") + encrypted_fields = json_data("etc", "data", "encryptedFields-prefix-suffix.json") + await self.client_encryption.create_encrypted_collection( + db, "prefix-suffix", kms_provider="local", encrypted_fields=encrypted_fields + ) + # db.substring using the encryptedFields option set to the contents of encryptedFields-substring.json. + await db.drop_collection("substring") + encrypted_fields = json_data("etc", "data", "encryptedFields-substring.json") + await self.client_encryption.create_encrypted_collection( + db, "substring", kms_provider="local", encrypted_fields=encrypted_fields + ) + + # Use clientEncryption to encrypt the string "foobarbaz" with the following EncryptOpts. 
+ text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + suffix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "foobarbaz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to insert the following document into db.prefix-suffix with majority write concern. + coll = self.client_encrypted.db["prefix-suffix"].with_options( + write_concern=WriteConcern(w="majority") + ) + await coll.insert_one({"_id": 0, "encryptedText": encrypted_value}) + + # Use clientEncryption to encrypt the string "foobarbaz" with the following EncryptOpts. + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + substring=dict(strMaxLength=10, strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "foobarbaz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to insert the following document into db.substring with majority write concern. + coll = self.client_encrypted.db["substring"].with_options( + write_concern=WriteConcern(w="majority") + ) + await coll.insert_one({"_id": 0, "encryptedText": encrypted_value}) + + async def test_01_can_find_a_document_by_prefix(self): + # Use clientEncryption.encrypt() to encrypt the string "foo" with the following EncryptOpts. + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "foo", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.PREFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter. 
+ value = await self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrStartsWith": {"input": "$encryptedText", "prefix": encrypted_value}}} + ) + # Assert the following document is returned. + expected = {"_id": 0, "encryptedText": "foobarbaz"} + value.pop("__safeContent__", None) + self.assertEqual(value, expected) + + async def test_02_can_find_a_document_by_suffix(self): + # Use clientEncryption.encrypt() to encrypt the string "baz" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + suffix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "baz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUFFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter: + value = await self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrEndsWith": {"input": "$encryptedText", "suffix": encrypted_value}}} + ) + # Assert the following document is returned. 
+ expected = {"_id": 0, "encryptedText": "foobarbaz"} + value.pop("__safeContent__", None) + self.assertEqual(value, expected) + + async def test_03_no_document_found_by_prefix(self): + # Use clientEncryption.encrypt() to encrypt the string "baz" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "baz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.PREFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter: + value = await self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrStartsWith": {"input": "$encryptedText", "prefix": encrypted_value}}} + ) + # Assert that no documents are returned. + self.assertIsNone(value) + + async def test_04_no_document_found_by_suffix(self): + # Use clientEncryption.encrypt() to encrypt the string "foo" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + suffix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "foo", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUFFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter: + value = await self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrEndsWith": {"input": "$encryptedText", "suffix": encrypted_value}}} + ) + # Assert that no documents are returned. 
+ self.assertIsNone(value) + + async def test_05_can_find_a_document_by_substring(self): + # Use clientEncryption.encrypt() to encrypt the string "bar" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + substring=dict(strMaxLength=10, strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "bar", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUBSTRINGPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.substring collection with the following filter: + value = await self.client_encrypted.db["substring"].find_one( + { + "$expr": { + "$encStrContains": {"input": "$encryptedText", "substring": encrypted_value} + } + } + ) + # Assert the following document is returned: + expected = {"_id": 0, "encryptedText": "foobarbaz"} + value.pop("__safeContent__", None) + self.assertEqual(value, expected) + + async def test_06_no_document_found_by_substring(self): + # Use clientEncryption.encrypt() to encrypt the string "qux" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + substring=dict(strMaxLength=10, strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = await self.client_encryption.encrypt( + "qux", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUBSTRINGPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.substring collection with the following filter: + value = await self.client_encrypted.db["substring"].find_one( + { + "$expr": { + "$encStrContains": {"input": "$encryptedText", "substring": encrypted_value} + } + } + ) + # Assert that no documents are returned. 
+ self.assertIsNone(value) + + async def test_07_contentionFactor_is_required(self): + from pymongocrypt.errors import MongoCryptError + + # Use clientEncryption.encrypt() to encrypt the string "foo" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + with self.assertRaises(EncryptionError) as ctx: + await self.client_encryption.encrypt( + "foo", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.PREFIXPREVIEW, + text_opts=text_opts, + ) + # Expect an error from libmongocrypt with a message containing the string: "contention factor is required for textPreview algorithm". + self.assertIsInstance(ctx.exception.cause, MongoCryptError) + self.assertEqual( + str(ctx.exception), "contention factor is required for textPreview algorithm" + ) + + def start_mongocryptd(port) -> None: args = ["mongocryptd", f"--port={port}", "--idleShutdownTimeoutSecs=60"] _spawn_daemon(args) diff --git a/test/test_encryption.py b/test/test_encryption.py index be1b7ec1b6..4eb0331715 100644 --- a/test/test_encryption.py +++ b/test/test_encryption.py @@ -86,7 +86,7 @@ from bson.son import SON from pymongo import ReadPreference from pymongo.cursor_shared import CursorType -from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts, RangeOpts +from pymongo.encryption_options import _HAVE_PYMONGOCRYPT, AutoEncryptionOpts, RangeOpts, TextOpts from pymongo.errors import ( AutoReconnect, BulkWriteError, @@ -3425,6 +3425,261 @@ def test_collection_name_collision(self): self.assertIsInstance(exc.exception.encrypted_fields["fields"][0]["keyId"], Binary) +# https://github.com/mongodb/specifications/blob/master/source/client-side-encryption/tests/README.md#27-text-explicit-encryption +class TestExplicitTextEncryptionProse(EncryptionIntegrationTest): + @client_context.require_no_standalone + @client_context.require_version_min(8, 2, -1) + 
@client_context.require_libmongocrypt_min(1, 15, 1) + def setUp(self): + super().setUp() + # Load the file key1-document.json as key1Document. + self.key1_document = json_data("etc", "data", "keys", "key1-document.json") + # Read the "_id" field of key1Document as key1ID. + self.key1_id = self.key1_document["_id"] + # Drop and create the collection keyvault.datakeys. + # Insert key1Document in keyvault.datakeys with majority write concern. + self.key_vault = create_key_vault(self.client.keyvault.datakeys, self.key1_document) + self.addCleanup(self.key_vault.drop) + # Create a ClientEncryption object named clientEncryption with these options. + self.kms_providers = {"local": {"key": LOCAL_MASTER_KEY}} + self.client_encryption = self.create_client_encryption( + self.kms_providers, + self.key_vault.full_name, + self.client, + OPTS, + ) + # Create a MongoClient named encryptedClient with these AutoEncryptionOpts. + opts = AutoEncryptionOpts( + self.kms_providers, + "keyvault.datakeys", + bypass_query_analysis=True, + ) + self.client_encrypted = self.rs_or_single_client(auto_encryption_opts=opts) + + # Using QE CreateCollection() and Collection.Drop(), drop and create the following collections with majority write concern: + # db.prefix-suffix using the encryptedFields option set to the contents of encryptedFields-prefix-suffix.json. + db = self.client_encrypted.db + db.drop_collection("prefix-suffix") + encrypted_fields = json_data("etc", "data", "encryptedFields-prefix-suffix.json") + self.client_encryption.create_encrypted_collection( + db, "prefix-suffix", kms_provider="local", encrypted_fields=encrypted_fields + ) + # db.substring using the encryptedFields option set to the contents of encryptedFields-substring.json. 
+ db.drop_collection("substring") + encrypted_fields = json_data("etc", "data", "encryptedFields-substring.json") + self.client_encryption.create_encrypted_collection( + db, "substring", kms_provider="local", encrypted_fields=encrypted_fields + ) + + # Use clientEncryption to encrypt the string "foobarbaz" with the following EncryptOpts. + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + suffix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "foobarbaz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to insert the following document into db.prefix-suffix with majority write concern. + coll = self.client_encrypted.db["prefix-suffix"].with_options( + write_concern=WriteConcern(w="majority") + ) + coll.insert_one({"_id": 0, "encryptedText": encrypted_value}) + + # Use clientEncryption to encrypt the string "foobarbaz" with the following EncryptOpts. + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + substring=dict(strMaxLength=10, strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "foobarbaz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to insert the following document into db.substring with majority write concern. + coll = self.client_encrypted.db["substring"].with_options( + write_concern=WriteConcern(w="majority") + ) + coll.insert_one({"_id": 0, "encryptedText": encrypted_value}) + + def test_01_can_find_a_document_by_prefix(self): + # Use clientEncryption.encrypt() to encrypt the string "foo" with the following EncryptOpts. 
+ text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "foo", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.PREFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter. + value = self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrStartsWith": {"input": "$encryptedText", "prefix": encrypted_value}}} + ) + # Assert the following document is returned. + expected = {"_id": 0, "encryptedText": "foobarbaz"} + value.pop("__safeContent__", None) + self.assertEqual(value, expected) + + def test_02_can_find_a_document_by_suffix(self): + # Use clientEncryption.encrypt() to encrypt the string "baz" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + suffix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "baz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUFFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter: + value = self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrEndsWith": {"input": "$encryptedText", "suffix": encrypted_value}}} + ) + # Assert the following document is returned. 
+ expected = {"_id": 0, "encryptedText": "foobarbaz"} + value.pop("__safeContent__", None) + self.assertEqual(value, expected) + + def test_03_no_document_found_by_prefix(self): + # Use clientEncryption.encrypt() to encrypt the string "baz" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "baz", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.PREFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter: + value = self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrStartsWith": {"input": "$encryptedText", "prefix": encrypted_value}}} + ) + # Assert that no documents are returned. + self.assertIsNone(value) + + def test_04_no_document_found_by_suffix(self): + # Use clientEncryption.encrypt() to encrypt the string "foo" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + suffix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "foo", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUFFIXPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.prefix-suffix collection with the following filter: + value = self.client_encrypted.db["prefix-suffix"].find_one( + {"$expr": {"$encStrEndsWith": {"input": "$encryptedText", "suffix": encrypted_value}}} + ) + # Assert that no documents are returned. 
+ self.assertIsNone(value) + + def test_05_can_find_a_document_by_substring(self): + # Use clientEncryption.encrypt() to encrypt the string "bar" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + substring=dict(strMaxLength=10, strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "bar", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUBSTRINGPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.substring collection with the following filter: + value = self.client_encrypted.db["substring"].find_one( + { + "$expr": { + "$encStrContains": {"input": "$encryptedText", "substring": encrypted_value} + } + } + ) + # Assert the following document is returned: + expected = {"_id": 0, "encryptedText": "foobarbaz"} + value.pop("__safeContent__", None) + self.assertEqual(value, expected) + + def test_06_no_document_found_by_substring(self): + # Use clientEncryption.encrypt() to encrypt the string "qux" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + substring=dict(strMaxLength=10, strMaxQueryLength=10, strMinQueryLength=2), + ) + encrypted_value = self.client_encryption.encrypt( + "qux", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.SUBSTRINGPREVIEW, + contention_factor=0, + text_opts=text_opts, + ) + # Use encryptedClient to run a "find" operation on the db.substring collection with the following filter: + value = self.client_encrypted.db["substring"].find_one( + { + "$expr": { + "$encStrContains": {"input": "$encryptedText", "substring": encrypted_value} + } + } + ) + # Assert that no documents are returned. 
+ self.assertIsNone(value) + + def test_07_contentionFactor_is_required(self): + from pymongocrypt.errors import MongoCryptError + + # Use clientEncryption.encrypt() to encrypt the string "foo" with the following EncryptOpts: + text_opts = TextOpts( + case_sensitive=True, + diacritic_sensitive=True, + prefix=dict(strMaxQueryLength=10, strMinQueryLength=2), + ) + with self.assertRaises(EncryptionError) as ctx: + self.client_encryption.encrypt( + "foo", + key_id=self.key1_id, + algorithm=Algorithm.TEXTPREVIEW, + query_type=QueryType.PREFIXPREVIEW, + text_opts=text_opts, + ) + # Expect an error from libmongocrypt with a message containing the string: "contention factor is required for textPreview algorithm". + self.assertIsInstance(ctx.exception.cause, MongoCryptError) + self.assertEqual( + str(ctx.exception), "contention factor is required for textPreview algorithm" + ) + + def start_mongocryptd(port) -> None: args = ["mongocryptd", f"--port={port}", "--idleShutdownTimeoutSecs=60"] _spawn_daemon(args) From 6ae88a78afa4459cf1d624d1091ae35e124f346f Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 9 Sep 2025 10:03:30 -0500 Subject: [PATCH 2/5] update the changelog --- doc/changelog.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index 305c989106..8329191a60 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -4,6 +4,12 @@ Changes in Version 4.15.0 (XXXX/XX/XX) -------------------------------------- PyMongo 4.15 brings a number of changes including: +- Added :class:`~pymongo.encryption_options.TextOpts`, + :attr:`~pymongo.encryption.Algorithm.TEXTPREVIEW`, + :attr:`~pymongo.encryption.QueryType.PREFIXPREVIEW`, + :attr:`~pymongo.encryption.QueryType.SUFFIXPREVIEW`, + :attr:`~pymongo.encryption.QueryType.SUBSTRINGPREVIEW`, + as part of the experimental Queryable Encryption text queries beta. 
- Added :class:`bson.decimal128.DecimalEncoder` and :class:`bson.decimal128.DecimalDecoder` to support encoding and decoding of BSON Decimal128 values to decimal.Decimal values using the TypeRegistry API. From 253684d3f2f7fb43de628ac2f79dbaa86528c8c3 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 9 Sep 2025 11:47:17 -0500 Subject: [PATCH 3/5] switch to pymongocrypt master --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5f68e9ebf6..890244b688 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ mockupdb = [ "mockupdb@git+https://github.com/mongodb-labs/mongo-mockup-db@master" ] pymongocrypt_source = [ - "pymongocrypt@git+https://github.com/blink1073/libmongocrypt@PYTHON-5531#subdirectory=bindings/python" + "pymongocrypt@git+https://github.com/mongodb/libmongocrypt@master#subdirectory=bindings/python" ] perf = ["simplejson"] typing = [ From f7266ff00f28b6e4696f7caad692f644f8909d4d Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 9 Sep 2025 15:03:01 -0500 Subject: [PATCH 4/5] update locked version of libmongocrypt --- uv.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/uv.lock b/uv.lock index 9c45c4cdb9..2e0ef2a151 100644 --- a/uv.lock +++ b/uv.lock @@ -1350,8 +1350,8 @@ wheels = [ [[package]] name = "pymongocrypt" -version = "1.14.2.dev0" -source = { git = "https://github.com/mongodb/libmongocrypt?subdirectory=bindings%2Fpython&rev=master#56048cf426bfeffa0805934b668a7af5ed8e907c" } +version = "1.16.0" +source = { git = "https://github.com/mongodb/libmongocrypt?subdirectory=bindings%2Fpython&rev=master#63d2591b84a9d4348cbe1c74556e266cd560ac5b" } dependencies = [ { name = "cffi", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version != '3.14.*'" }, { name = "cffi", version = "2.0.0b1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.14.*'" }, From 
98ddc35a885275b11d9328800d09736748ae66c9 Mon Sep 17 00:00:00 2001 From: Steven Silvester Date: Tue, 9 Sep 2025 21:22:38 -0500 Subject: [PATCH 5/5] Add a note about required pymongocrypt version --- doc/changelog.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/changelog.rst b/doc/changelog.rst index 8329191a60..4d95559d69 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -10,6 +10,7 @@ PyMongo 4.15 brings a number of changes including: :attr:`~pymongo.encryption.QueryType.SUFFIXPREVIEW`, :attr:`~pymongo.encryption.QueryType.SUBSTRINGPREVIEW`, as part of the experimental Queryable Encryption text queries beta. + ``pymongocrypt>=1.16`` is required for text query support. - Added :class:`bson.decimal128.DecimalEncoder` and :class:`bson.decimal128.DecimalDecoder` to support encoding and decoding of BSON Decimal128 values to decimal.Decimal values using the TypeRegistry API.

AltStyle によって変換されたページ (->オリジナル) /