diff --git a/.github/workflows/encrypted_settings.py b/.github/workflows/encrypted_settings.py new file mode 100644 index 000000000..0436b04f7 --- /dev/null +++ b/.github/workflows/encrypted_settings.py @@ -0,0 +1,43 @@ +# Settings for django_mongodb_backend/tests when encryption is supported. +import os + +from mongodb_settings import * # noqa: F403 +from pymongo.encryption import AutoEncryptionOpts + +os.environ["LD_LIBRARY_PATH"] = os.environ["GITHUB_WORKSPACE"] + "/lib/" + +DATABASES["encrypted"] = { # noqa: F405 + "ENGINE": "django_mongodb_backend", + "NAME": "djangotests_encrypted", + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + key_vault_namespace="djangotests_encrypted.__keyVault", + kms_providers={"local": {"key": os.urandom(96)}}, + crypt_shared_lib_path=os.environ["GITHUB_WORKSPACE"] + "/lib/mongo_crypt_v1.so", + ), + "directConnection": True, + }, + "KMS_CREDENTIALS": {}, +} + + +class EncryptedRouter: + def db_for_read(self, model, **hints): + if model._meta.app_label == "encryption_": + return "encrypted" + return None + + db_for_write = db_for_read + + def allow_migrate(self, db, app_label, model_name=None, **hints): + # The encryption_ app's models are only created in the encrypted + # database. + if app_label == "encryption_": + return db == "encrypted" + # Don't create other app's models in the encrypted database. + if db == "encrypted": + return False + return None + + +DATABASE_ROUTERS.append(EncryptedRouter()) # noqa: F405 diff --git a/.github/workflows/mongodb_settings.py b/.github/workflows/mongodb_settings.py index 4dce3c0d5..619bdcd95 100644 --- a/.github/workflows/mongodb_settings.py +++ b/.github/workflows/mongodb_settings.py @@ -1,4 +1,5 @@ -# Settings for django_mongodb_backend/tests. +# Settings for django_mongodb_backend/tests when encryption isn't supported. 
from django_settings import * # noqa: F403 +DATABASES["encrypted"] = {} # noqa: F405 DATABASE_ROUTERS = ["django_mongodb_backend.routers.MongoRouter"] diff --git a/.github/workflows/runtests.py b/.github/workflows/runtests.py index cc258f363..3775c422b 100755 --- a/.github/workflows/runtests.py +++ b/.github/workflows/runtests.py @@ -6,151 +6,6 @@ from django.core.exceptions import ImproperlyConfigured test_apps = [ - "admin_changelist", - "admin_checks", - "admin_custom_urls", - "admin_docs", - "admin_filters", - "admin_inlines", - "admin_ordering", - "admin_scripts", - "admin_utils", - "admin_views", - "admin_widgets", - "aggregation", - "aggregation_regress", - "annotations", - "apps", - "async", - "auth_tests", - "backends", - "basic", - "bulk_create", - "cache", - "check_framework", - "constraints", - "contenttypes_tests", - "context_processors", - "custom_columns", - "custom_lookups", - "custom_managers", - "custom_pk", - "datatypes", - "dates", - "datetimes", - "db_functions", - "defer", - "defer_regress", - "delete", - "delete_regress", - "empty", - "empty_models", - "expressions", - "expressions_case", - "field_defaults", - "file_storage", - "file_uploads", - "fixtures", - "fixtures_model_package", - "fixtures_regress", - "flatpages_tests", - "force_insert_update", - "foreign_object", - "forms_tests", - "from_db_value", - "generic_inline_admin", - "generic_relations", - "generic_relations_regress", - "generic_views", - "get_earliest_or_latest", - "get_object_or_404", - "get_or_create", - "i18n", - "indexes", - "inline_formsets", - "introspection", - "invalid_models_tests", - "known_related_objects", - "lookup", - "m2m_and_m2o", - "m2m_intermediary", - "m2m_multiple", - "m2m_recursive", - "m2m_regress", - "m2m_signals", - "m2m_through", - "m2m_through_regress", - "m2o_recursive", - "managers_regress", - "many_to_many", - "many_to_one", - "many_to_one_null", - "max_lengths", - "messages_tests", - "migrate_signals", - "migration_test_data_persistence", - 
"migrations", - "model_fields", - "model_forms", - "model_formsets", - "model_formsets_regress", - "model_indexes", - "model_inheritance", - "model_inheritance_regress", - "model_options", - "model_package", - "model_regress", - "model_utils", - "modeladmin", - "multiple_database", - "mutually_referential", - "nested_foreign_keys", - "null_fk", - "null_fk_ordering", - "null_queries", - "one_to_one", - "or_lookups", - "order_with_respect_to", - "ordering", - "pagination", - "prefetch_related", - "proxy_model_inheritance", - "proxy_models", - "queries", - "queryset_pickle", - "redirects_tests", - "reserved_names", - "reverse_lookup", - "save_delete_hooks", - "schema", - "select_for_update", - "select_related", - "select_related_onetoone", - "select_related_regress", - "serializers", - "servers", - "sessions_tests", - "shortcuts", - "signals", - "sitemaps_tests", - "sites_framework", - "sites_tests", - "string_lookup", - "swappable_models", - "syndication_tests", - "test_client", - "test_client_regress", - "test_runner", - "test_utils", - "timezones", - "transactions", - "unmanaged_models", - "update", - "update_only_fields", - "user_commands", - "validation", - "view_tests", - "xor_lookups", # Add directories in django_mongodb_backend/tests *sorted( [ diff --git a/.github/workflows/test-python-atlas.yml b/.github/workflows/test-python-atlas.yml index e98d2512d..953d6c17b 100644 --- a/.github/workflows/test-python-atlas.yml +++ b/.github/workflows/test-python-atlas.yml @@ -28,7 +28,7 @@ jobs: - name: install django-mongodb-backend run: | pip3 install --upgrade pip - pip3 install -e . + pip3 install -e .[encryption] - name: Checkout Django uses: actions/checkout@v5 with: @@ -51,8 +51,14 @@ jobs: run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - name: Start local Atlas working-directory: . 
- run: bash .github/workflows/start_local_atlas.sh mongodb/mongodb-atlas-local:7 + run: bash .github/workflows/start_local_atlas.sh mongodb/mongodb-atlas-local:8.0.15 + - name: Download crypt shared + run: | + wget https://downloads.mongodb.com/linux/mongo_crypt_shared_v1-linux-x86_64-enterprise-ubuntu2404-8.2.1.tgz + tar -xvzf mongo_crypt_shared_v1-linux-x86_64-enterprise-ubuntu2404-8.2.1.tgz lib/mongo_crypt_v1.so - name: Run tests run: python3 django_repo/tests/runtests_.py permissions: contents: read + env: + DJANGO_SETTINGS_MODULE: "encrypted_settings" diff --git a/.github/workflows/test-python-geo.yml b/.github/workflows/test-python-geo.yml deleted file mode 100644 index 309f3506a..000000000 --- a/.github/workflows/test-python-geo.yml +++ /dev/null @@ -1,60 +0,0 @@ -# Identical to test-python.yml except that gdal-bin is also installed. -name: Python Tests with GeoDjango - -on: - pull_request: - paths: - - '**.py' - - '!setup.py' - - '.github/workflows/test-python-geo.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -defaults: - run: - shell: bash -eux {0} - -jobs: - build: - name: Django Test Suite - runs-on: ubuntu-latest - steps: - - name: Checkout django-mongodb-backend - uses: actions/checkout@v5 - with: - persist-credentials: false - - name: install django-mongodb-backend - run: | - pip3 install --upgrade pip - pip3 install -e . - - name: Checkout Django - uses: actions/checkout@v5 - with: - repository: 'mongodb-forks/django' - ref: 'mongodb-5.2.x' - path: 'django_repo' - persist-credentials: false - - name: Install system packages for Django's Python test dependencies - run: | - sudo apt-get update - sudo apt-get install gdal-bin libmemcached-dev - - name: Install Django and its Python test dependencies - run: | - cd django_repo/tests/ - pip3 install -e .. 
- pip3 install -r requirements/py3.txt - - name: Copy the test settings files - run: cp .github/workflows/*_settings.py django_repo/tests/ - - name: Copy the test runner file - run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - - name: Start MongoDB - uses: supercharge/mongodb-github-action@90004df786821b6308fb02299e5835d0dae05d0d # 1.12.0 - with: - mongodb-version: 6.0 - - name: Run tests - run: python3 django_repo/tests/runtests_.py - permissions: - contents: read diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml deleted file mode 100644 index 7f74b3376..000000000 --- a/.github/workflows/test-python.yml +++ /dev/null @@ -1,59 +0,0 @@ -name: Python Tests - -on: - pull_request: - paths: - - '**.py' - - '!setup.py' - - '.github/workflows/test-python.yml' - workflow_dispatch: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -defaults: - run: - shell: bash -eux {0} - -jobs: - build: - name: Django Test Suite - runs-on: ubuntu-latest - steps: - - name: Checkout django-mongodb-backend - uses: actions/checkout@v5 - with: - persist-credentials: false - - name: install django-mongodb-backend - run: | - pip3 install --upgrade pip - pip3 install -e . - - name: Checkout Django - uses: actions/checkout@v5 - with: - repository: 'mongodb-forks/django' - ref: 'mongodb-5.2.x' - path: 'django_repo' - persist-credentials: false - - name: Install system packages for Django's Python test dependencies - run: | - sudo apt-get update - sudo apt-get install libmemcached-dev - - name: Install Django and its Python test dependencies - run: | - cd django_repo/tests/ - pip3 install -e .. 
- pip3 install -r requirements/py3.txt - - name: Copy the test settings files - run: cp .github/workflows/*_settings.py django_repo/tests/ - - name: Copy the test runner file - run: cp .github/workflows/runtests.py django_repo/tests/runtests_.py - - name: Start MongoDB - uses: supercharge/mongodb-github-action@90004df786821b6308fb02299e5835d0dae05d0d # 1.12.0 - with: - mongodb-version: 6.0 - - name: Run tests - run: python3 django_repo/tests/runtests_.py - permissions: - contents: read diff --git a/django_mongodb_backend/__init__.py b/django_mongodb_backend/__init__.py index 577a4f104..752d72802 100644 --- a/django_mongodb_backend/__init__.py +++ b/django_mongodb_backend/__init__.py @@ -14,6 +14,7 @@ from .indexes import register_indexes # noqa: E402 from .lookups import register_lookups # noqa: E402 from .query import register_nodes # noqa: E402 +from .routers import register_routers # noqa: E402 __all__ = ["parse_uri"] @@ -25,3 +26,4 @@ register_indexes() register_lookups() register_nodes() +register_routers() diff --git a/django_mongodb_backend/base.py b/django_mongodb_backend/base.py index 88c2a1189..b1afc1b03 100644 --- a/django_mongodb_backend/base.py +++ b/django_mongodb_backend/base.py @@ -11,6 +11,7 @@ from django.utils.functional import cached_property from pymongo.collection import Collection from pymongo.driver_info import DriverInfo +from pymongo.encryption import ClientEncryption from pymongo.mongo_client import MongoClient from pymongo.uri_parser import parse_uri @@ -241,6 +242,16 @@ def get_database(self): return OperationDebugWrapper(self) return self.database + @cached_property + def client_encryption(self): + auto_encryption_opts = self.connection._options.auto_encryption_opts + return ClientEncryption( + auto_encryption_opts._kms_providers, + auto_encryption_opts._key_vault_namespace, + self.connection, + self.connection.codec_options, + ) + @cached_property def database(self): """Connect to the database the first time it's accessed.""" diff 
--git a/django_mongodb_backend/creation.py b/django_mongodb_backend/creation.py index c8002b2c4..a1d45277e 100644 --- a/django_mongodb_backend/creation.py +++ b/django_mongodb_backend/creation.py @@ -1,5 +1,5 @@ from django.conf import settings -from django.db.backends.base.creation import BaseDatabaseCreation +from django.db.backends.base.creation import TEST_DATABASE_PREFIX, BaseDatabaseCreation class DatabaseCreation(BaseDatabaseCreation): @@ -7,6 +7,14 @@ def _execute_create_test_db(self, cursor, parameters, keepdb=False): # Close the connection (which may point to the non-test database) so # that a new connection to the test database can be established later. self.connection.close_pool() + # Use a test _key_vault_namespace. This assumes the key vault database + # is the same as the encrypted database so that _destroy_test_db() can + # reset the collection by dropping it. + opts = self.connection.settings_dict["OPTIONS"].get("auto_encryption_opts") + if opts: + self.connection.settings_dict["OPTIONS"][ + "auto_encryption_opts" + ]._key_vault_namespace = TEST_DATABASE_PREFIX + opts._key_vault_namespace if not keepdb: self._destroy_test_db(parameters["dbname"], verbosity=0) @@ -24,3 +32,9 @@ def destroy_test_db(self, old_database_name=None, verbosity=1, keepdb=False, suf super().destroy_test_db(old_database_name, verbosity, keepdb, suffix) # Close the connection to the test database. self.connection.close_pool() + # Restore the original _key_vault_namespace. 
+ opts = self.connection.settings_dict["OPTIONS"].get("auto_encryption_opts") + if opts: + self.connection.settings_dict["OPTIONS"][ + "auto_encryption_opts" + ]._key_vault_namespace = opts._key_vault_namespace[len(TEST_DATABASE_PREFIX) :] diff --git a/django_mongodb_backend/features.py b/django_mongodb_backend/features.py index 18a048bf6..0db776053 100644 --- a/django_mongodb_backend/features.py +++ b/django_mongodb_backend/features.py @@ -588,9 +588,21 @@ def django_test_skips(self): skips.update(self._django_test_skips) return skips + @cached_property + def mongodb_version(self): + return self.connection.get_database_version() # e.g., (6, 3, 0) + @cached_property def is_mongodb_6_3(self): - return self.connection.get_database_version() >= (6, 3) + return self.mongodb_version >= (6, 3) + + @cached_property + def is_mongodb_7_0(self): + return self.mongodb_version >= (7, 0) + + @cached_property + def is_mongodb_8_0(self): + return self.mongodb_version >= (8, 0) @cached_property def supports_atlas_search(self): @@ -620,3 +632,22 @@ def _supports_transactions(self): hello = client.command("hello") # a replica set or a sharded cluster return "setName" in hello or hello.get("msg") == "isdbgrid" + + @cached_property + def supports_queryable_encryption(self): + """ + For testing purposes, Queryable Encryption requires a MongoDB 8.0 or + later replica set or sharded cluster, as well as MongoDB Atlas or + Enterprise. This flag must not guard any non-test functionality since + it would prevent MongoDB 7.0 from being used, which also supports + Queryable Encryption. The models in tests/encryption_ aren't compatible + with MongoDB 7.0 because {"queryType": "range"} is "rangePreview" there. 
+ """ + self.connection.ensure_connection() + build_info = self.connection.connection.admin.command("buildInfo") + is_enterprise = "enterprise" in build_info.get("modules", []) + return ( + (is_enterprise or self.supports_atlas_search) + and self._supports_transactions + and self.is_mongodb_8_0 + ) diff --git a/django_mongodb_backend/fields/__init__.py b/django_mongodb_backend/fields/__init__.py index 0c95afd69..6cc4bcc18 100644 --- a/django_mongodb_backend/fields/__init__.py +++ b/django_mongodb_backend/fields/__init__.py @@ -3,6 +3,33 @@ from .duration import register_duration_field from .embedded_model import EmbeddedModelField from .embedded_model_array import EmbeddedModelArrayField +from .encryption import ( + EncryptedArrayField, + EncryptedBigIntegerField, + EncryptedBinaryField, + EncryptedBooleanField, + EncryptedCharField, + EncryptedDateField, + EncryptedDateTimeField, + EncryptedDecimalField, + EncryptedDurationField, + EncryptedEmailField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedFieldMixin, + EncryptedFloatField, + EncryptedGenericIPAddressField, + EncryptedIntegerField, + EncryptedObjectIdField, + EncryptedPositiveBigIntegerField, + EncryptedPositiveIntegerField, + EncryptedPositiveSmallIntegerField, + EncryptedSmallIntegerField, + EncryptedTextField, + EncryptedTimeField, + EncryptedURLField, + EncryptedUUIDField, +) from .json import register_json_field from .objectid import ObjectIdField from .polymorphic_embedded_model import PolymorphicEmbeddedModelField @@ -12,6 +39,31 @@ "ArrayField", "EmbeddedModelArrayField", "EmbeddedModelField", + "EncryptedArrayField", + "EncryptedBigIntegerField", + "EncryptedBinaryField", + "EncryptedBooleanField", + "EncryptedCharField", + "EncryptedDateField", + "EncryptedDateTimeField", + "EncryptedDecimalField", + "EncryptedDurationField", + "EncryptedEmailField", + "EncryptedEmbeddedModelArrayField", + "EncryptedEmbeddedModelField", + "EncryptedFieldMixin", + "EncryptedFloatField", 
+ "EncryptedGenericIPAddressField", + "EncryptedIntegerField", + "EncryptedObjectIdField", + "EncryptedPositiveBigIntegerField", + "EncryptedPositiveIntegerField", + "EncryptedPositiveSmallIntegerField", + "EncryptedSmallIntegerField", + "EncryptedTextField", + "EncryptedTimeField", + "EncryptedURLField", + "EncryptedUUIDField", "ObjectIdAutoField", "ObjectIdField", "PolymorphicEmbeddedModelArrayField", diff --git a/django_mongodb_backend/fields/encryption.py b/django_mongodb_backend/fields/encryption.py new file mode 100644 index 000000000..3ced82769 --- /dev/null +++ b/django_mongodb_backend/fields/encryption.py @@ -0,0 +1,139 @@ +from django.db import models + +from django_mongodb_backend.fields import ArrayField, EmbeddedModelArrayField, EmbeddedModelField +from django_mongodb_backend.fields.objectid import ObjectIdField + + +class EncryptedFieldMixin: + encrypted = True + + def __init__(self, *args, queries=None, db_index=False, null=False, unique=False, **kwargs): + if db_index: + raise ValueError("'db_index=True' is not supported on encrypted fields.") + if null: + raise ValueError("'null=True' is not supported on encrypted fields.") + if unique: + raise ValueError("'unique=True' is not supported on encrypted fields.") + self.queries = queries + super().__init__(*args, **kwargs) + + def deconstruct(self): + name, path, args, kwargs = super().deconstruct() + + if self.queries is not None: + kwargs["queries"] = self.queries + + if path.startswith("django_mongodb_backend.fields.encryption"): + path = path.replace( + "django_mongodb_backend.fields.encryption", + "django_mongodb_backend.fields", + ) + + return name, path, args, kwargs + + +class NoQueriesMixin: + def __init__(self, *args, **kwargs): + if "queries" in kwargs: + raise ValueError(f"{self.__class__.__name__} does not support the queries argument.") + super().__init__(*args, **kwargs) + + +# Django fields +class EncryptedBinaryField(EncryptedFieldMixin, models.BinaryField): + pass + + +class 
EncryptedBigIntegerField(EncryptedFieldMixin, models.BigIntegerField): + pass + + +class EncryptedBooleanField(EncryptedFieldMixin, models.BooleanField): + pass + + +class EncryptedCharField(EncryptedFieldMixin, models.CharField): + pass + + +class EncryptedDateField(EncryptedFieldMixin, models.DateField): + pass + + +class EncryptedDateTimeField(EncryptedFieldMixin, models.DateTimeField): + pass + + +class EncryptedDecimalField(EncryptedFieldMixin, models.DecimalField): + pass + + +class EncryptedDurationField(EncryptedFieldMixin, models.DurationField): + pass + + +class EncryptedEmailField(EncryptedFieldMixin, models.EmailField): + pass + + +class EncryptedFloatField(EncryptedFieldMixin, models.FloatField): + pass + + +class EncryptedGenericIPAddressField(EncryptedFieldMixin, models.GenericIPAddressField): + pass + + +class EncryptedIntegerField(EncryptedFieldMixin, models.IntegerField): + pass + + +class EncryptedPositiveBigIntegerField(EncryptedFieldMixin, models.PositiveBigIntegerField): + pass + + +class EncryptedPositiveIntegerField(EncryptedFieldMixin, models.PositiveIntegerField): + pass + + +class EncryptedPositiveSmallIntegerField(EncryptedFieldMixin, models.PositiveSmallIntegerField): + pass + + +class EncryptedSmallIntegerField(EncryptedFieldMixin, models.SmallIntegerField): + pass + + +class EncryptedTextField(EncryptedFieldMixin, models.TextField): + pass + + +class EncryptedTimeField(EncryptedFieldMixin, models.TimeField): + pass + + +class EncryptedURLField(EncryptedFieldMixin, models.URLField): + pass + + +class EncryptedUUIDField(EncryptedFieldMixin, models.UUIDField): + pass + + +# MongoDB fields +class EncryptedArrayField(NoQueriesMixin, EncryptedFieldMixin, ArrayField): + pass + + +class EncryptedEmbeddedModelArrayField( + NoQueriesMixin, EncryptedFieldMixin, EmbeddedModelArrayField +): + pass + + +class EncryptedEmbeddedModelField(NoQueriesMixin, EncryptedFieldMixin, EmbeddedModelField): + pass + + +class 
EncryptedObjectIdField(EncryptedFieldMixin, ObjectIdField): + pass diff --git a/django_mongodb_backend/management/commands/showencryptedfieldsmap.py b/django_mongodb_backend/management/commands/showencryptedfieldsmap.py new file mode 100644 index 000000000..017fabde5 --- /dev/null +++ b/django_mongodb_backend/management/commands/showencryptedfieldsmap.py @@ -0,0 +1,35 @@ +from bson import json_util +from django.apps import apps +from django.core.management.base import BaseCommand +from django.db import DEFAULT_DB_ALIAS, connections, router + +from django_mongodb_backend.utils import model_has_encrypted_fields + + +class Command(BaseCommand): + help = """ + Shows the mapping of encrypted fields to field attributes, including data + type, data keys and query types. The output can be used to set + ``encrypted_fields_map`` in ``AutoEncryptionOpts``. + """ + + def add_arguments(self, parser): + parser.add_argument( + "--database", + default=DEFAULT_DB_ALIAS, + help=""" + Specifies the database to use. 
Defaults to ``default``.""", + ) + + def handle(self, *args, **options): + db = options["database"] + connection = connections[db] + connection.ensure_connection() + encrypted_fields_map = {} + with connection.schema_editor() as editor: + for app_config in apps.get_app_configs(): + for model in router.get_migratable_models(app_config, db): + if model_has_encrypted_fields(model): + fields = editor._get_encrypted_fields(model, create_data_keys=False) + encrypted_fields_map[model._meta.db_table] = fields + self.stdout.write(json_util.dumps(encrypted_fields_map, indent=4)) diff --git a/django_mongodb_backend/query.py b/django_mongodb_backend/query.py index c743ca8bf..8f295f8e7 100644 --- a/django_mongodb_backend/query.py +++ b/django_mongodb_backend/query.py @@ -24,7 +24,7 @@ def wrapper(*args, **kwargs): except DuplicateKeyError as e: raise IntegrityError from e except PyMongoError as e: - raise DatabaseError from e + raise DatabaseError(str(e)) from e return wrapper diff --git a/django_mongodb_backend/routers.py b/django_mongodb_backend/routers.py index 60e54bbd8..b17f4b021 100644 --- a/django_mongodb_backend/routers.py +++ b/django_mongodb_backend/routers.py @@ -1,6 +1,6 @@ from django.apps import apps - -from django_mongodb_backend.models import EmbeddedModel +from django.core.exceptions import ImproperlyConfigured +from django.db.utils import ConnectionRouter class MongoRouter: @@ -9,6 +9,8 @@ def allow_migrate(self, db, app_label, model_name=None, **hints): EmbeddedModels don't have their own collection and must be ignored by dumpdata. """ + from django_mongodb_backend.models import EmbeddedModel # noqa: PLC0415 + if not model_name: return None try: @@ -16,3 +18,22 @@ def allow_migrate(self, db, app_label, model_name=None, **hints): except LookupError: return None return False if issubclass(model, EmbeddedModel) else None + + +# This function is intended to be monkey-patched as a method of ConnectionRouter. 
+def kms_provider(self, model, *args, **kwargs): + """ + Return the Key Management Service (KMS) provider for a given model. + + Call each router's kms_provider() method (if present), and return the + first non-None result. Raise ImproperlyConfigured if no provider is found. + """ + for router in self.routers: + func = getattr(router, "kms_provider", None) + if func and callable(func) and (result := func(model, *args, **kwargs)): + return result + raise ImproperlyConfigured("No kms_provider found in database routers.") + + +def register_routers(): + ConnectionRouter.kms_provider = kms_provider diff --git a/django_mongodb_backend/schema.py b/django_mongodb_backend/schema.py index 9bcaecc63..56c3d9699 100644 --- a/django_mongodb_backend/schema.py +++ b/django_mongodb_backend/schema.py @@ -1,5 +1,7 @@ from time import monotonic, sleep +from django.core.exceptions import ImproperlyConfigured +from django.db import router from django.db.backends.base.schema import BaseDatabaseSchemaEditor from django.db.models import Index, UniqueConstraint from pymongo.operations import SearchIndexModel @@ -9,7 +11,7 @@ from .fields import EmbeddedModelField from .gis.schema import GISSchemaEditor from .query import wrap_database_errors -from .utils import OperationCollector +from .utils import OperationCollector, model_has_encrypted_fields def ignore_embedded_models(func): @@ -44,7 +46,7 @@ def get_database(self): @wrap_database_errors @ignore_embedded_models def create_model(self, model): - self.get_database().create_collection(model._meta.db_table) + self._create_collection(model) self._create_model_indexes(model) # Make implicit M2M tables. for field in model._meta.local_many_to_many: @@ -452,6 +454,111 @@ def wait_until_index_dropped(collection, index_name, timeout=60, interval=0.5): sleep(interval) raise TimeoutError(f"Index {index_name} not dropped after {timeout} seconds.") + def _create_collection(self, model): + """ + Create a collection for the model. 
+ If the model has encrypted fields, build (or retrieve) the encrypted_fields schema. + """ + db = self.get_database() + db_table = model._meta.db_table + + if model_has_encrypted_fields(model): + # Encrypted path + client = self.connection.connection + auto_encryption_opts = getattr(client._options, "auto_encryption_opts", None) + if not auto_encryption_opts: + raise ImproperlyConfigured( + f"Tried to create model {model._meta.label} in " + f"'{self.connection.alias}' database. The model has " + "encrypted fields but " + f"DATABASES['{self.connection.alias}']['OPTIONS'] is " + 'missing the "auto_encryption_opts" parameter. If the ' + "model should not be created in this database, adjust " + "your database routers." + ) + encrypted_fields = self._get_encrypted_fields(model) + db.create_collection(db_table, encryptedFields=encrypted_fields) + else: + # Unencrypted path + db.create_collection(db_table) + + def _get_encrypted_fields( + self, model, *, key_alt_name_prefix=None, path_prefix=None, create_data_keys=True + ): + """ + Return the encrypted fields map for the given model. The "prefix" + arguments are used when this method is called recursively on embedded + models. + """ + connection = self.connection + client = connection.connection + key_alt_name_prefix = key_alt_name_prefix or model._meta.db_table + path_prefix = path_prefix or "" + auto_encryption_opts = client._options.auto_encryption_opts + _, key_vault_collection = auto_encryption_opts._key_vault_namespace.split(".", 1) + key_vault = self.get_collection(key_vault_collection) + # Create partial unique index on keyAltNames. + # TODO: find a better place for this. It only needs to run once for an + # application's lifetime. + key_vault.create_index( + "keyAltNames", unique=True, partialFilterExpression={"keyAltNames": {"$exists": True}} + ) + # Select the KMS provider. 
+ kms_providers = auto_encryption_opts._kms_providers + if len(kms_providers) == 1: + # If one provider is configured, no need to consult the router. + kms_provider = next(iter(kms_providers.keys())) + else: + # Otherwise, call the user-defined router.kms_provider(). + kms_provider = router.kms_provider(model) + if kms_provider == "local": + master_key = None + else: + master_key = connection.settings_dict["KMS_CREDENTIALS"][kms_provider] + # Generate the encrypted fields map. + field_list = [] + for field in model._meta.fields: + key_alt_name = f"{key_alt_name_prefix}.{field.column}" + path = f"{path_prefix}.{field.column}" if path_prefix else field.column + # Check non-encrypted EmbeddedModelFields for encrypted fields. + if isinstance(field, EmbeddedModelField) and not getattr(field, "encrypted", False): + embedded_result = self._get_encrypted_fields( + field.embedded_model, + key_alt_name_prefix=key_alt_name, + path_prefix=path, + create_data_keys=create_data_keys, + ) + # An EmbeddedModelField may not have any encrypted fields. + if embedded_result: + field_list.extend(embedded_result["fields"]) + continue + # Populate data for encrypted field. + if getattr(field, "encrypted", False): + if create_data_keys: + data_key = connection.client_encryption.create_data_key( + kms_provider=kms_provider, + key_alt_names=[key_alt_name], + master_key=master_key, + ) + else: + data_key = key_vault.find_one({"keyAltNames": key_alt_name}) + if data_key: + data_key = data_key["_id"] + else: + raise ImproperlyConfigured( + f"Encryption key {key_alt_name} not found. Have you " + f"migrated the {model} model?" + ) + field_dict = { + "bsonType": field.db_type(connection), + "path": path, + "keyId": data_key, + } + if queries := getattr(field, "queries", None): + field_dict["queries"] = queries + field_list.append(field_dict) + return {"fields": field_list} + # GISSchemaEditor extends some SchemaEditor methods. 
class DatabaseSchemaEditor(GISSchemaEditor, BaseSchemaEditor): diff --git a/django_mongodb_backend/utils.py b/django_mongodb_backend/utils.py index 0240250cf..c655c8bc0 100644 --- a/django_mongodb_backend/utils.py +++ b/django_mongodb_backend/utils.py @@ -118,6 +118,7 @@ class OperationDebugWrapper: "create_indexes", "create_search_index", "drop", + "find_one", "index_information", "insert_many", "delete_many", @@ -193,3 +194,23 @@ def wrapper(self, *args, **kwargs): self.log(method, args, kwargs) return wrapper + + +def model_has_encrypted_fields(model): + """ + Recursively check if this model or any embedded models contain encrypted fields. + Returns True if encryption is found anywhere in the hierarchy. + """ + from django_mongodb_backend.fields import EmbeddedModelField # noqa: PLC0415 + + for field in model._meta.fields: + if getattr(field, "encrypted", False): + return True + + # Recursively check embedded models. + if isinstance(field, EmbeddedModelField) and model_has_encrypted_fields( + field.embedded_model + ): + return True + + return False diff --git a/docs/howto/index.rst b/docs/howto/index.rst index 95d7ef632..8451960ef 100644 --- a/docs/howto/index.rst +++ b/docs/howto/index.rst @@ -11,3 +11,4 @@ Project configuration :maxdepth: 1 contrib-apps + queryable-encryption diff --git a/docs/howto/queryable-encryption.rst b/docs/howto/queryable-encryption.rst new file mode 100644 index 000000000..34ec345bb --- /dev/null +++ b/docs/howto/queryable-encryption.rst @@ -0,0 +1,385 @@ +================================ +Configuring Queryable Encryption +================================ + +.. versionadded:: 5.2.3 + +:doc:`manual:core/queryable-encryption` is a powerful MongoDB feature that +allows you to encrypt sensitive fields in your database while still supporting +queries on that encrypted data. + +This section will guide you through the process of configuring Queryable +Encryption in your Django project. + +.. 
admonition:: MongoDB requirements + + Queryable Encryption can be used with MongoDB replica sets or sharded + clusters running version 7.0 or later. Standalone instances are not + supported. The :ref:`manual:qe-compatibility-reference` table summarizes + which MongoDB server products support Queryable Encryption. + +Installation +============ + +In addition to Django MongoDB Backend's regular :doc:`installation +` and :doc:`configuration ` steps, Queryable +Encryption has additional Python dependencies: + +.. code-block:: console + + $ pip install django-mongodb-backend[encryption] + +.. _qe-configuring-databases-setting: + +Configuring the ``DATABASES`` setting +===================================== + +In addition to the :ref:`database settings <configuring-databases-setting>` +required to use Django MongoDB Backend, Queryable Encryption requires +configuring a separate database connection that uses PyMongo's +:class:`~pymongo.encryption_options.AutoEncryptionOpts`. + +Here's a sample configuration using a local KMS provider:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "default": { + "ENGINE": "django_mongodb_backend", + "HOST": "mongodb+srv://cluster0.example.mongodb.net", + "NAME": "my_database", + # ... + }, + "encrypted": { + "ENGINE": "django_mongodb_backend", + "HOST": "mongodb+srv://cluster0.example.mongodb.net", + "NAME": "my_encrypted_database", + # ...
+ "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + key_vault_namespace="my_encrypted_database.__keyVault", + kms_providers={ + "local": { + # Generated by os.urandom(96) + "key": ( + b'-\xc3\x0c\xe3\x93\xc3\x8b\xc0\xf8\x12\xc5#b' + b'\x19\xf3\xbc\xccR\xc8\xedI\xda\\ \xfb\x9cB' + b'\x7f\xab5\xe7\xb5\xc9x\xb8\xd4d\xba\xdc\x9c' + b'\x9a\xdb9J]\xe6\xce\x104p\x079q.=\xeb\x9dK*' + b'\x97\xea\xf8\x1e\xc3\xd49K\x18\x81\xc3\x1a"' + b'\xdc\x00U\xc4u"X\xe7xy\xa5\xb2\x0e\xbc\xd6+-' + b'\x80\x03\xef\xc2\xc4\x9bU' + ), + }, + }, + ) + }, + }, + } + +``key_vault_namespace`` specifies where to store the data encryption keys. +The database name of the key vault must be the same as in ``"NAME"``. The +vault's collection name can be whatever you wish, but by convention, it's often +``__keyVault``. + +.. _qe-configuring-database-routers-setting: + +Configuring the ``DATABASE_ROUTERS`` setting +============================================ + +Similar to configuring the :ref:`DATABASE_ROUTERS +` setting for +:doc:`embedded models `, Queryable Encryption requires +a :setting:`DATABASE_ROUTERS` setting to route database operations to the +encrypted database.
+ +The following example shows how to configure a router for the ``"myapp"`` +application that routes database operations to the encrypted database for all +models in that application:: + + # myapp/routers.py + class EncryptedRouter: + def allow_migrate(self, db, app_label, model_name=None, **hints): + if app_label == "myapp": + return db == "encrypted" + # Prevent migrations on the encrypted database for other apps + if db == "encrypted": + return False + return None + + def db_for_read(self, model, **hints): + if model._meta.app_label == "myapp": + return "encrypted" + return None + + db_for_write = db_for_read + +Then in your Django settings, add the custom database router to the +:setting:`django:DATABASE_ROUTERS` setting:: + + # settings.py + DATABASE_ROUTERS = [ + "django_mongodb_backend.routers.MongoRouter", + "myapp.routers.EncryptedRouter", + ] + +Encrypted fields +================ + +Now you can start using encrypted fields in your Django models. + +:doc:`Encrypted fields </ref/models/encrypted-fields>` may be used to protect +sensitive data like social security numbers, credit card information, or +personal health information. With Queryable Encryption, you can also perform +queries on encrypted fields. To use encrypted fields in your models, +import the necessary field types from ``django_mongodb_backend.fields`` and +define your models as usual.
+ +Here are models based on the `Python Queryable Encryption Tutorial`_:: + + # myapp/models.py + from django.db import models + from django_mongodb_backend.models import EmbeddedModel + from django_mongodb_backend.fields import ( + EmbeddedModelField, + EncryptedCharField, + EncryptedEmbeddedModelField, + ) + + + class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + + class Patient(models.Model): + patient_name = models.CharField(max_length=255) + patient_id = models.BigIntegerField() + patient_record = EmbeddedModelField("PatientRecord") + + def __str__(self): + return f"{self.patient_name} ({self.patient_id})" + + class Billing(EmbeddedModel): + cc_type = models.CharField(max_length=50) + cc_number = models.CharField(max_length=20) + +.. _Python Queryable Encryption Tutorial: https://github.com/mongodb/docs/tree/main/content/manual/manual/source/includes/qe-tutorials/python + +.. _qe-migrations: + +Migrations +========== + +Once you have defined your models, create a migration as usual: + +.. code-block:: console + + $ python manage.py makemigrations + +Then run the migrations with: + +.. code-block:: console + + $ python manage.py migrate --database encrypted + +.. warning:: + + Be aware that you cannot add encrypted fields to existing models, nor can + you change the definition of an encrypted field, for example, to make it + queryable. + +Now create and manipulate instances of the data just like any other Django +model data. The fields will automatically handle encryption and decryption, +ensuring that :ref:`sensitive data is stored securely in the database +`. + +Querying encrypted fields +========================= + +In order to query encrypted fields, you must include the :ref:`queries +` argument. 
For example, notice ``PatientRecord``\'s +``ssn`` field:: + + class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + +You can perform an equality query just like you would on a non-encrypted field: + +.. code-block:: pycon + + >>> patient = Patient.objects.get(patient_record__ssn="123-45-6789") + >>> patient.patient_name + 'John Doe' + +.. _qe-configuring-kms: + +Configuring the Key Management Service (KMS) +============================================ + +A local KMS provider with a hardcoded key is suitable for local development and +testing, but in a production environment, you should securely :ref:`store and manage +your encryption keys `. + +To use Queryable Encryption, you must configure a Key Management Service (KMS) +to store and manage the encryption keys used to encrypt and decrypt data. + +There are two primary configuration points: + +#. The ``kms_providers`` parameter of + :class:`~pymongo.encryption_options.AutoEncryptionOpts` (see the + ``kms_providers`` parameter in + :class:`~pymongo.encryption_options.AutoEncryptionOpts` for the available + providers (``aws``, ``azure``, ``gcp``, etc.) and provider options). + +#. The :setting:`KMS_CREDENTIALS <DATABASE-KMS-CREDENTIALS>` inner option of + :setting:`DATABASES`. The keys for each provider are documented under the + ``master_key`` parameter of + :meth:`~pymongo.encryption.ClientEncryption.create_data_key`. + +Here's an example of KMS configuration with ``aws``:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ...
+ kms_providers={ + "aws": { + "accessKeyId": "your-access-key-id", + "secretAccessKey": "your-secret-access-key", + }, + }, + ), + }, + "KMS_CREDENTIALS": { + "aws": { + "key": "...", # Amazon Resource Name + "region": "...", # AWS region + }, + }, + }, + } + +(TODO: If there's a use case for multiple providers, motivate with a use case +and add a test.) + +If you've configured multiple KMS providers, you must define logic to determine +the provider for each model in your :ref:`database router +<qe-configuring-database-routers-setting>`:: + + class EncryptedRouter: + # ... + def kms_provider(self, model, **hints): + return "aws" + +.. _qe-configuring-encrypted-fields-map: + +Configuring the ``encrypted_fields_map`` option +=============================================== + +Encryption keys are created when you :ref:`run migrations for models that have +encrypted fields <qe-migrations>`. + +To see the encrypted fields map for your models (which includes the encryption +key IDs), run the :djadmin:`showencryptedfieldsmap` command:: + + $ python manage.py showencryptedfieldsmap --database encrypted + +In a production environment, it's recommended to include this map in your +settings to protect against a malicious server advertising a false encrypted +fields map:: + + from bson import json_util + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ...
+ encrypted_fields_map=json_util.loads( + """{ + "encrypt_patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "keyId": { + "$binary": { + "base64": "2MA29LaARIOqymYHGmi2mQ==", + "subType": "04" + } + }, + "queries": { + "queryType": "equality" + } + } + ] + }}""" + ), + ), + }, + }, + } + +Configuring the Automatic Encryption Shared Library +=================================================== + +The :ref:`manual:qe-reference-shared-library` is a preferred alternative to +:ref:`manual:qe-mongocryptd` and does not require you to start another process +to perform automatic encryption. + +In practice, if you use Atlas or Enterprise MongoDB, ``mongocryptd`` is already +configured for you; however, in such cases the shared library is still +recommended for use with Queryable Encryption. + +You can :ref:`download the shared library +` from the +:ref:`manual:enterprise-official-packages`. The shared library is +platform-specific. Make sure to download the correct version for your operating +system and architecture. + +To configure it in your Django settings, use +:class:`~pymongo.encryption_options.AutoEncryptionOpts`\'s +``crypt_shared_lib_path`` parameter:: + + from pymongo.encryption_options import AutoEncryptionOpts + + DATABASES = { + "encrypted": { + # ... + "OPTIONS": { + "auto_encryption_opts": AutoEncryptionOpts( + # ... + crypt_shared_lib_path="/path/to/mongo_crypt_shared_v1.dylib", + ) + }, + }, + } + +..
admonition:: Dynamic library path configuration + + If you encounter ``pymongocrypt.errors.MongoCryptError: An existing + crypt_shared library is loaded by the application at + [/path/to/mongo_crypt_v1.so], but the current call to mongocrypt_init() + failed to find that same library.``, you probably need to configure an + environment variable so that your system can locate the library: + + +---------------+---------------------------------+ + | **Platform** | **Environment Variable** | + +---------------+---------------------------------+ + | Windows | ``PATH`` | + +---------------+---------------------------------+ + | macOS | ``DYLD_FALLBACK_LIBRARY_PATH`` | + +---------------+---------------------------------+ + | Linux | ``LD_LIBRARY_PATH`` | + +---------------+---------------------------------+ + + For example, on macOS you can set the ``DYLD_FALLBACK_LIBRARY_PATH`` + environment variable in your shell before starting your Django application:: + + $ export DYLD_FALLBACK_LIBRARY_PATH="/path/to/mongo_crypt_shared/:$DYLD_FALLBACK_LIBRARY_PATH" diff --git a/docs/index.rst b/docs/index.rst index a5a60e84f..04d209669 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -46,6 +46,7 @@ Models - :doc:`ref/database` - :doc:`ref/contrib/gis` - :doc:`ref/django-admin` +- :doc:`ref/models/encrypted-fields` **Topic guides:** diff --git a/docs/ref/django-admin.rst b/docs/ref/django-admin.rst index a491714cf..3f4858939 100644 --- a/docs/ref/django-admin.rst +++ b/docs/ref/django-admin.rst @@ -13,3 +13,20 @@ in the :setting:`INSTALLED_APPS` setting. Available commands ================== + +``showencryptedfieldsmap`` +-------------------------- + +.. versionadded:: 5.2.3 + +.. django-admin:: showencryptedfieldsmap + + This command generates output for inclusion in + :class:`~pymongo.encryption_options.AutoEncryptionOpts`\'s + ``encrypted_fields_map`` argument. + + See :ref:`qe-configuring-encrypted-fields-map`. + + ..
django-admin-option:: --database DATABASE + + Specifies the database to use. Defaults to ``default``. diff --git a/docs/ref/index.rst b/docs/ref/index.rst index 94a11a2a8..47b27d466 100644 --- a/docs/ref/index.rst +++ b/docs/ref/index.rst @@ -9,5 +9,7 @@ API reference forms contrib/index database + models/encrypted-fields django-admin utils + settings diff --git a/docs/ref/models/encrypted-fields.rst b/docs/ref/models/encrypted-fields.rst new file mode 100644 index 000000000..c102ef03a --- /dev/null +++ b/docs/ref/models/encrypted-fields.rst @@ -0,0 +1,172 @@ +================ +Encrypted fields +================ + +.. currentmodule:: django_mongodb_backend.fields + +.. versionadded:: 5.2.3 + +To use encrypted fields, you must :doc:`configure Queryable Encryption +</howto/queryable-encryption>`. + +The following tables detail which fields have encrypted counterparts. In all +cases, the encrypted field names are simply prefixed with ``Encrypted``, e.g. +``EncryptedCharField``. They are importable from +``django_mongodb_backend.fields``. + +.. csv-table:: ``django.db.models`` + :header: "Model Field", "Encrypted version available?" + + :class:`~django.db.models.BigIntegerField`, Yes + :class:`~django.db.models.BinaryField`, Yes + :class:`~django.db.models.BooleanField`, Yes + :class:`~django.db.models.CharField`, Yes + :class:`~django.db.models.DateField`, Yes + :class:`~django.db.models.DateTimeField`, Yes + :class:`~django.db.models.DecimalField`, Yes + :class:`~django.db.models.DurationField`, Yes + :class:`~django.db.models.EmailField`, Yes + :class:`~django.db.models.FileField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.FilePathField`, No: the use case for encrypting this field is unclear. + :class:`~django.db.models.FloatField`, Yes + :class:`~django.db.models.GenericIPAddressField`, Yes + :class:`~django.db.models.ImageField`, No: the use case for encrypting this field is unclear.
+ :class:`~django.db.models.IntegerField`, Yes + :class:`~django.db.models.JSONField`, No: ``JSONField`` isn't recommended. + :class:`~django.db.models.PositiveIntegerField`, Yes + :class:`~django.db.models.PositiveBigIntegerField`, Yes + :class:`~django.db.models.PositiveSmallIntegerField`, Yes + :class:`~django.db.models.SlugField`, No: it requires a unique index which Queryable Encryption doesn't support. + :class:`~django.db.models.SmallIntegerField`, Yes + :class:`~django.db.models.TimeField`, Yes + :class:`~django.db.models.TextField`, Yes + :class:`~django.db.models.URLField`, Yes + :class:`~django.db.models.UUIDField`, Yes + +.. csv-table:: ``django_mongodb_backend.fields`` + :header: "Model Field", "Encrypted version available?" + + :class:`ArrayField`, Yes + :class:`EmbeddedModelArrayField`, Yes + :class:`EmbeddedModelField`, Yes + :class:`ObjectIdField`, Yes + :class:`PolymorphicEmbeddedModelField`, No: may be implemented in the future. + :class:`PolymorphicEmbeddedModelArrayField`, No: may be implemented in the future. + +.. _encrypted-fields-queries: + +``EncryptedField.queries`` +-------------------------- + +Most encrypted fields* take an optional ``queries`` argument. It's a dictionary +that specifies the type of queries that can be performed on the field, as well +as any query options. + +The :ref:`available query types ` depend +on your version of MongoDB. For example, in MongoDB 8.0, the supported types +are ``equality`` and ``range``. + +.. admonition:: Query types vs. Django lookups + + Range queries in Queryable Encryption are different from Django's + :ref:`range lookups `. Range queries allow you to + perform comparisons on encrypted fields, while Django's range lookups are + used for filtering based on a range of values. 
+ +\* These fields don't support the ``queries`` argument: + +- ``EncryptedArrayField`` +- ``EncryptedEmbeddedModelArrayField`` +- ``EncryptedEmbeddedModelField`` + +Embedded model encryption +========================= + +There are two ways to encrypt embedded models. You can either encrypt the +entire subdocument, in which case you can't query any of the subdocument's fields, +or you can encrypt only selected fields of the subdocument. + +Encrypting the entire subdocument +--------------------------------- + +To encrypt a subdocument, use ``EncryptedEmbeddedModelField`` or +``EncryptedEmbeddedModelArrayField``. In this case, the field's embedded model +cannot have any encrypted fields. + +Encrypting selected fields of a subdocument +------------------------------------------- + +To encrypt only selected fields of a subdocument, use :class:`EmbeddedModelField` +and any of the other encrypted fields on the embedded model. + +MongoDB doesn't support encrypting selected fields of +``EmbeddedModelArrayField``. + +Limitations +=========== + +MongoDB imposes some restrictions on encrypted fields: + +* They cannot be indexed. +* They cannot be part of a unique constraint. +* They cannot be null. + +``QuerySet`` limitations +------------------------ + +In addition to :ref:`Django MongoDB Backend's QuerySet limitations +<known-issues-limitations-querying>`, some ``QuerySet`` methods aren't +supported on encrypted fields. Each unsupported method is followed by a sample +error message from the database. Depending on the exact query, error messages +may vary. + +- :meth:`~django.db.models.query.QuerySet.order_by`: Cannot add an encrypted + field as a prefix of another encrypted field. +- :meth:`~django.db.models.query.QuerySet.alias`, + :meth:`~django.db.models.query.QuerySet.annotate`, + :meth:`~django.db.models.query.QuerySet.distinct`: Cannot group on field + '_id.value' which is encrypted with the random algorithm or whose encryption + properties are not known until runtime.
+- :meth:`~django.db.models.query.QuerySet.dates`, + :meth:`~django.db.models.query.QuerySet.datetimes`: If the value type is a + date, the type of the index must also be date (and vice versa). +- :meth:`~django.db.models.query.QuerySet.in_bulk`: Encrypted fields can't have + unique constraints. + +.. TODO: add details about joined queries after + https://github.com/mongodb/django-mongodb-backend/pull/443 is finalized. + +There are also several ``QuerySet`` methods that aren't permitted on any models +(regardless of whether or not they have encrypted fields) that use a database +connection with Automatic Encryption. Each unsupported method is followed by a +sample error message from the database. + +- :meth:`~django.db.models.query.QuerySet.update`: Multi-document updates are + not allowed with Queryable Encryption. +- :meth:`~django.db.models.query.QuerySet.aggregate`, + :meth:`~django.db.models.query.QuerySet.count`: Aggregation stage + $internalFacetTeeConsumer is not allowed or supported with automatic + encryption. +- :meth:`~django.db.models.query.QuerySet.union`: Aggregation stage $unionWith + is not allowed or supported with automatic encryption. + +``EncryptedFieldMixin`` +======================= + +.. class:: EncryptedFieldMixin + + .. versionadded:: 5.2.3 + + Use this mixin to create encrypted versions of your own custom fields. For + example, to create an encrypted version of ``MyField``:: + + from django.db import models + from django_mongodb_backend.fields import EncryptedFieldMixin + from myapp.fields import MyField + + + class MyEncryptedField(EncryptedFieldMixin, MyField): + pass + + This adds the :ref:`queries <encrypted-fields-queries>` argument to the + field.
diff --git a/docs/ref/settings.rst b/docs/ref/settings.rst new file mode 100644 index 000000000..6aba8a75a --- /dev/null +++ b/docs/ref/settings.rst @@ -0,0 +1,43 @@ +======== +Settings +======== + +Queryable Encryption +==================== + +The following :setting:`django:DATABASES` inner options support configuration of +Key Management Service (KMS) credentials for Queryable Encryption. + +.. setting:: DATABASE-KMS-CREDENTIALS + +``KMS_CREDENTIALS`` +------------------- + +Default: ``{}`` (empty dictionary) + +A dictionary of Key Management Service (KMS) credential key-value pairs. These +credentials are required to access your KMS provider (such as AWS KMS, Azure Key +Vault, or GCP KMS) for encrypting and decrypting data using Queryable +Encryption. + +For example, after following :doc:`/howto/queryable-encryption`, to configure AWS KMS, +Azure Key Vault, or GCP KMS credentials, you can set ``KMS_CREDENTIALS`` in +your :setting:`django:DATABASES` settings as follows: + +.. code-block:: python + + DATABASES["encrypted"]["KMS_CREDENTIALS"] = { + "aws": { + "key": os.getenv("AWS_KEY_ARN", ""), + "region": os.getenv("AWS_KEY_REGION", ""), + }, + "azure": { + "key": os.getenv("AZURE_KEY_VAULT_URL", ""), + "client_id": os.getenv("AZURE_CLIENT_ID", ""), + "client_secret": os.getenv("AZURE_CLIENT_SECRET", ""), + }, + "gcp": { + "key": os.getenv("GCP_KEY_NAME", ""), + "project_id": os.getenv("GCP_PROJECT_ID", ""), + }, + } diff --git a/docs/ref/utils.rst b/docs/ref/utils.rst index 5cdb0ccf3..0312c1f89 100644 --- a/docs/ref/utils.rst +++ b/docs/ref/utils.rst @@ -48,3 +48,37 @@ following parts can be considered stable. But for maximum flexibility, construct :setting:`DATABASES` manually as described in :ref:`configuring-databases-setting`. + +``model_has_encrypted_fields()`` +================================= + +.. function:: model_has_encrypted_fields(model) + + .. versionadded:: 5.2.3 + + Returns ``True`` if the given Django model, or any model embedded in it, + has encrypted fields.
+ + Example usage in a :ref:`database router + `:: + + from django_mongodb_backend.utils import model_has_encrypted_fields + + class EncryptedRouter: + def db_for_read(self, model, **hints): + if model_has_encrypted_fields(model): + return "encrypted" + return "default" + + def db_for_write(self, model, **hints): + if model_has_encrypted_fields(model): + return "encrypted" + return "default" + + def allow_migrate(self, db, app_label, model_name=None, **hints): + if hints.get("model"): + if model_has_encrypted_fields(hints["model"]): + return db == "encrypted" + else: + return db == "default" + return None diff --git a/docs/topics/known-issues.rst b/docs/topics/known-issues.rst index e4c401a3c..5e3256f76 100644 --- a/docs/topics/known-issues.rst +++ b/docs/topics/known-issues.rst @@ -26,6 +26,8 @@ Model fields - :class:`~django.db.models.CompositePrimaryKey` - :class:`~django.db.models.GeneratedField` +.. _known-issues-limitations-querying: + Querying ======== diff --git a/pyproject.toml b/pyproject.toml index 0549f02ef..b4f4841ce 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,7 @@ docs = [ "furo>=2025.7.19", "sphinx-copybutton", ] +encryption = ["pymongo[encryption]"] [project.urls] Homepage = "https://www.mongodb.org" diff --git a/tests/backend_/test_features.py b/tests/backend_/test_features.py index 05959fa70..d505c7fab 100644 --- a/tests/backend_/test_features.py +++ b/tests/backend_/test_features.py @@ -44,3 +44,83 @@ def mocked_command(command): with patch("pymongo.synchronous.database.Database.command", wraps=mocked_command): self.assertIs(connection.features._supports_transactions, False) + + +class SupportsQueryableEncryptionTests(TestCase): + def setUp(self): + # Clear the cached property. + connection.features.__dict__.pop("supports_queryable_encryption", None) + # Must initialize the feature before patching it. 
+ connection.features._supports_transactions # noqa: B018 + + def tearDown(self): + del connection.features.supports_queryable_encryption + + @staticmethod + def enterprise_response(command): + if command == "buildInfo": + return {"modules": ["enterprise"]} + raise Exception("Unexpected command") + + @staticmethod + def non_enterprise_response(command): + if command == "buildInfo": + return {"modules": []} + raise Exception("Unexpected command") + + def test_supported_on_atlas(self): + """Supported on MongoDB 8.0+ Atlas replica set or sharded cluster.""" + with ( + patch( + "pymongo.synchronous.database.Database.command", wraps=self.non_enterprise_response + ), + patch("django.db.connection.features.supports_atlas_search", True), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, True) + + def test_supported_on_enterprise(self): + """Supported on MongoDB 8.0+ Enterprise replica set or sharded cluster.""" + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, True) + + def test_atlas_or_enterprise_required(self): + """Not supported on MongoDB Community Edition.""" + with ( + patch( + "pymongo.synchronous.database.Database.command", wraps=self.non_enterprise_response + ), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) + + def test_transactions_required(self): + """ + 
Not supported if database isn't a replica set or sharded cluster + (i.e. DatabaseFeatures._supports_transactions = False). + """ + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", False), + patch("django.db.connection.features.is_mongodb_8_0", True), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) + + def test_mongodb_8_0_required(self): + """Not supported on MongoDB < 8.0""" + with ( + patch("pymongo.synchronous.database.Database.command", wraps=self.enterprise_response), + patch("django.db.connection.features.supports_atlas_search", False), + patch("django.db.connection.features._supports_transactions", True), + patch("django.db.connection.features.is_mongodb_8_0", False), + ): + self.assertIs(connection.features.supports_queryable_encryption, False) diff --git a/tests/encryption_/__init__.py b/tests/encryption_/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/encryption_/models.py b/tests/encryption_/models.py new file mode 100644 index 000000000..995e4760c --- /dev/null +++ b/tests/encryption_/models.py @@ -0,0 +1,184 @@ +from django.db import models + +from django_mongodb_backend.fields import ( + EmbeddedModelField, + EncryptedArrayField, + EncryptedBigIntegerField, + EncryptedBinaryField, + EncryptedBooleanField, + EncryptedCharField, + EncryptedDateField, + EncryptedDateTimeField, + EncryptedDecimalField, + EncryptedDurationField, + EncryptedEmailField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedFloatField, + EncryptedGenericIPAddressField, + EncryptedIntegerField, + EncryptedObjectIdField, + EncryptedPositiveBigIntegerField, + EncryptedPositiveIntegerField, + EncryptedPositiveSmallIntegerField, + EncryptedSmallIntegerField, + EncryptedTextField, + EncryptedTimeField, + 
EncryptedURLField, + EncryptedUUIDField, +) +from django_mongodb_backend.models import EmbeddedModel + + +class Author(models.Model): + name = models.CharField(max_length=255) + + +class Book(models.Model): + title = models.CharField(max_length=255) + author = models.ForeignKey(Author, models.CASCADE) + + +class EncryptedTestModel(models.Model): + class Meta: + abstract = True + required_db_features = {"supports_queryable_encryption"} + + +# Array models +class ArrayModel(EncryptedTestModel): + values = EncryptedArrayField( + models.IntegerField(), + size=5, + ) + + +# Embedded models +class Patient(EncryptedTestModel): + patient_name = models.CharField(max_length=255) + patient_id = models.BigIntegerField() + patient_record = EmbeddedModelField("PatientRecord") + + def __str__(self): + return f"{self.patient_name} ({self.patient_id})" + + +class PatientRecord(EmbeddedModel): + ssn = EncryptedCharField(max_length=11, queries={"queryType": "equality"}) + billing = EncryptedEmbeddedModelField("Billing") + bill_amount = models.DecimalField(max_digits=10, decimal_places=2) + + +class Billing(EmbeddedModel): + cc_type = models.CharField(max_length=50) + cc_number = models.CharField(max_length=20) + + +# Embedded array models +class Actor(EmbeddedModel): + name = models.CharField(max_length=100) + + +class Movie(EncryptedTestModel): + title = models.CharField(max_length=200) + plot = models.TextField(blank=True) + runtime = models.IntegerField(default=0) + released = models.DateTimeField("release date") + cast = EncryptedEmbeddedModelArrayField(Actor) + + def __str__(self): + return self.title + + +# Equality-queryable field models +class BinaryModel(EncryptedTestModel): + value = EncryptedBinaryField(queries={"queryType": "equality"}) + + +class BooleanModel(EncryptedTestModel): + value = EncryptedBooleanField(queries={"queryType": "equality"}) + + +class CharModel(EncryptedTestModel): + value = EncryptedCharField(max_length=255, queries={"queryType": "equality"}) + + 
+class EmailModel(EncryptedTestModel): + value = EncryptedEmailField(max_length=255, queries={"queryType": "equality"}) + + +class GenericIPAddressModel(EncryptedTestModel): + value = EncryptedGenericIPAddressField(queries={"queryType": "equality"}) + + +class ObjectIdModel(EncryptedTestModel): + value = EncryptedObjectIdField(queries={"queryType": "equality"}) + + +class TextModel(EncryptedTestModel): + value = EncryptedTextField(queries={"queryType": "equality"}) + + +class URLModel(EncryptedTestModel): + value = EncryptedURLField(max_length=500, queries={"queryType": "equality"}) + + +class UUIDModel(EncryptedTestModel): + value = EncryptedUUIDField(queries={"queryType": "equality"}) + + +# Range-queryable field models +class BigIntegerModel(EncryptedTestModel): + value = EncryptedBigIntegerField(queries={"queryType": "range"}) + + +class DateModel(EncryptedTestModel): + value = EncryptedDateField(queries={"queryType": "range"}) + + +class DateTimeModel(EncryptedTestModel): + value = EncryptedDateTimeField(queries={"queryType": "range"}) + + +class DecimalModel(EncryptedTestModel): + value = EncryptedDecimalField(max_digits=10, decimal_places=2, queries={"queryType": "range"}) + + +class DurationModel(EncryptedTestModel): + value = EncryptedDurationField(queries={"queryType": "range"}) + + +class FloatModel(EncryptedTestModel): + value = EncryptedFloatField(queries={"queryType": "range"}) + + +class IntegerModel(EncryptedTestModel): + value = EncryptedIntegerField(queries={"queryType": "range"}) + + +class PositiveBigIntegerModel(EncryptedTestModel): + value = EncryptedPositiveBigIntegerField(queries={"queryType": "range"}) + + +class PositiveIntegerModel(EncryptedTestModel): + value = EncryptedPositiveIntegerField(queries={"queryType": "range"}) + + +class PositiveSmallIntegerModel(EncryptedTestModel): + value = EncryptedPositiveSmallIntegerField(queries={"queryType": "range"}) + + +class SmallIntegerModel(EncryptedTestModel): + value = 
EncryptedSmallIntegerField(queries={"queryType": "range"}) + + +class TimeModel(EncryptedTestModel): + value = EncryptedTimeField(queries={"queryType": "range"}) + + +class EncryptionKey(models.Model): + key_alt_name = models.CharField(max_length=500, db_column="keyAltNames") + + class Meta: + db_table = "__keyVault" + managed = False diff --git a/tests/encryption_/test_base.py b/tests/encryption_/test_base.py new file mode 100644 index 000000000..0c165d19a --- /dev/null +++ b/tests/encryption_/test_base.py @@ -0,0 +1,21 @@ +import pymongo +from bson.binary import Binary +from django.conf import settings +from django.db import connections +from django.test import TestCase, skipUnlessDBFeature + + +@skipUnlessDBFeature("supports_queryable_encryption") +class EncryptionTestCase(TestCase): + databases = {"default", "encrypted"} + maxDiff = None + + def assertEncrypted(self, model, field): + # Access encrypted database from an unencrypted connection + conn_params = connections["default"].get_connection_params() + db_name = settings.DATABASES["encrypted"]["NAME"] + with pymongo.MongoClient(**conn_params) as new_connection: + db = new_connection[db_name] + collection = db[model._meta.db_table] + data = collection.find_one({}, {field: 1, "_id": 0}) + self.assertIsInstance(data[field], Binary) diff --git a/tests/encryption_/test_fields.py b/tests/encryption_/test_fields.py new file mode 100644 index 000000000..47231c5d0 --- /dev/null +++ b/tests/encryption_/test_fields.py @@ -0,0 +1,413 @@ +import datetime +import uuid +from decimal import Decimal +from operator import attrgetter + +from bson import ObjectId +from django.db import DatabaseError +from django.db.models import Avg + +from django_mongodb_backend.fields import ( + EncryptedArrayField, + EncryptedCharField, + EncryptedEmbeddedModelArrayField, + EncryptedEmbeddedModelField, + EncryptedIntegerField, +) + +from .models import ( + Actor, + ArrayModel, + BigIntegerModel, + Billing, + BinaryModel, + Book, + 
BooleanModel, + CharModel, + DateModel, + DateTimeModel, + DecimalModel, + DurationModel, + EmailModel, + FloatModel, + GenericIPAddressModel, + IntegerModel, + Movie, + ObjectIdModel, + Patient, + PatientRecord, + PositiveBigIntegerModel, + PositiveIntegerModel, + PositiveSmallIntegerModel, + SmallIntegerModel, + TextModel, + TimeModel, + URLModel, + UUIDModel, +) +from .test_base import EncryptionTestCase + + +class ArrayModelTests(EncryptionTestCase): + def setUp(self): + self.array_model = ArrayModel.objects.create(values=[1, 2, 3, 4, 5]) + + def test_array(self): + array_model = ArrayModel.objects.get(id=self.array_model.id) + self.assertEqual(array_model.values, [1, 2, 3, 4, 5]) + self.assertEncrypted(self.array_model, "values") + + +class EmbeddedModelTests(EncryptionTestCase): + def setUp(self): + self.billing = Billing(cc_type="Visa", cc_number="4111111111111111") + self.patient_record = PatientRecord(ssn="123-45-6789", billing=self.billing) + self.patient = Patient.objects.create( + patient_name="John Doe", patient_id=123456789, patient_record=self.patient_record + ) + + def test_object(self): + patient = Patient.objects.get(id=self.patient.id) + self.assertEqual(patient.patient_record.ssn, "123-45-6789") + self.assertEqual(patient.patient_record.billing.cc_type, "Visa") + self.assertEqual(patient.patient_record.billing.cc_number, "4111111111111111") + + +class EmbeddedModelArrayTests(EncryptionTestCase): + def setUp(self): + self.actor1 = Actor(name="Actor One") + self.actor2 = Actor(name="Actor Two") + self.movie = Movie.objects.create( + title="Sample Movie", + cast=[self.actor1, self.actor2], + released=datetime.date(2024, 6, 1), + ) + + def test_array(self): + movie = Movie.objects.get(id=self.movie.id) + self.assertEqual(len(movie.cast), 2) + self.assertEqual(movie.cast[0].name, "Actor One") + self.assertEqual(movie.cast[1].name, "Actor Two") + self.assertEncrypted(movie, "cast") + + +class FieldTests(EncryptionTestCase): + def assertEquality(self, 
model_cls, val): + model_cls.objects.create(value=val) + fetched = model_cls.objects.get(value=val) + self.assertEqual(fetched.value, val) + + def assertRange(self, model_cls, *, low, high, threshold): + model_cls.objects.create(value=low) + model_cls.objects.create(value=high) + self.assertEqual(model_cls.objects.get(value=low).value, low) + self.assertEqual(model_cls.objects.get(value=high).value, high) + objs = list(model_cls.objects.filter(value__gt=threshold)) + self.assertEqual(len(objs), 1) + self.assertEqual(objs[0].value, high) + + # Equality-only fields + def test_binary(self): + self.assertEquality(BinaryModel, b"\x00\x01\x02") + self.assertEncrypted(BinaryModel, "value") + + def test_boolean(self): + self.assertEquality(BooleanModel, True) + self.assertEncrypted(BooleanModel, "value") + + def test_char(self): + self.assertEquality(CharModel, "hello") + self.assertEncrypted(CharModel, "value") + + def test_email(self): + self.assertEquality(EmailModel, "test@example.com") + self.assertEncrypted(EmailModel, "value") + + def test_ip(self): + self.assertEquality(GenericIPAddressModel, "192.168.0.1") + self.assertEncrypted(GenericIPAddressModel, "value") + + def test_objectid(self): + self.assertEquality(ObjectIdModel, ObjectId()) + self.assertEncrypted(ObjectIdModel, "value") + + def test_text(self): + self.assertEquality(TextModel, "some text") + self.assertEncrypted(TextModel, "value") + + def test_url(self): + self.assertEquality(URLModel, "https://example.com") + self.assertEncrypted(URLModel, "value") + + def test_uuid(self): + self.assertEquality(UUIDModel, uuid.uuid4()) + self.assertEncrypted(UUIDModel, "value") + + # Range fields + def test_big_integer(self): + self.assertRange(BigIntegerModel, low=100, high=200, threshold=150) + self.assertEncrypted(BigIntegerModel, "value") + + def test_date(self): + self.assertRange( + DateModel, + low=datetime.date(2024, 6, 1), + high=datetime.date(2024, 6, 10), + threshold=datetime.date(2024, 6, 5), + ) + 
self.assertEncrypted(DateModel, "value") + + def test_datetime(self): + self.assertRange( + DateTimeModel, + low=datetime.datetime(2024, 6, 1, 12, 0), + high=datetime.datetime(2024, 6, 2, 12, 0), + threshold=datetime.datetime(2024, 6, 2, 0, 0), + ) + self.assertEncrypted(DateTimeModel, "value") + + def test_decimal(self): + self.assertRange( + DecimalModel, + low=Decimal("123.45"), + high=Decimal("200.50"), + threshold=Decimal("150"), + ) + self.assertEncrypted(DecimalModel, "value") + + def test_duration(self): + self.assertRange( + DurationModel, + low=datetime.timedelta(days=3), + high=datetime.timedelta(days=10), + threshold=datetime.timedelta(days=5), + ) + self.assertEncrypted(DurationModel, "value") + + def test_float(self): + self.assertRange(FloatModel, low=1.23, high=4.56, threshold=3.0) + self.assertEncrypted(FloatModel, "value") + + def test_integer(self): + self.assertRange(IntegerModel, low=5, high=10, threshold=7) + self.assertEncrypted(IntegerModel, "value") + + def test_positive_big_integer(self): + self.assertRange(PositiveBigIntegerModel, low=100, high=500, threshold=200) + self.assertEncrypted(PositiveBigIntegerModel, "value") + + def test_positive_integer(self): + self.assertRange(PositiveIntegerModel, low=10, high=20, threshold=15) + self.assertEncrypted(PositiveIntegerModel, "value") + + def test_positive_small_integer(self): + self.assertRange(PositiveSmallIntegerModel, low=5, high=8, threshold=6) + self.assertEncrypted(PositiveSmallIntegerModel, "value") + + def test_small_integer(self): + self.assertRange(SmallIntegerModel, low=-5, high=2, threshold=0) + self.assertEncrypted(SmallIntegerModel, "value") + + def test_time(self): + self.assertRange( + TimeModel, + low=datetime.time(10, 0), + high=datetime.time(15, 0), + threshold=datetime.time(12, 0), + ) + self.assertEncrypted(TimeModel, "value") + + +class QueryTests(EncryptionTestCase): + def test_aggregate(self): + msg = ( + "Aggregation stage $internalFacetTeeConsumer is not allowed or " 
+ "supported with automatic encryption." + ) + with self.assertRaisesMessage(DatabaseError, msg): + IntegerModel.objects.aggregate(Avg("value")) + + def test_alias(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.alias(avg=Avg("value"))) + + def test_annotate(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(IntegerModel.objects.annotate(avg=Avg("value"))) + + def test_bulk_create(self): + CharModel.objects.bulk_create([CharModel(value="abc"), CharModel(value="xyz")]) + self.assertQuerySetEqual( + CharModel.objects.order_by("pk"), ["abc", "xyz"], attrgetter("value") + ) + + def test_bulk_update(self): + objs = [ + CharModel.objects.create(value="abc"), + CharModel.objects.create(value="xyz"), + ] + objs[0].value = "def" + objs[1].value = "mno" + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.bulk_update(objs, ["value"]) + + def test_contains(self): + obj = CharModel.objects.create(value="abc") + self.assertIs(CharModel.objects.contains(obj), True) + + def test_count(self): + msg = ( + "Aggregation stage $internalFacetTeeConsumer is not allowed or " + "supported with automatic encryption." + ) + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.count() + + def test_dates(self): + msg = ( + "If the value type is a date, the type of the index must also be date (and vice versa)." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(DateModel.objects.dates("value", "year")) + + def test_datetimes(self): + msg = ( + "If the value type is a date, the type of the index must also be date (and vice versa)." + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(DateTimeModel.objects.datetimes("value", "year")) + + def test_distinct(self): + msg = ( + "Cannot group on field '_id.value' which is encrypted with the " + "random algorithm or whose encryption properties are not known " + "until runtime" + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.distinct("value")) + + def test_exclude(self): + obj1 = CharModel.objects.create(value="abc") + obj2 = CharModel.objects.create(value="xyz") + self.assertSequenceEqual(CharModel.objects.exclude(value=obj1.value), [obj2]) + + def test_exists(self): + self.assertIs(CharModel.objects.exists(), False) + + def test_get_or_create(self): + obj1, created1 = CharModel.objects.get_or_create(value="abc") + self.assertIs(created1, True) + obj2, created2 = CharModel.objects.get_or_create(value="abc") + self.assertIs(created2, False) + self.assertEqual(obj1, obj2) + + def test_join(self): + msg = ( + "Non-empty 'let' field is not allowed in the $lookup aggregation " + "stage over an encrypted collection." + ) + with self.assertRaisesMessage(DatabaseError, msg): + list(Book.objects.filter(author__name="xxx")) + + def test_order_by(self): + msg = "Cannot add an encrypted field as a prefix of another encrypted field" + with self.assertRaisesMessage(DatabaseError, msg): + list(CharModel.objects.order_by("value")) + + def test_select_related(self): + msg = ( + "Non-empty 'let' field is not allowed in the $lookup aggregation " + "stage over an encrypted collection." 
+ ) + with self.assertRaisesMessage(DatabaseError, msg): + list(Book.objects.select_related("author")) + + def test_update(self): + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.update(value="xyz") + + def test_update_or_create(self): + CharModel.objects.create(value="xyz") + msg = "Multi-document updates are not allowed with Queryable Encryption" + with self.assertRaisesMessage(DatabaseError, msg): + CharModel.objects.update_or_create(value="xyz", defaults={"plain": "abc"}) + + def test_union(self): + msg = "Aggregation stage $unionWith is not allowed or supported with automatic encryption." + qs1 = IntegerModel.objects.filter(value__gt=1) + qs2 = IntegerModel.objects.filter(value__gte=8) + with self.assertRaisesMessage(DatabaseError, msg): + list(qs1.union(qs2)) + + def test_values(self): + list(CharModel.objects.values("value")) + + def test_values_list(self): + list(CharModel.objects.values_list("value")) + + +class FieldMixinTests(EncryptionTestCase): + def test_db_index(self): + msg = "'db_index=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(db_index=True) + + def test_null(self): + msg = "'null=True' is not supported on encrypted fields." + with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(null=True) + + def test_unique(self): + msg = "'unique=True' is not supported on encrypted fields." 
+ with self.assertRaisesMessage(ValueError, msg): + EncryptedIntegerField(unique=True) + + def test_deconstruct_preserves_queries_and_rewrites_path(self): + field = EncryptedCharField(max_length=50, queries={"field": "value"}) + field.name = "ssn" + name, path, args, kwargs = field.deconstruct() + + # Name is preserved + self.assertEqual(name, "ssn") + + # Path is rewritten from 'encrypted_model' to regular fields path + self.assertEqual(path, "django_mongodb_backend.fields.EncryptedCharField") + + # No positional args for CharField + self.assertEqual(args, []) + + # Queries value is preserved in kwargs + self.assertIn("queries", kwargs) + self.assertEqual(kwargs["queries"], {"field": "value"}) + + # Reconstruct from deconstruct output + new_field = EncryptedCharField(*args, **kwargs) + + # Reconstructed field is equivalent + self.assertEqual(new_field.queries, field.queries) + self.assertIsNot(new_field, field) + self.assertEqual(new_field.max_length, field.max_length) + + def test_fields_without_queries(self): + """Some field types (array, object) can't be queried.""" + for field in ( + EncryptedArrayField, + EncryptedEmbeddedModelField, + EncryptedEmbeddedModelArrayField, + ): + with self.subTest(field=field): + msg = f"{field.__name__} does not support the queries argument." 
+ with self.assertRaisesMessage(ValueError, msg): + field(Actor, queries={}) diff --git a/tests/encryption_/test_management.py b/tests/encryption_/test_management.py new file mode 100644 index 000000000..096ccab3a --- /dev/null +++ b/tests/encryption_/test_management.py @@ -0,0 +1,131 @@ +from io import StringIO + +from bson import json_util +from django.core.exceptions import ImproperlyConfigured +from django.core.management import call_command +from django.db import connections +from django.test import modify_settings + +from .models import EncryptionKey +from .test_base import EncryptionTestCase + + +@modify_settings(INSTALLED_APPS={"prepend": "django_mongodb_backend"}) +class CommandTests(EncryptionTestCase): + # Expected encrypted field maps for all Encrypted* models + expected_maps = { + "encryption__patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "queries": {"queryType": "equality"}, + }, + {"bsonType": "object", "path": "patient_record.billing"}, + ] + }, + # Equality-queryable fields + "encryption__binarymodel": { + "fields": [ + {"bsonType": "binData", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__booleanmodel": { + "fields": [{"bsonType": "bool", "path": "value", "queries": {"queryType": "equality"}}] + }, + "encryption__charmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__emailmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__genericipaddressmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__textmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "encryption__urlmodel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + # 
Range-queryable fields + "encryption__bigintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__datemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__datetimemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__decimalmodel": { + "fields": [{"bsonType": "decimal", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__durationmodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__floatmodel": { + "fields": [{"bsonType": "double", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__integermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positivebigintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positiveintegermodel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__positivesmallintegermodel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__smallintegermodel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "encryption__timemodel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + } + + def _compare_output(self, expected, actual): + for field in actual["fields"]: + del field["keyId"] # Can't compare dynamic value + self.assertEqual(expected, actual) + + def test_show_encrypted_fields_map(self): + out = StringIO() + call_command("showencryptedfieldsmap", "--database", "encrypted", verbosity=0, stdout=out) + command_output = json_util.loads(out.getvalue()) + + # Loop through each expected model + for model_key, expected in 
self.expected_maps.items(): + with self.subTest(model=model_key): + self.assertIn(model_key, command_output) + self._compare_output(expected, command_output[model_key]) + + def test_missing_key(self): + test_key = "encryption__patient.patient_record.ssn" + msg = ( + f"Encryption key {test_key} not found. Have migrated the " + " model?" + ) + EncryptionKey.objects.filter(key_alt_name=test_key).delete() + try: + with self.assertRaisesMessage(ImproperlyConfigured, msg): + call_command("showencryptedfieldsmap", "--database", "encrypted", verbosity=0) + finally: + # Replace the deleted key. + connections["encrypted"].client_encryption.create_data_key( + kms_provider="local", + key_alt_names=[test_key], + ) diff --git a/tests/encryption_/test_schema.py b/tests/encryption_/test_schema.py new file mode 100644 index 000000000..ae1803389 --- /dev/null +++ b/tests/encryption_/test_schema.py @@ -0,0 +1,152 @@ +from bson.binary import Binary +from django.core.exceptions import ImproperlyConfigured +from django.db import connections + +from . 
import models + from .models import EncryptionKey + from .test_base import EncryptionTestCase + + +class SchemaTests(EncryptionTestCase): + # Expected encrypted fields map per model + expected_map = { + "Patient": { + "fields": [ + { + "bsonType": "string", + "path": "patient_record.ssn", + "queries": {"queryType": "equality"}, + }, + {"bsonType": "object", "path": "patient_record.billing"}, + ] + }, + "BinaryModel": { + "fields": [ + {"bsonType": "binData", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "BooleanModel": { + "fields": [{"bsonType": "bool", "path": "value", "queries": {"queryType": "equality"}}] + }, + "CharModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "EmailModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "GenericIPAddressModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "TextModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "URLModel": { + "fields": [ + {"bsonType": "string", "path": "value", "queries": {"queryType": "equality"}} + ] + }, + "BigIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "DateModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "DateTimeModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + "DecimalModel": { + "fields": [{"bsonType": "decimal", "path": "value", "queries": {"queryType": "range"}}] + }, + "DurationModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "FloatModel": { + "fields": [{"bsonType": "double", "path": "value", "queries": {"queryType": "range"}}] + }, + "IntegerModel": { + "fields": [{"bsonType": "long", "path": "value", 
"queries": {"queryType": "range"}}] + }, + "PositiveBigIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveIntegerModel": { + "fields": [{"bsonType": "long", "path": "value", "queries": {"queryType": "range"}}] + }, + "PositiveSmallIntegerModel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "SmallIntegerModel": { + "fields": [{"bsonType": "int", "path": "value", "queries": {"queryType": "range"}}] + }, + "TimeModel": { + "fields": [{"bsonType": "date", "path": "value", "queries": {"queryType": "range"}}] + }, + } + + def test_get_encrypted_fields_all_models(self): + """ + Loops through all models, + checks their encrypted fields map from the schema editor, + and compares to expected BSON type & queries mapping. + """ + # Deleting all keys is correct only if this test includes all + # test models. This test may not be needed since it's tested when the + # test runner migrates all models. If any subTest fails, the key vault + # will be left in an inconsistent state. + EncryptionKey.objects.all().delete() + connection = connections["encrypted"] + for model_name, expected in self.expected_map.items(): + with self.subTest(model=model_name): + model_class = getattr(models, model_name) + with connection.schema_editor() as editor: + encrypted_fields = editor._get_encrypted_fields(model_class) + for field in encrypted_fields["fields"]: + del field["keyId"] # Can't compare dynamic value + self.assertEqual(encrypted_fields, expected) + + def test_key_creation_and_lookup(self): + """ + Use _get_encrypted_fields to + generate and store a data key in the vault, then + query the vault with the keyAltName. + """ + model_class = models.CharModel + test_key_alt_name = f"{model_class._meta.db_table}.value" + # Delete the test key and verify it's gone. 
+ EncryptionKey.objects.filter(key_alt_name=test_key_alt_name).delete() + with self.assertRaises(EncryptionKey.DoesNotExist): + EncryptionKey.objects.get(key_alt_name=test_key_alt_name) + # Regenerate the keyId. + with connections["encrypted"].schema_editor() as editor: + encrypted_fields = editor._get_encrypted_fields(model_class) + # Validate schema contains a keyId for the field. + field_info = encrypted_fields["fields"][0] + self.assertEqual(field_info["path"], "value") + self.assertIsInstance(field_info["keyId"], Binary) + # Lookup in key vault by the keyAltName. + key = EncryptionKey.objects.get(key_alt_name=test_key_alt_name) + self.assertEqual(key.id, field_info["keyId"]) + self.assertEqual(key.key_alt_name, [test_key_alt_name]) + + def test_missing_auto_encryption_opts(self): + connection = connections["default"] + msg = ( + "Tried to create model encryption_.Patient in 'default' database. " + "The model has encrypted fields but DATABASES['default']['OPTIONS'] " + 'is missing the "auto_encryption_opts" parameter. If the model ' + "should not be created in this database, adjust your database " + "routers." + ) + with ( + self.assertRaisesMessage(ImproperlyConfigured, msg), + connection.schema_editor() as editor, + ): + editor.create_model(models.Patient) diff --git a/tests/raw_query_/test_raw_aggregate.py b/tests/raw_query_/test_raw_aggregate.py index 99dcd5faf..96df2f925 100644 --- a/tests/raw_query_/test_raw_aggregate.py +++ b/tests/raw_query_/test_raw_aggregate.py @@ -111,7 +111,7 @@ def assertAnnotations(self, results, expected_annotations): self.assertEqual(getattr(result, annotation), value) def test_rawqueryset_repr(self): - queryset = RawQuerySet(pipeline=[]) + queryset = RawQuerySet(pipeline=[], model=Book) self.assertEqual(repr(queryset), "") self.assertEqual(repr(queryset.query), "")