From 4714d259da4e228d02bd30032b860cec4444bccb Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 1 Nov 2025 17:52:35 +0100 Subject: [PATCH 1/8] auditlog: switch to pghistory --- .github/workflows/integration-tests.yml | 6 - .github/workflows/rest-framework-tests.yml | 5 - .github/workflows/unit-tests.yml | 8 - docs/content/en/open_source/upgrading/2.53.md | 38 +++ dojo/apps.py | 3 +- dojo/auditlog.py | 192 ++++--------- .../management/commands/pghistory_backfill.py | 5 +- .../commands/pghistory_backfill_fast.py | 5 +- dojo/management/commands/pghistory_clear.py | 5 +- dojo/middleware.py | 17 -- dojo/settings/settings.dist.py | 21 +- dojo/views.py | 2 +- unittests/test_auditlog.py | 261 ++++-------------- unittests/test_flush_auditlog.py | 87 ++++-- unittests/test_importers_performance.py | 115 +------- 15 files changed, 230 insertions(+), 540 deletions(-) create mode 100644 docs/content/en/open_source/upgrading/2.53.md diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 140c4f2befd..b5eb523a167 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -2,18 +2,12 @@ name: Integration tests on: workflow_call: - inputs: - auditlog_type: - type: string - default: "django-auditlog" jobs: integration_tests: # run tests with docker compose name: User Interface Tests runs-on: ubuntu-latest - env: - AUDITLOG_TYPE: ${{ inputs.auditlog_type }} strategy: matrix: test-case: [ diff --git a/.github/workflows/rest-framework-tests.yml b/.github/workflows/rest-framework-tests.yml index 23aa9a0af0c..2ae71ad8164 100644 --- a/.github/workflows/rest-framework-tests.yml +++ b/.github/workflows/rest-framework-tests.yml @@ -6,16 +6,11 @@ on: platform: type: string default: "linux/amd64" - auditlog_type: - type: string - default: "django-auditlog" jobs: unit_tests: name: Rest Framework Unit Tests runs-on: ${{ inputs.platform == 'linux/arm64' && 'ubuntu-24.04-arm' || 'ubuntu-latest' }} - env: - AUDITLOG_TYPE: ${{ inputs.auditlog_type }} strategy: matrix: diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index cbda2b40caf..e16990520df 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -25,26 +25,18 @@ jobs: strategy: matrix: platform: ['linux/amd64', 'linux/arm64'] - auditlog_type: ['django-auditlog', 'django-pghistory'] fail-fast: false needs: build-docker-containers uses: ./.github/workflows/rest-framework-tests.yml secrets: inherit with: platform: ${{ matrix.platform}} - auditlog_type: ${{ matrix.auditlog_type }} # only run integration tests for linux/amd64 (default) test-user-interface: needs: build-docker-containers uses: ./.github/workflows/integration-tests.yml secrets: inherit - strategy: - matrix: - auditlog_type: ['django-auditlog', 'django-pghistory'] - fail-fast: false - with: - auditlog_type: ${{ matrix.auditlog_type }} # only run k8s tests for linux/amd64 (default) test-k8s: diff --git a/docs/content/en/open_source/upgrading/2.53.md b/docs/content/en/open_source/upgrading/2.53.md new file mode 100644 index 00000000000..e72751787ea --- /dev/null +++ b/docs/content/en/open_source/upgrading/2.53.md @@ -0,0 +1,38 @@ +--- +title: "Upgrading to DefectDojo Version 2.53.x" +toc_hide: true +weight: -20251101 +description: Removal of django-auditlog and exclusive use of django-pghistory for audit logging. 
+--- + +## Breaking Change: Removal of django-auditlog + +Starting with DefectDojo 2.53, `django-auditlog` support has been removed in favour of `django-pghistory`. +This is designed to be a backwards compatible change, unless: +- You're querying the database directly for auditlog events, or, +- You've set the `DD_AUDITLOG_TYPE` environment variable (or `AUDITLOG_TYPE` settings field) + +### Required Actions + +If you're using `DD_AUDITLOG_TYPE`, remove it from your configuration/environment. + +### Existing Records Preserved + +Historical audit log entries stored in the `auditlog_logentry` table will continue to be displayed in the action history view for backward compatibility. No data migration is required. + +### Benefits of django-pghistory + +The switch to `django-pghistory` provides several advantages: + +- **Better performance**: Database-level triggers reduce overhead compared to Django signal-based auditing +- **More features**: Enhanced context tracking and better support for complex queries +- **Better data integrity**: PostgreSQL-native implementation ensures consistency + +### Migration Notes + +- A one-time data migration will take place to populate the `django-pghistory` tables with the initial snapshot of the tracked models. + +--- + +Check the [Release Notes](https://github.com/DefectDojo/django-DefectDojo/releases/tag/2.53.0) for the complete contents of this release. + diff --git a/dojo/apps.py b/dojo/apps.py index f1b2769f760..06733c29771 100644 --- a/dojo/apps.py +++ b/dojo/apps.py @@ -91,7 +91,8 @@ def ready(self): # Configure audit system after all models are loaded # This must be done in ready() to avoid "Models aren't loaded yet" errors # Note: pghistory models are registered here (no database access), but trigger - # enabling is handled via management command to avoid database access warnings + # enabling is handled in the entrpoint script to avoid database access warnings + # during startup register_django_pghistory_models() configure_audit_system() diff --git a/dojo/auditlog.py b/dojo/auditlog.py index 1996fc1cca4..d0e1354086c 100644 --- a/dojo/auditlog.py +++ b/dojo/auditlog.py @@ -1,11 +1,11 @@ """ Audit logging configuration for DefectDojo. -This module handles conditional registration of models with either django-auditlog -or django-pghistory based on the DD_AUDITLOG_TYPE setting. +This module handles registration of models with django-pghistory. +django-auditlog support has been removed. """ -import contextlib import logging +import os import sys import pghistory @@ -80,13 +80,6 @@ def _flush_models_in_batches(models_to_flush, timestamp_field: str, retention_pe return total_deleted, total_batches, reached_any_limit -def _flush_django_auditlog(retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]: - # Import inside to avoid model import issues at startup - from auditlog.models import LogEntry # noqa: PLC0415 - - return _flush_models_in_batches([LogEntry], "timestamp", retention_period, batch_size, max_batches, dry_run=dry_run) - - def _iter_pghistory_event_models(): """Yield pghistory Event models registered under the dojo app.""" for model in apps.get_app_config("dojo").get_models(): @@ -107,8 +100,7 @@ def run_flush_auditlog(retention_period: int | None = None, *, dry_run: bool = False) -> tuple[int, int, bool]: """ - Deletes audit entries older than the configured retention from both - django-auditlog and django-pghistory log entries. 
+ Deletes audit entries older than the configured retention from django-pghistory log entries. Returns a tuple of (deleted_total, batches_done, reached_limit). """ @@ -121,93 +113,13 @@ def run_flush_auditlog(retention_period: int | None = None, max_batches = max_batches if max_batches is not None else getattr(settings, "AUDITLOG_FLUSH_MAX_BATCHES", 100) phase = "DRY RUN" if dry_run else "Cleanup" - logger.info("Running %s for django-auditlog entries with %d Months retention across all backends", phase, retention_period) - d_deleted, d_batches, d_limit = _flush_django_auditlog(retention_period, batch_size, max_batches, dry_run=dry_run) logger.info("Running %s for django-pghistory entries with %d Months retention across all backends", phase, retention_period) p_deleted, p_batches, p_limit = _flush_pghistory_events(retention_period, batch_size, max_batches, dry_run=dry_run) - total_deleted = d_deleted + p_deleted - total_batches = d_batches + p_batches - reached_limit = bool(d_limit or p_limit) - verb = "would delete" if dry_run else "deleted" - logger.info("Audit flush summary: django-auditlog %s=%s batches=%s; pghistory %s=%s batches=%s; total_%s=%s total_batches=%s", - verb, d_deleted, d_batches, verb, p_deleted, p_batches, verb.replace(" ", "_"), total_deleted, total_batches) - - return total_deleted, total_batches, reached_limit - - -def enable_django_auditlog(): - """Enable django-auditlog by registering models.""" - # Import inside function to avoid AppRegistryNotReady errors - from auditlog.registry import auditlog # noqa: PLC0415 - - from dojo.models import ( # noqa: PLC0415 - Cred_User, - Dojo_User, - Endpoint, - Engagement, - Finding, - Finding_Group, - Finding_Template, - Notification_Webhooks, - Product, - Product_Type, - Risk_Acceptance, - Test, - ) - - logger.info("Enabling django-auditlog: Registering models") - auditlog.register(Dojo_User, exclude_fields=["password"]) - auditlog.register(Endpoint) - auditlog.register(Engagement) - auditlog.register(Finding, m2m_fields={"reviewers"}) - auditlog.register(Finding_Group) - auditlog.register(Product_Type) - auditlog.register(Product) - auditlog.register(Test) - auditlog.register(Risk_Acceptance) - auditlog.register(Finding_Template) - auditlog.register(Cred_User, exclude_fields=["password"]) - auditlog.register(Notification_Webhooks, exclude_fields=["header_name", "header_value"]) - logger.info("Successfully enabled django-auditlog") - - -def disable_django_auditlog(): - """Disable django-auditlog by unregistering models.""" - # Import inside function to avoid AppRegistryNotReady errors - from auditlog.registry import auditlog # noqa: PLC0415 - - from dojo.models import ( # noqa: PLC0415 - Cred_User, - Dojo_User, - Endpoint, - Engagement, - Finding, - Finding_Group, - Finding_Template, - Notification_Webhooks, - Product, - Product_Type, - Risk_Acceptance, - Test, - ) + logger.info("Audit flush summary: pghistory %s=%s batches=%s", verb, p_deleted, p_batches) - # Only log during actual application startup, not during shell commands - if "shell" not in sys.argv: - logger.info("Django-auditlog disabled - unregistering models") - - # Unregister all models from auditlog - models_to_unregister = [ - Dojo_User, Endpoint, Engagement, Finding, Finding_Group, - Product_Type, Product, Test, Risk_Acceptance, Finding_Template, - Cred_User, Notification_Webhooks, - ] - - for model in models_to_unregister: - with contextlib.suppress(Exception): - # Model might not be registered, ignore the error - auditlog.unregister(model) + return p_deleted, 
p_batches, bool(p_limit) def register_django_pghistory_models(): @@ -308,6 +220,26 @@ def register_django_pghistory_models(): }, )(Finding) + # # Track the reviewers ManyToMany relationship through table + # # This tracks additions/removals of reviewers from findings + # reviewers_through = Finding._meta.get_field("reviewers").remote_field.through + # if reviewers_through: + # logger.info(f"Tracking reviewers M2M through table: {reviewers_through} (db_table: {reviewers_through._meta.db_table})") + # pghistory.track( + # pghistory.InsertEvent(), + # pghistory.DeleteEvent(), + # meta={ + # "indexes": [ + # models.Index(fields=["pgh_created_at"]), + # models.Index(fields=["pgh_label"]), + # models.Index(fields=["pgh_context_id"]), + # ], + # }, + # )(reviewers_through) + # logger.info("Successfully registered pghistory tracking for reviewers through table") + # else: + # logger.warning("Could not find reviewers through table for Finding model!") + pghistory.track( pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), @@ -427,30 +359,6 @@ def register_django_pghistory_models(): logger.info("Successfully registered models with django-pghistory") -def enable_django_pghistory(): - """Enable django-pghistory by enabling triggers.""" - logger.info("Enabling django-pghistory: Enabling triggers") - - # Enable pghistory triggers - try: - call_command("pgtrigger", "enable") - logger.info("Successfully enabled pghistory triggers") - except Exception as e: - logger.warning(f"Failed to enable pgtrigger triggers: {e}") - # Don't raise the exception as this shouldn't prevent Django from starting - - -def disable_django_pghistory(): - """Disable django-pghistory by disabling triggers.""" - logger.info("Disabling django-pghistory: Disabling triggers") - try: - call_command("pgtrigger", "disable") - logger.info("Successfully disabled pghistory triggers") - except Exception as e: - logger.warning(f"Failed to disable pgtrigger triggers: {e}") - # Don't raise the exception as this shouldn't prevent Django from starting - - def configure_pghistory_triggers(): """ Configure pghistory triggers based on audit settings. @@ -466,44 +374,52 @@ def configure_pghistory_triggers(): except Exception as e: logger.error(f"Failed to disable pghistory triggers: {e}") raise - elif settings.AUDITLOG_TYPE == "django-pghistory": + else: + # Only pghistory is supported now try: call_command("pgtrigger", "enable") logger.info("Successfully enabled pghistory triggers") except Exception as e: logger.error(f"Failed to enable pghistory triggers: {e}") raise - else: - try: - call_command("pgtrigger", "disable") - logger.info("Successfully disabled pghistory triggers") - except Exception as e: - logger.error(f"Failed to disable pghistory triggers: {e}") - raise def configure_audit_system(): """ Configure the audit system based on settings. - Note: This function only handles auditlog registration. pghistory model registration - is handled in apps.py, and trigger management should be done via the - configure_pghistory_triggers() function to avoid database access during initialization. + django-auditlog is no longer supported. Only django-pghistory is allowed. """ # Only log during actual application startup, not during shell commands log_enabled = "shell" not in sys.argv + # Fail if DD_AUDITLOG_TYPE is still configured (removed setting) + auditlog_type_env = os.environ.get("DD_AUDITLOG_TYPE") + if auditlog_type_env: + error_msg = ( + "DD_AUDITLOG_TYPE environment variable is no longer supported. 
" + "DefectDojo now exclusively uses django-pghistory for audit logging. " + "Please remove DD_AUDITLOG_TYPE from your environment configuration. " + "All new audit entries will be created using django-pghistory automatically." + ) + logger.error(error_msg) + raise ValueError(error_msg) + + # Fail if AUDITLOG_TYPE is manually set in settings files (removed setting) + if hasattr(settings, "AUDITLOG_TYPE"): + error_msg = ( + "AUDITLOG_TYPE setting is no longer supported. " + "DefectDojo now exclusively uses django-pghistory for audit logging. " + "Please remove AUDITLOG_TYPE from your settings file (settings.dist.py or local_settings.py). " + "All new audit entries will be created using django-pghistory automatically." + ) + logger.error(error_msg) + raise ValueError(error_msg) + if not settings.ENABLE_AUDITLOG: if log_enabled: logger.info("Audit logging disabled") - disable_django_auditlog() return - if settings.AUDITLOG_TYPE == "django-auditlog": - if log_enabled: - logger.info("Configuring audit system: django-auditlog enabled") - enable_django_auditlog() - else: - if log_enabled: - logger.info("django-auditlog disabled (pghistory or other audit type selected)") - disable_django_auditlog() + if log_enabled: + logger.info("Audit logging configured: django-pghistory") diff --git a/dojo/management/commands/pghistory_backfill.py b/dojo/management/commands/pghistory_backfill.py index 456cbe75c5d..637073728bc 100644 --- a/dojo/management/commands/pghistory_backfill.py +++ b/dojo/management/commands/pghistory_backfill.py @@ -140,11 +140,10 @@ def disable_db_logging(self): ) def handle(self, *args, **options): - if not settings.ENABLE_AUDITLOG or settings.AUDITLOG_TYPE != "django-pghistory": + if not settings.ENABLE_AUDITLOG: self.stdout.write( self.style.WARNING( - "pghistory is not enabled. Set DD_ENABLE_AUDITLOG=True and " - "DD_AUDITLOG_TYPE=django-pghistory", + "pghistory is not enabled. Set DD_ENABLE_AUDITLOG=True", ), ) return diff --git a/dojo/management/commands/pghistory_backfill_fast.py b/dojo/management/commands/pghistory_backfill_fast.py index a2f1921fc74..f517ad6704b 100644 --- a/dojo/management/commands/pghistory_backfill_fast.py +++ b/dojo/management/commands/pghistory_backfill_fast.py @@ -457,11 +457,10 @@ def disable_db_logging(self): ) def handle(self, *args, **options): - if not settings.ENABLE_AUDITLOG or settings.AUDITLOG_TYPE != "django-pghistory": + if not settings.ENABLE_AUDITLOG: self.stdout.write( self.style.WARNING( - "pghistory is not enabled. Set DD_ENABLE_AUDITLOG=True and " - "DD_AUDITLOG_TYPE=django-pghistory", + "pghistory is not enabled. Set DD_ENABLE_AUDITLOG=True", ), ) return diff --git a/dojo/management/commands/pghistory_clear.py b/dojo/management/commands/pghistory_clear.py index a2593ac25ca..97b938293d7 100644 --- a/dojo/management/commands/pghistory_clear.py +++ b/dojo/management/commands/pghistory_clear.py @@ -35,11 +35,10 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - if not settings.ENABLE_AUDITLOG or settings.AUDITLOG_TYPE != "django-pghistory": + if not settings.ENABLE_AUDITLOG: self.stdout.write( self.style.WARNING( - "pghistory is not enabled. Set DD_ENABLE_AUDITLOG=True and " - "DD_AUDITLOG_TYPE=django-pghistory", + "pghistory is not enabled. 
Set DD_ENABLE_AUDITLOG=True", ), ) return diff --git a/dojo/middleware.py b/dojo/middleware.py index 5d63b1a35a0..534fccb59a5 100644 --- a/dojo/middleware.py +++ b/dojo/middleware.py @@ -6,13 +6,10 @@ from urllib.parse import quote import pghistory.middleware -from auditlog.context import set_actor -from auditlog.middleware import AuditlogMiddleware as _AuditlogMiddleware from django.conf import settings from django.db import models from django.http import HttpResponseRedirect from django.urls import reverse -from django.utils.functional import SimpleLazyObject from watson.middleware import SearchContextMiddleware from watson.search import search_context_manager @@ -180,20 +177,6 @@ def __call__(self, request): return self.get_response(request) -# This solution comes from https://github.com/jazzband/django-auditlog/issues/115#issuecomment-1539262735 -# It fix situation when TokenAuthentication is used in API. Otherwise, actor in AuditLog would be set to None -class AuditlogMiddleware(_AuditlogMiddleware): - def __call__(self, request): - remote_addr = self._get_remote_addr(request) - - user = SimpleLazyObject(lambda: getattr(request, "user", None)) - - context = set_actor(actor=user, remote_addr=remote_addr) - - with context: - return self.get_response(request) - - class PgHistoryMiddleware(pghistory.middleware.HistoryMiddleware): """ diff --git a/dojo/settings/settings.dist.py b/dojo/settings/settings.dist.py index 2ca0c60b462..41b9f379b9b 100644 --- a/dojo/settings/settings.dist.py +++ b/dojo/settings/settings.dist.py @@ -31,7 +31,7 @@ DD_SITE_URL=(str, "http://localhost:8080"), DD_DEBUG=(bool, False), DD_DJANGO_DEBUG_TOOLBAR_ENABLED=(bool, False), - # django-auditlog imports django-jsonfield-backport raises a warning that can be ignored, + # django-jsonfield-backport raises a warning that can be ignored, # see https://github.com/laymonage/django-jsonfield-backport # debug_toolbar.E001 is raised when running tests in dev mode via run-unittests.sh DD_SILENCED_SYSTEM_CHECKS=(list, ["debug_toolbar.E001", "django_jsonfield_backport.W001"]), @@ -321,12 +321,9 @@ DD_DEDUPLICATION_ALGORITHM_PER_PARSER=(str, ""), # Dictates whether cloud banner is created or not DD_CREATE_CLOUD_BANNER=(bool, True), - # With this setting turned on, Dojo maintains an audit log of changes made to entities (Findings, Tests, Engagements, Procuts, ...) - # If you run big import you may want to disable this because the way django-auditlog currently works, there's - # a big performance hit. Especially during (re-)imports. + # With this setting turned on, Dojo maintains an audit log of changes made to entities (Findings, Tests, Engagements, Products, ...) + # If you run big import you may want to disable this because there's a performance hit during (re-)imports. 
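+    # For example (illustrative; the exact mechanism depends on your deployment): set
+    # DD_ENABLE_AUDITLOG=False in the container environment before a very large (re-)import and set it back to True afterwards.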
DD_ENABLE_AUDITLOG=(bool, True), - # Audit logging system: "django-auditlog" (default) or "django-pghistory" - DD_AUDITLOG_TYPE=(str, "django-auditlog"), # Specifies whether the "first seen" date of a given report should be used over the "last seen" date DD_USE_FIRST_SEEN=(bool, False), # When set to True, use the older version of the qualys parser that is a more heavy handed in setting severity @@ -943,7 +940,6 @@ def generate_url(scheme, double_slashes, user, password, host, port, path, param "dojo.middleware.AdditionalHeaderMiddleware", "social_django.middleware.SocialAuthExceptionMiddleware", "crum.CurrentRequestUserMiddleware", - "dojo.middleware.AuditlogMiddleware", "dojo.middleware.AsyncSearchContextMiddleware", "dojo.request_cache.middleware.RequestCacheMiddleware", "dojo.middleware.LongRunningRequestAlertMiddleware", @@ -1948,9 +1944,6 @@ def saml2_attrib_map_format(din): # ------------------------------------------------------------------------------ AUDITLOG_FLUSH_RETENTION_PERIOD = env("DD_AUDITLOG_FLUSH_RETENTION_PERIOD") ENABLE_AUDITLOG = env("DD_ENABLE_AUDITLOG") -AUDITLOG_TYPE = env("DD_AUDITLOG_TYPE") -AUDITLOG_TWO_STEP_MIGRATION = False -AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = False AUDITLOG_FLUSH_BATCH_SIZE = env("DD_AUDITLOG_FLUSH_BATCH_SIZE") AUDITLOG_FLUSH_MAX_BATCHES = env("DD_AUDITLOG_FLUSH_MAX_BATCHES") @@ -2046,12 +2039,8 @@ def show_toolbar(request): middleware_list = list(MIDDLEWARE) crum_index = middleware_list.index("crum.CurrentRequestUserMiddleware") - if AUDITLOG_TYPE == "django-auditlog": - # Insert AuditlogMiddleware before CurrentRequestUserMiddleware - middleware_list.insert(crum_index, "dojo.middleware.AuditlogMiddleware") - elif AUDITLOG_TYPE == "django-pghistory": - # Insert pghistory HistoryMiddleware before CurrentRequestUserMiddleware - middleware_list.insert(crum_index, "dojo.middleware.PgHistoryMiddleware") + # Insert pghistory HistoryMiddleware before CurrentRequestUserMiddleware + middleware_list.insert(crum_index, "dojo.middleware.PgHistoryMiddleware") MIDDLEWARE = middleware_list diff --git a/dojo/views.py b/dojo/views.py index ae2303e3ea4..d6b33f8c08b 100644 --- a/dojo/views.py +++ b/dojo/views.py @@ -135,7 +135,7 @@ def action_history(request, cid, oid): else: product_tab.setEngagement(object_value.engagement) - # Get audit history from both systems separately + # Get audit history from pghistory (and legacy django-auditlog entries if available) auditlog_history = [] pghistory_history = [] diff --git a/unittests/test_auditlog.py b/unittests/test_auditlog.py index c748758cc67..f0cf3dbfcb3 100644 --- a/unittests/test_auditlog.py +++ b/unittests/test_auditlog.py @@ -1,22 +1,18 @@ """ Unit tests for audit configuration functionality. -Tests the dual-audit system where both django-auditlog and django-pghistory -can coexist, allowing users to see historical data from both systems. +Tests pghistory audit system configuration and event creation. 
""" +import os from unittest.mock import MagicMock, patch -from auditlog.models import LogEntry from django.apps import apps +from django.conf import settings from django.test import TestCase, override_settings from dojo.auditlog import ( configure_audit_system, configure_pghistory_triggers, - disable_django_auditlog, - disable_django_pghistory, - enable_django_auditlog, - enable_django_pghistory, register_django_pghistory_models, ) from dojo.models import Product_Type @@ -26,23 +22,6 @@ class TestAuditConfig(TestCase): """Test audit configuration functionality.""" - @patch("auditlog.registry.auditlog") - def test_enable_django_auditlog(self, mock_auditlog): - """Test that enable_django_auditlog registers models.""" - # Mock the auditlog registry - mock_auditlog.register = MagicMock() - - enable_django_auditlog() - - # Verify that register was called multiple times (once for each model) - self.assertTrue(mock_auditlog.register.called) - self.assertGreater(mock_auditlog.register.call_count, 5) - - def test_disable_django_auditlog(self): - """Test that disable_django_auditlog runs without error.""" - # This should not raise an exception - disable_django_auditlog() - @patch("dojo.auditlog.pghistory") def test_register_django_pghistory_models(self, mock_pghistory): """Test that register_django_pghistory_models registers all models.""" @@ -60,69 +39,38 @@ def test_register_django_pghistory_models(self, mock_pghistory): self.assertGreater(mock_pghistory.track.call_count, 5) @patch("dojo.auditlog.call_command") - def test_enable_django_pghistory(self, mock_call_command): - """Test that enable_django_pghistory enables triggers only.""" - enable_django_pghistory() + def test_configure_pghistory_triggers_enabled(self, mock_call_command): + """Test that configure_pghistory_triggers enables triggers when audit logging is enabled.""" + with override_settings(ENABLE_AUDITLOG=True): + configure_pghistory_triggers() # Verify that pgtrigger enable command was called mock_call_command.assert_called_with("pgtrigger", "enable") @patch("dojo.auditlog.call_command") - def test_disable_django_pghistory(self, mock_call_command): - """Test that disable_django_pghistory disables triggers.""" - disable_django_pghistory() + def test_configure_pghistory_triggers_disabled(self, mock_call_command): + """Test that configure_pghistory_triggers disables triggers when audit logging is disabled.""" + with override_settings(ENABLE_AUDITLOG=False): + configure_pghistory_triggers() # Verify that pgtrigger disable command was called - mock_call_command.assert_called_once_with("pgtrigger", "disable") - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="invalid-type") - @patch("dojo.auditlog.disable_django_auditlog") - @patch("dojo.auditlog.call_command") - def test_invalid_audit_type_warning(self, mock_call_command, mock_disable_auditlog): - """Test that invalid audit types disable both audit systems.""" - # Call the main configuration function with invalid type - configure_audit_system() - configure_pghistory_triggers() - - # Verify that auditlog is disabled for invalid type - mock_disable_auditlog.assert_called_once() - # Verify that pghistory triggers are also disabled for invalid type mock_call_command.assert_called_with("pgtrigger", "disable") - # This test mainly ensures no exceptions are raised - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") - @patch("dojo.auditlog.enable_django_auditlog") - @patch("dojo.auditlog.disable_django_auditlog") - @patch("dojo.auditlog.call_command") - def 
test_dual_audit_system_coexistence(self, mock_call_command, mock_disable_auditlog, mock_enable_auditlog): - """Test that audit system configuration handles pghistory type correctly.""" - # Call the main configuration function + @override_settings(ENABLE_AUDITLOG=True) + def test_configure_audit_system_enabled(self): + """Test that configure_audit_system configures pghistory when audit logging is enabled.""" + # Should not raise an exception configure_audit_system() - configure_pghistory_triggers() - - # Verify auditlog is disabled when pghistory is the chosen type - mock_disable_auditlog.assert_called_once() - # Verify auditlog is not enabled when pghistory is chosen - mock_enable_auditlog.assert_not_called() - # Verify that pghistory triggers are enabled when pghistory is the chosen type - mock_call_command.assert_called_with("pgtrigger", "enable") - - # This demonstrates that the system correctly chooses the configured audit type - def test_separate_history_lists_approach(self): - """Test that the dual-history approach creates separate lists correctly.""" - # This test verifies the new approach where we maintain separate history lists - # instead of mixing audit data from different systems - - # Import the view function to test the separation logic - - # This is more of a structural test to ensure the approach is sound - # The actual view testing would require more complex setup + @override_settings(ENABLE_AUDITLOG=False) + def test_configure_audit_system_disabled(self): + """Test that configure_audit_system handles disabled audit logging.""" + # Should not raise an exception + configure_audit_system() - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") + @override_settings(ENABLE_AUDITLOG=True) def test_pghistory_insert_event_creation(self): - """Test that pghistory creates insert events when a Product_Type is created and auditlog does not.""" + """Test that pghistory creates insert events when a Product_Type is created.""" # Configure audit system for pghistory configure_audit_system() configure_pghistory_triggers() @@ -133,9 +81,6 @@ def test_pghistory_insert_event_creation(self): # Count existing events before creating new Product_Type initial_event_count = ProductTypeEvent.objects.count() - # Clear any existing audit log entries for Product_Type - LogEntry.objects.filter(content_type__model="product_type").delete() - # Create a new Product_Type product_type = Product_Type.objects.create( name="Test Product Type for pghistory", @@ -159,139 +104,41 @@ def test_pghistory_insert_event_creation(self): "Event should contain the Product_Type description") # Verify it's an insert event (check if pgh_label indicates creation) - # The label could be 'insert' or contain insert-related information self.assertIsNotNone(latest_event.pgh_created_at, "Event should have a creation timestamp") - # Verify that NO auditlog entries were created (mutual exclusivity) - audit_entries = LogEntry.objects.filter( - content_type__model="product_type", - object_id=product_type.id, - ) - self.assertEqual(audit_entries.count(), 0, - "Expected NO auditlog entries when pghistory is enabled") - # Clean up product_type.delete() - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - @patch("dojo.auditlog.enable_django_auditlog") - @patch("dojo.auditlog.call_command") - def test_configure_audit_system_auditlog_enabled(self, mock_call_command, mock_enable_auditlog): - """Test that configure_audit_system enables auditlog and configures pghistory triggers correctly.""" - 
configure_audit_system() - configure_pghistory_triggers() - - # Verify that auditlog is enabled - mock_enable_auditlog.assert_called_once() - # Verify that pghistory triggers are disabled when auditlog is the chosen type - mock_call_command.assert_called_with("pgtrigger", "disable") - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") - @patch("dojo.auditlog.disable_django_auditlog") - @patch("dojo.auditlog.call_command") - def test_configure_audit_system_pghistory_enabled(self, mock_call_command, mock_disable_auditlog): - """Test that configure_audit_system disables auditlog and enables pghistory triggers correctly.""" - configure_audit_system() - configure_pghistory_triggers() - - # Verify that auditlog is disabled when pghistory is the chosen type - mock_disable_auditlog.assert_called_once() - # Verify that pghistory triggers are enabled when pghistory is the chosen type - mock_call_command.assert_called_with("pgtrigger", "enable") - - @override_settings(ENABLE_AUDITLOG=False) - @patch("dojo.auditlog.disable_django_auditlog") - @patch("dojo.auditlog.call_command") - def test_configure_audit_system_all_disabled(self, mock_call_command, mock_disable_auditlog): - """Test that configure_audit_system disables both auditlog and pghistory when audit is disabled.""" - configure_audit_system() - configure_pghistory_triggers() - - # Verify that auditlog is disabled when audit logging is disabled - mock_disable_auditlog.assert_called_once() - # Verify that pghistory triggers are also disabled when audit logging is disabled - mock_call_command.assert_called_with("pgtrigger", "disable") - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="unknown-type") - @patch("dojo.auditlog.disable_django_auditlog") - @patch("dojo.auditlog.call_command") - def test_configure_audit_system_unknown_type(self, mock_call_command, mock_disable_auditlog): - """Test that configure_audit_system disables both systems for unknown audit types.""" - configure_audit_system() - configure_pghistory_triggers() - - # Verify that auditlog is disabled for unknown types - mock_disable_auditlog.assert_called_once() - # Verify that pghistory triggers are also disabled for unknown types - mock_call_command.assert_called_with("pgtrigger", "disable") - - @patch("dojo.auditlog.call_command") - def test_disable_pghistory_command_failure(self, mock_call_command): - """Test that disable_django_pghistory handles command failures gracefully.""" - # Simulate command failure - mock_call_command.side_effect = Exception("Command failed") - - # This should not raise an exception - disable_django_pghistory() - - # Verify that call_command was attempted - mock_call_command.assert_called_once_with("pgtrigger", "disable") - - @patch("dojo.auditlog.call_command") - def test_enable_pghistory_command_failure(self, mock_call_command): - """Test that enable_django_pghistory handles command failures gracefully.""" - # Simulate command failure for trigger enable - mock_call_command.side_effect = Exception("Command failed") - - # This should not raise an exception - enable_django_pghistory() - - # Verify that call_command was attempted - mock_call_command.assert_called_with("pgtrigger", "enable") - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - def test_auditlog_insert_event_creation(self): - """Test that django-auditlog creates audit log entries when a Product_Type is created and pghistory does not.""" - # Configure audit system for auditlog - configure_audit_system() - 
configure_pghistory_triggers() - - # Get the Product_Type event model for pghistory check - ProductTypeEvent = apps.get_model("dojo", "Product_TypeEvent") - - # Clear any existing audit log entries for Product_Type - LogEntry.objects.filter(content_type__model="product_type").delete() - - # Count existing pghistory events - initial_pghistory_count = ProductTypeEvent.objects.count() - - # Create a new Product_Type - product_type = Product_Type.objects.create( - name="Test Product Type for Auditlog", - description="Test description for auditlog verification", - ) - - # Verify that an audit log entry was created - audit_entries = LogEntry.objects.filter( - content_type__model="product_type", - object_id=product_type.id, - action=LogEntry.Action.CREATE, - ) - - self.assertEqual(audit_entries.count(), 1, - "Expected exactly one audit log entry for Product_Type creation") - - audit_entry = audit_entries.first() - self.assertEqual(audit_entry.object_repr, str(product_type), - "Audit entry should represent the created object") - self.assertIsNotNone(audit_entry.timestamp, - "Audit entry should have a timestamp") - - # Verify that NO pghistory events were created (mutual exclusivity) - final_pghistory_count = ProductTypeEvent.objects.count() - self.assertEqual(final_pghistory_count, initial_pghistory_count, - "Expected NO new pghistory events when auditlog is enabled") - - # Clean up - product_type.delete() + def test_configure_audit_system_fails_with_dd_auditlog_type_env(self): + """Test that configure_audit_system fails if DD_AUDITLOG_TYPE environment variable is set.""" + # Temporarily set the environment variable + original_value = os.environ.get("DD_AUDITLOG_TYPE") + try: + os.environ["DD_AUDITLOG_TYPE"] = "django-pghistory" + with self.assertRaises(ValueError) as context: + configure_audit_system() + self.assertIn("DD_AUDITLOG_TYPE", str(context.exception)) + finally: + # Restore original value + if original_value is None: + os.environ.pop("DD_AUDITLOG_TYPE", None) + else: + os.environ["DD_AUDITLOG_TYPE"] = original_value + + def test_configure_audit_system_fails_with_auditlog_type_setting(self): + """Test that configure_audit_system fails if AUDITLOG_TYPE setting is manually set.""" + # Temporarily add the setting + original_value = getattr(settings, "AUDITLOG_TYPE", None) + try: + settings.AUDITLOG_TYPE = "django-pghistory" + with self.assertRaises(ValueError) as context: + configure_audit_system() + self.assertIn("AUDITLOG_TYPE", str(context.exception)) + finally: + # Restore original value + if original_value is None: + if hasattr(settings, "AUDITLOG_TYPE"): + delattr(settings, "AUDITLOG_TYPE") + else: + settings.AUDITLOG_TYPE = original_value diff --git a/unittests/test_flush_auditlog.py b/unittests/test_flush_auditlog.py index 1c7f5ef08df..bf139e8e144 100644 --- a/unittests/test_flush_auditlog.py +++ b/unittests/test_flush_auditlog.py @@ -1,11 +1,15 @@ -import logging -from datetime import UTC, date, datetime +""" +Unit tests for flush_auditlog functionality. -from auditlog.models import LogEntry +Tests the flush_auditlog management command and task that removes old audit log entries. 
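+
+Retention is driven by the AUDITLOG_FLUSH_RETENTION_PERIOD setting (in months) and is overridden per test below,
+e.g. @override_settings(AUDITLOG_FLUSH_RETENTION_PERIOD=0) to delete everything; 1 keeps roughly the last month of
+events and -1 disables flushing entirely (illustrative summary of the cases exercised here).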
+""" +import logging +from datetime import UTC, datetime from dateutil.relativedelta import relativedelta +from django.apps import apps from django.test import override_settings -from dojo.models import Finding +from dojo.models import Product_Type from dojo.tasks import flush_auditlog from .dojo_test_case import DojoTestCase @@ -18,32 +22,71 @@ class TestFlushAuditlog(DojoTestCase): @override_settings(AUDITLOG_FLUSH_RETENTION_PERIOD=-1) def test_flush_auditlog_disabled(self): - entries_before = LogEntry.objects.all().count() + """Test that flush_auditlog does nothing when retention period is -1 (disabled).""" + # Get pghistory event model + ProductTypeEvent = apps.get_model("dojo", "Product_TypeEvent") + entries_before = ProductTypeEvent.objects.count() + flush_auditlog() - entries_after = LogEntry.objects.all().count() + + entries_after = ProductTypeEvent.objects.count() self.assertEqual(entries_before, entries_after) @override_settings(AUDITLOG_FLUSH_RETENTION_PERIOD=0) def test_delete_all_entries(self): - entries_before = LogEntry.objects.filter(timestamp__date__lt=date.today()).count() + """Test that flush_auditlog deletes all entries when retention period is 0.""" + # Get pghistory event model + ProductTypeEvent = apps.get_model("dojo", "Product_TypeEvent") + + # Create a test product type to generate events + product_type = Product_Type.objects.create( + name="Test Product Type for Flush", + description="Test description", + ) + + # Flush with retention period 0 (delete all) flush_auditlog() - entries_after = LogEntry.objects.filter(timestamp__date__lt=date.today()).count() - # we have three old log entries in our testdata - self.assertEqual(entries_before - 3, entries_after) + + # All entries should be deleted + entries_after = ProductTypeEvent.objects.count() + self.assertEqual(entries_after, 0, "All entries should be deleted when retention period is 0") + + # Clean up + product_type.delete() @override_settings(AUDITLOG_FLUSH_RETENTION_PERIOD=1) def test_delete_entries_with_retention_period(self): - entries_before = LogEntry.objects.filter(timestamp__date__lt=datetime.now(UTC)).count() - two_weeks_ago = datetime.now(UTC) - relativedelta(weeks=2) - log_entry = LogEntry.objects.log_create( - instance=Finding.objects.all()[0], - timestamp=two_weeks_ago, - changes="foo", - action=LogEntry.Action.UPDATE, + """Test that flush_auditlog deletes entries older than retention period.""" + # Get pghistory event model + ProductTypeEvent = apps.get_model("dojo", "Product_TypeEvent") + + # Create a test product type + product_type = Product_Type.objects.create( + name="Test Product Type for Retention", + description="Test description", ) - log_entry.timestamp = two_weeks_ago - log_entry.save() + + # Get the event created by the creation + recent_event = ProductTypeEvent.objects.filter(pgh_obj_id=product_type.id).first() + + # Manually create an old event by updating the timestamp + # Set it to 2 months ago so it will be deleted with retention period of 1 month + if recent_event: + two_months_ago = datetime.now(UTC) - relativedelta(months=2) + # Update the created_at timestamp to make it old + ProductTypeEvent.objects.filter(pk=recent_event.pk).update(pgh_created_at=two_months_ago) + + # Count events before flush + entries_before = ProductTypeEvent.objects.count() + + # Flush with retention period of 1 month flush_auditlog() - entries_after = LogEntry.objects.filter(timestamp__date__lt=datetime.now(UTC)).count() - # we have three old log entries in our testdata and added a new one - 
self.assertEqual(entries_before - 3 + 1, entries_after) + + # Count events after flush + entries_after = ProductTypeEvent.objects.count() + + # The old event should be deleted (2 months old > 1 month retention) + self.assertLess(entries_after, entries_before, "Old entries should be deleted") + + # Clean up + product_type.delete() diff --git a/unittests/test_importers_performance.py b/unittests/test_importers_performance.py index 38d63babad1..8a71c7223b2 100644 --- a/unittests/test_importers_performance.py +++ b/unittests/test_importers_performance.py @@ -170,22 +170,7 @@ def _import_reimport_performance(self, expected_num_queries1, expected_num_async reimporter = DefaultReImporter(**reimport_options) test, _, _len_new_findings, _len_closed_findings, _, _, _ = reimporter.process_scan(scan) - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - def test_import_reimport_reimport_performance_async(self): - # Ensure django-auditlog is properly configured for this test - configure_audit_system() - configure_pghistory_triggers() - - self._import_reimport_performance( - expected_num_queries1=340, - expected_num_async_tasks1=10, - expected_num_queries2=288, - expected_num_async_tasks2=22, - expected_num_queries3=175, - expected_num_async_tasks3=20, - ) - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") + @override_settings(ENABLE_AUDITLOG=True) def test_import_reimport_reimport_performance_pghistory_async(self): """ This test checks the performance of the importers when using django-pghistory with async enabled. @@ -203,31 +188,7 @@ def test_import_reimport_reimport_performance_pghistory_async(self): expected_num_async_tasks3=20, ) - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - def test_import_reimport_reimport_performance_no_async(self): - """ - This test checks the performance of the importers when they are run in sync mode. - The reason for this is that we also want to be aware of when a PR affects the number of queries - or async tasks created by a background task. - The impersonate context manager above does not work as expected for disabling async, - so we patch the we_want_async decorator to always return False. - """ - configure_audit_system() - configure_pghistory_triggers() - - testuser = User.objects.get(username="admin") - testuser.usercontactinfo.block_execution = True - testuser.usercontactinfo.save() - self._import_reimport_performance( - expected_num_queries1=350, - expected_num_async_tasks1=10, - expected_num_queries2=305, - expected_num_async_tasks2=22, - expected_num_queries3=190, - expected_num_async_tasks3=20, - ) - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") + @override_settings(ENABLE_AUDITLOG=True) def test_import_reimport_reimport_performance_pghistory_no_async(self): """ This test checks the performance of the importers when using django-pghistory with async disabled. @@ -249,33 +210,7 @@ def test_import_reimport_reimport_performance_pghistory_no_async(self): expected_num_async_tasks3=20, ) - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - def test_import_reimport_reimport_performance_no_async_with_product_grading(self): - """ - This test checks the performance of the importers when they are run in sync mode. - The reason for this is that we also want to be aware of when a PR affects the number of queries - or async tasks created by a background task. 
- The impersonate context manager above does not work as expected for disabling async, - so we patch the we_want_async decorator to always return False. - """ - configure_audit_system() - configure_pghistory_triggers() - - testuser = User.objects.get(username="admin") - testuser.usercontactinfo.block_execution = True - testuser.usercontactinfo.save() - self.system_settings(enable_product_grade=True) - - self._import_reimport_performance( - expected_num_queries1=351, - expected_num_async_tasks1=11, - expected_num_queries2=306, - expected_num_async_tasks2=23, - expected_num_queries3=191, - expected_num_async_tasks3=21, - ) - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") + @override_settings(ENABLE_AUDITLOG=True) def test_import_reimport_reimport_performance_pghistory_no_async_with_product_grading(self): """ This test checks the performance of the importers when using django-pghistory with async disabled and product grading enabled. @@ -400,27 +335,7 @@ def _deduplication_performance(self, expected_num_queries1, expected_num_async_t total_findings = Finding.objects.filter(test__engagement=engagement).count() self.assertEqual(total_findings, 12, f"Expected 12 total findings, got {total_findings}") - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - def test_deduplication_performance_async(self): - """ - Test deduplication performance with async tasks enabled. - This test imports the same scan twice to measure deduplication query and task overhead. - """ - configure_audit_system() - configure_pghistory_triggers() - - # Enable deduplication - self.system_settings(enable_deduplication=True) - - self._deduplication_performance( - expected_num_queries1=311, - expected_num_async_tasks1=12, - expected_num_queries2=204, - expected_num_async_tasks2=12, - check_duplicates=False, # Async mode - deduplication happens later - ) - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") + @override_settings(ENABLE_AUDITLOG=True) def test_deduplication_performance_pghistory_async(self): """Test deduplication performance with django-pghistory and async tasks enabled.""" configure_audit_system() @@ -437,27 +352,7 @@ def test_deduplication_performance_pghistory_async(self): check_duplicates=False, # Async mode - deduplication happens later ) - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-auditlog") - def test_deduplication_performance_no_async(self): - """Test deduplication performance with async tasks disabled.""" - configure_audit_system() - configure_pghistory_triggers() - - # Enable deduplication - self.system_settings(enable_deduplication=True) - - testuser = User.objects.get(username="admin") - testuser.usercontactinfo.block_execution = True - testuser.usercontactinfo.save() - - self._deduplication_performance( - expected_num_queries1=323, - expected_num_async_tasks1=12, - expected_num_queries2=318, - expected_num_async_tasks2=12, - ) - - @override_settings(ENABLE_AUDITLOG=True, AUDITLOG_TYPE="django-pghistory") + @override_settings(ENABLE_AUDITLOG=True) def test_deduplication_performance_pghistory_no_async(self): """Test deduplication performance with django-pghistory and async tasks disabled.""" configure_audit_system() From 94fb6dea55d6d7a34b381cad3ecded4b956f42b1 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 1 Nov 2025 18:07:14 +0100 Subject: [PATCH 2/8] ruff --- unittests/test_flush_auditlog.py | 1 + 1 file changed, 1 insertion(+) diff --git a/unittests/test_flush_auditlog.py 
b/unittests/test_flush_auditlog.py index bf139e8e144..da347d952d5 100644 --- a/unittests/test_flush_auditlog.py +++ b/unittests/test_flush_auditlog.py @@ -5,6 +5,7 @@ """ import logging from datetime import UTC, datetime + from dateutil.relativedelta import relativedelta from django.apps import apps from django.test import override_settings From 1a4e7a8772b3a3ccee7a759030fc5fd6b5d49213 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 1 Nov 2025 18:49:06 +0100 Subject: [PATCH 3/8] pghistory: add finding.reviewers to tracked models --- dojo/auditlog.py | 45 +++++++------ ...eviewers_findingreviewersevent_and_more.py | 64 +++++++++++++++++++ .../management/commands/pghistory_backfill.py | 1 + .../commands/pghistory_backfill_fast.py | 1 + 4 files changed, 91 insertions(+), 20 deletions(-) create mode 100644 dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py diff --git a/dojo/auditlog.py b/dojo/auditlog.py index d0e1354086c..1435600d307 100644 --- a/dojo/auditlog.py +++ b/dojo/auditlog.py @@ -220,26 +220,6 @@ def register_django_pghistory_models(): }, )(Finding) - # # Track the reviewers ManyToMany relationship through table - # # This tracks additions/removals of reviewers from findings - # reviewers_through = Finding._meta.get_field("reviewers").remote_field.through - # if reviewers_through: - # logger.info(f"Tracking reviewers M2M through table: {reviewers_through} (db_table: {reviewers_through._meta.db_table})") - # pghistory.track( - # pghistory.InsertEvent(), - # pghistory.DeleteEvent(), - # meta={ - # "indexes": [ - # models.Index(fields=["pgh_created_at"]), - # models.Index(fields=["pgh_label"]), - # models.Index(fields=["pgh_context_id"]), - # ], - # }, - # )(reviewers_through) - # logger.info("Successfully registered pghistory tracking for reviewers through table") - # else: - # logger.warning("Could not find reviewers through table for Finding model!") - pghistory.track( pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), @@ -354,6 +334,31 @@ def register_django_pghistory_models(): }, )(Notification_Webhooks) + # Track Finding.reviewers ManyToMany relationship + # Create a proxy model for the through table as per pghistory docs: + # https://django-pghistory.readthedocs.io/en/2.4.2/tutorial.html#tracking-many-to-many-events + # Note: For auto-generated through models, we don't specify obj_fk/obj_field + # as Django doesn't allow foreign keys to auto-generated through models + reviewers_through = Finding._meta.get_field("reviewers").remote_field.through + + class FindingReviewers(reviewers_through): + class Meta: + proxy = True + + pghistory.track( + pghistory.InsertEvent(), + pghistory.DeleteEvent(), + pghistory.ManualEvent(label="initial_import"), + meta={ + "db_table": "dojo_finding_reviewersevent", + "indexes": [ + models.Index(fields=["pgh_created_at"]), + models.Index(fields=["pgh_label"]), + models.Index(fields=["pgh_context_id"]), + ], + }, + )(FindingReviewers) + # Only log during actual application startup, not during shell commands if "shell" not in sys.argv: logger.info("Successfully registered models with django-pghistory") diff --git a/dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py b/dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py new file mode 100644 index 00000000000..1da2d3e1739 --- /dev/null +++ b/dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py @@ -0,0 +1,64 @@ +# Generated by Django 5.1.13 on 2025-11-01 
17:04 + +import django.db.models.deletion +import pgtrigger.compiler +import pgtrigger.migrations +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("dojo", "0246_endpoint_idx_ep_product_lower_host_and_more") + ] + + operations = [ + migrations.CreateModel( + name="FindingReviewers", + fields=[ + ], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("dojo.finding_reviewers",), + ), + migrations.CreateModel( + name="FindingReviewersEvent", + fields=[ + ("pgh_id", models.AutoField(primary_key=True, serialize=False)), + ("pgh_created_at", models.DateTimeField(auto_now_add=True)), + ("pgh_label", models.TextField(help_text="The event label.")), + ("id", models.IntegerField()), + ("dojo_user", models.ForeignKey(db_constraint=False, db_index=False, db_tablespace="", on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", related_query_name="+", to="dojo.dojo_user")), + ("finding", models.ForeignKey(db_constraint=False, db_index=False, db_tablespace="", on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", related_query_name="+", to="dojo.finding")), + ("pgh_context", models.ForeignKey(db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", to="pghistory.context")), + ("pgh_obj", models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, related_name="events", to="dojo.findingreviewers")), + ], + options={ + "abstract": False, + "db_table": "dojo_finding_reviewersevent", + }, + ), + migrations.AddIndex( + model_name="findingreviewersevent", + index=models.Index(fields=["pgh_created_at"], name="dojo_findin_pgh_cre_d5e5b4_idx"), + ), + migrations.AddIndex( + model_name="findingreviewersevent", + index=models.Index(fields=["pgh_label"], name="dojo_findin_pgh_lab_5517f9_idx"), + ), + migrations.AddIndex( + model_name="findingreviewersevent", + index=models.Index(fields=["pgh_context_id"], name="dojo_findin_pgh_con_06229b_idx"), + ), + pgtrigger.migrations.AddTrigger( + model_name="findingreviewers", + trigger=pgtrigger.compiler.Trigger(name="insert_insert", sql=pgtrigger.compiler.UpsertTriggerSql(func='INSERT INTO "dojo_finding_reviewersevent" ("dojo_user_id", "finding_id", "id", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id") VALUES (NEW."dojo_user_id", NEW."finding_id", NEW."id", _pgh_attach_context(), NOW(), \'insert\', NEW."id"); RETURN NULL;', hash="5c1fd440159e49c929122cbb590f96983a1c934e", operation="INSERT", pgid="pgtrigger_insert_insert_0808c", table="dojo_finding_reviewers", when="AFTER")), + ), + pgtrigger.migrations.AddTrigger( + model_name="findingreviewers", + trigger=pgtrigger.compiler.Trigger(name="delete_delete", sql=pgtrigger.compiler.UpsertTriggerSql(func='INSERT INTO "dojo_finding_reviewersevent" ("dojo_user_id", "finding_id", "id", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id") VALUES (OLD."dojo_user_id", OLD."finding_id", OLD."id", _pgh_attach_context(), NOW(), \'delete\', OLD."id"); RETURN NULL;', hash="23a4e01eaea469f708679392a6a92a6e16b21181", operation="DELETE", pgid="pgtrigger_delete_delete_40083", table="dojo_finding_reviewers", when="AFTER")), + ), + ] diff --git a/dojo/management/commands/pghistory_backfill.py b/dojo/management/commands/pghistory_backfill.py index 637073728bc..0d7789d435d 100644 --- a/dojo/management/commands/pghistory_backfill.py +++ b/dojo/management/commands/pghistory_backfill.py @@ -164,6 +164,7 @@ def handle(self, *args, **options): 
"Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group", "Product_Type", "Product", "Test", "Risk_Acceptance", "Finding_Template", "Cred_User", "Notification_Webhooks", + "FindingReviewers", # M2M through table for Finding.reviewers ] specific_model = options.get("model") diff --git a/dojo/management/commands/pghistory_backfill_fast.py b/dojo/management/commands/pghistory_backfill_fast.py index f517ad6704b..ac83168aad6 100644 --- a/dojo/management/commands/pghistory_backfill_fast.py +++ b/dojo/management/commands/pghistory_backfill_fast.py @@ -490,6 +490,7 @@ def handle(self, *args, **options): "Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group", "Product_Type", "Product", "Test", "Risk_Acceptance", "Finding_Template", "Cred_User", "Notification_Webhooks", + "FindingReviewers", # M2M through table for Finding.reviewers ] specific_model = options.get("model") From ef2f408992060f4261a3b70bb54637c1f7e54cf4 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 1 Nov 2025 19:30:23 +0100 Subject: [PATCH 4/8] fix finding reviewers model registration --- dojo/auditlog.py | 59 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 19 deletions(-) diff --git a/dojo/auditlog.py b/dojo/auditlog.py index 1435600d307..db6a5529135 100644 --- a/dojo/auditlog.py +++ b/dojo/auditlog.py @@ -18,6 +18,11 @@ logger = logging.getLogger(__name__) +# FindingReviewers proxy model will be created lazily in register_django_pghistory_models() +# Cannot be defined at module level because Finding.reviewers.through requires +# Django's app registry to be ready (AppRegistryNotReady error) +# The function is called from DojoAppConfig.ready() which guarantees the registry is ready + def _flush_models_in_batches(models_to_flush, timestamp_field: str, retention_period: int, batch_size: int, max_batches: int, *, dry_run: bool = False) -> tuple[int, int, bool]: """ @@ -339,25 +344,41 @@ def register_django_pghistory_models(): # https://django-pghistory.readthedocs.io/en/2.4.2/tutorial.html#tracking-many-to-many-events # Note: For auto-generated through models, we don't specify obj_fk/obj_field # as Django doesn't allow foreign keys to auto-generated through models - reviewers_through = Finding._meta.get_field("reviewers").remote_field.through - - class FindingReviewers(reviewers_through): - class Meta: - proxy = True - - pghistory.track( - pghistory.InsertEvent(), - pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), - meta={ - "db_table": "dojo_finding_reviewersevent", - "indexes": [ - models.Index(fields=["pgh_created_at"]), - models.Index(fields=["pgh_label"]), - models.Index(fields=["pgh_context_id"]), - ], - }, - )(FindingReviewers) + # + # We must create the proxy model here (not at module level) because: + # 1. Finding.reviewers.through requires Django's app registry to be ready + # 2. This function is called from DojoAppConfig.ready() which guarantees registry is ready + # 3. We check if it already exists to avoid re-registration warnings + # + # Note: This pattern is not explicitly documented in Django's official documentation. + # Django docs mention AppRegistryNotReady and AppConfig.ready() in general terms, but + # don't specifically cover proxy models for auto-generated ManyToMany through tables. + # This is a common pattern used by libraries like django-pghistory and is necessary + # because accessing Model.field.through at module import time triggers AppRegistryNotReady. 
+ try: + FindingReviewers = apps.get_model("dojo", "FindingReviewers") + except LookupError: + # Model doesn't exist yet, create it + # Note: Finding is imported above, and apps registry is ready when this runs + reviewers_through = Finding._meta.get_field("reviewers").remote_field.through + + class FindingReviewers(reviewers_through): + class Meta: + proxy = True + + pghistory.track( + pghistory.InsertEvent(), + pghistory.DeleteEvent(), + pghistory.ManualEvent(label="initial_import"), + meta={ + "db_table": "dojo_finding_reviewersevent", + "indexes": [ + models.Index(fields=["pgh_created_at"]), + models.Index(fields=["pgh_label"]), + models.Index(fields=["pgh_context_id"]), + ], + }, + )(FindingReviewers) # Only log during actual application startup, not during shell commands if "shell" not in sys.argv: From e6d6360132d67d36a6945c4bde2f0ee0a768b7e7 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 1 Nov 2025 20:49:22 +0100 Subject: [PATCH 5/8] remove more references --- dojo/endpoint/signals.py | 13 +--- dojo/engagement/signals.py | 13 +--- dojo/finding_group/signals.py | 13 +--- .../commands/stamp_finding_last_reviewed.py | 78 ------------------- dojo/product/signals.py | 14 +--- dojo/product_type/signals.py | 14 +--- dojo/test/signals.py | 13 +--- 7 files changed, 7 insertions(+), 151 deletions(-) delete mode 100644 dojo/management/commands/stamp_finding_last_reviewed.py diff --git a/dojo/endpoint/signals.py b/dojo/endpoint/signals.py index 50251c5a80a..aebc348c003 100644 --- a/dojo/endpoint/signals.py +++ b/dojo/endpoint/signals.py @@ -1,9 +1,7 @@ import contextlib -from auditlog.models import LogEntry from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_delete from django.dispatch import receiver from django.urls import reverse @@ -23,7 +21,7 @@ def endpoint_post_delete(sender, instance, using, origin, **kwargs): user = None if settings.ENABLE_AUDITLOG: - # First try to find deletion author in pghistory events + # Find deletion author in pghistory events # Look for delete events for this specific endpoint instance pghistory_delete_events = DojoEvents.objects.filter( pgh_obj_model="dojo.Endpoint", @@ -39,15 +37,6 @@ def endpoint_post_delete(sender, instance, using, origin, **kwargs): with contextlib.suppress(User.DoesNotExist): user = User.objects.get(id=latest_delete.user) - # Fall back to django-auditlog if no user found in pghistory - if not user: - if le := LogEntry.objects.filter( - action=LogEntry.Action.DELETE, - content_type=ContentType.objects.get(app_label="dojo", model="endpoint"), - object_id=instance.id, - ).order_by("-id").first(): - user = le.actor - # Update description with user if found if user: description = _('The endpoint "%(name)s" was deleted by %(user)s') % { diff --git a/dojo/engagement/signals.py b/dojo/engagement/signals.py index 144094a3264..0d6b8916dd2 100644 --- a/dojo/engagement/signals.py +++ b/dojo/engagement/signals.py @@ -1,9 +1,7 @@ import contextlib -from auditlog.models import LogEntry from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_delete, post_save, pre_delete, pre_save from django.dispatch import receiver from django.urls import reverse @@ -50,7 +48,7 @@ def engagement_post_delete(sender, instance, using, origin, **kwargs): user = None if settings.ENABLE_AUDITLOG: - # First 
try to find deletion author in pghistory events + # Find deletion author in pghistory events # Look for delete events for this specific engagement instance pghistory_delete_events = DojoEvents.objects.filter( pgh_obj_model="dojo.Engagement", @@ -66,15 +64,6 @@ def engagement_post_delete(sender, instance, using, origin, **kwargs): with contextlib.suppress(User.DoesNotExist): user = User.objects.get(id=latest_delete.user) - # Fall back to django-auditlog if no user found in pghistory - if not user: - if le := LogEntry.objects.filter( - action=LogEntry.Action.DELETE, - content_type=ContentType.objects.get(app_label="dojo", model="engagement"), - object_id=instance.id, - ).order_by("-id").first(): - user = le.actor - # Update description with user if found if user: description = _('The engagement "%(name)s" was deleted by %(user)s') % { diff --git a/dojo/finding_group/signals.py b/dojo/finding_group/signals.py index 3e7ffe7c7b7..4b41b838983 100644 --- a/dojo/finding_group/signals.py +++ b/dojo/finding_group/signals.py @@ -1,9 +1,7 @@ import contextlib -from auditlog.models import LogEntry from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_delete from django.dispatch import receiver from django.urls import reverse @@ -21,7 +19,7 @@ def finding_group_post_delete(sender, instance, using, origin, **kwargs): user = None if settings.ENABLE_AUDITLOG: - # First try to find deletion author in pghistory events + # Find deletion author in pghistory events # Look for delete events for this specific finding_group instance pghistory_delete_events = DojoEvents.objects.filter( pgh_obj_model="dojo.Finding_Group", @@ -37,15 +35,6 @@ def finding_group_post_delete(sender, instance, using, origin, **kwargs): with contextlib.suppress(User.DoesNotExist): user = User.objects.get(id=latest_delete.user) - # Fall back to django-auditlog if no user found in pghistory - if not user: - if le := LogEntry.objects.filter( - action=LogEntry.Action.DELETE, - content_type=ContentType.objects.get(app_label="dojo", model="finding_group"), - object_id=instance.id, - ).order_by("-id").first(): - user = le.actor - # Update description with user if found if user: description = _('The finding group "%(name)s" was deleted by %(user)s') % { diff --git a/dojo/management/commands/stamp_finding_last_reviewed.py b/dojo/management/commands/stamp_finding_last_reviewed.py deleted file mode 100644 index ade2bcb6bc3..00000000000 --- a/dojo/management/commands/stamp_finding_last_reviewed.py +++ /dev/null @@ -1,78 +0,0 @@ - -from auditlog.models import LogEntry -from django.contrib.contenttypes.models import ContentType -from django.core.management.base import BaseCommand - -from dojo.models import Finding - -""" -Authors: Jay Paz -New fields last_reviewed, last_reviewed_by, mitigated_by have been added to the Finding model -This script will update all findings with a last_reviewed date of the most current date from: -1. Finding Date if no other evidence of activity is found -2. Last note added date if a note is found -3. Mitigation Date if finding is mitigated -4. Last action_log entry date if Finding has been updated - -It will update the last_reviewed_by with the current reporter. 
- -If mitigated it will update the mitigated_by with last_reviewed_by or current reporter if last_reviewed_by is None -""" - - -class Command(BaseCommand): - help = ( - "A new field last_reviewed has been added to the Finding model \n" - "This script will update all findings with a last_reviewed date of the most current date from: \n" - "1. Finding Date if no other evidence of activity is found \n" - "2. Last note added date if a note is found \n" - "3. Mitigation Date if finding is mitigated \n" - "4. Last action_log entry date if Finding has been updated \n" - ) - - def handle(self, *args, **options): - findings = Finding.objects.all().order_by("id") - for finding in findings: - save = False - if not finding.last_reviewed: - date_discovered = finding.date - last_note_date = finding.date - - if finding.notes.all(): - last_note_date = finding.notes.order_by("-date")[ - 0].date.date() - - mitigation_date = finding.date - - if finding.mitigated: - mitigation_date = finding.mitigated.date() - - last_action_date = finding.date - - try: - ct = ContentType.objects.get_for_id( - ContentType.objects.get_for_model(finding).id) - obj = ct.get_object_for_this_type(pk=finding.id) - log_entries = LogEntry.objects.filter(content_type=ct, - object_pk=obj.id).order_by( - "-timestamp") - if log_entries: - last_action_date = log_entries[0].timestamp.date() - except KeyError: - pass - - finding.last_reviewed = max( - [date_discovered, last_note_date, mitigation_date, - last_action_date]) - save = True - - if not finding.last_reviewed_by: - finding.last_reviewed_by = finding.reporter - save = True - - if finding.mitigated: - if not finding.mitigated_by: - finding.mitigated_by = finding.last_reviewed_by or finding.reporter - save = True - if save: - finding.save() diff --git a/dojo/product/signals.py b/dojo/product/signals.py index 0ed9a62747c..efcf23da5aa 100644 --- a/dojo/product/signals.py +++ b/dojo/product/signals.py @@ -1,9 +1,7 @@ import contextlib -from auditlog.models import LogEntry from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_delete, post_save from django.dispatch import receiver from django.urls import reverse @@ -37,7 +35,7 @@ def product_post_delete(sender, instance, **kwargs): user = None if settings.ENABLE_AUDITLOG: - # First try to find deletion author in pghistory events + # Find deletion author in pghistory events # Look for delete events for this specific product instance pghistory_delete_events = DojoEvents.objects.filter( pgh_obj_model="dojo.Product", @@ -53,15 +51,7 @@ def product_post_delete(sender, instance, **kwargs): with contextlib.suppress(User.DoesNotExist): user = User.objects.get(id=latest_delete.user) - # Fall back to django-auditlog if no user found in pghistory - if not user: - if le := LogEntry.objects.filter( - action=LogEntry.Action.DELETE, - content_type=ContentType.objects.get(app_label="dojo", model="product"), - object_id=instance.id, - ).order_by("-id").first(): - user = le.actor - + # Fallback to the current user of the request (Which might be not available for ASYNC_OBJECT_DELETE scenario's) if not user: current_user = get_current_user() user = current_user diff --git a/dojo/product_type/signals.py b/dojo/product_type/signals.py index 523e7dcedc4..8bb435751d5 100644 --- a/dojo/product_type/signals.py +++ b/dojo/product_type/signals.py @@ -1,10 +1,8 @@ import contextlib -from auditlog.models import LogEntry from crum import 
get_current_user from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_delete, post_save from django.dispatch import receiver from django.urls import reverse @@ -37,7 +35,7 @@ def product_type_post_delete(sender, instance, **kwargs): user = None if settings.ENABLE_AUDITLOG: - # First try to find deletion author in pghistory events + # Find deletion author in pghistory events # Look for delete events for this specific product_type instance pghistory_delete_events = DojoEvents.objects.filter( pgh_obj_model="dojo.Product_Type", @@ -53,16 +51,6 @@ def product_type_post_delete(sender, instance, **kwargs): with contextlib.suppress(User.DoesNotExist): user = User.objects.get(id=latest_delete.user) - # Fall back to django-auditlog if no user found in pghistory - if not user: - if le := LogEntry.objects.filter( - action=LogEntry.Action.DELETE, - content_type=ContentType.objects.get(app_label="dojo", model="product_type"), - object_id=instance.id, - ).order_by("-id").first(): - user = le.actor - - # Since adding pghistory as auditlog option, this signal here runs before the django-auditlog signal # Fallback to the current user of the request (Which might be not available for ASYNC_OBJECT_DELETE scenario's) if not user: current_user = get_current_user() diff --git a/dojo/test/signals.py b/dojo/test/signals.py index 22669a2a040..cc180c5d55e 100644 --- a/dojo/test/signals.py +++ b/dojo/test/signals.py @@ -1,9 +1,7 @@ import contextlib -from auditlog.models import LogEntry from django.conf import settings from django.contrib.auth import get_user_model -from django.contrib.contenttypes.models import ContentType from django.db.models.signals import post_delete, pre_delete, pre_save from django.dispatch import receiver from django.urls import reverse @@ -25,7 +23,7 @@ def test_post_delete(sender, instance, using, origin, **kwargs): user = None if settings.ENABLE_AUDITLOG: - # First try to find deletion author in pghistory events + # Find deletion author in pghistory events # Look for delete events for this specific test instance pghistory_delete_events = DojoEvents.objects.filter( pgh_obj_model="dojo.Test", @@ -41,15 +39,6 @@ def test_post_delete(sender, instance, using, origin, **kwargs): with contextlib.suppress(User.DoesNotExist): user = User.objects.get(id=latest_delete.user) - # Fall back to django-auditlog if no user found in pghistory - if not user: - if le := LogEntry.objects.filter( - action=LogEntry.Action.DELETE, - content_type=ContentType.objects.get(app_label="dojo", model="test"), - object_id=instance.id, - ).order_by("-id").first(): - user = le.actor - # Update description with user if found if user: description = _('The test "%(name)s" was deleted by %(user)s') % { From 1898338b6410bf3277c0fa07cb07876d100ecade Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Sat, 1 Nov 2025 23:33:17 +0100 Subject: [PATCH 6/8] add migration --- docs/content/en/open_source/upgrading/2.53.md | 6 + dojo/auditlog.py | 376 ++++++++++++++++- dojo/db_migrations/0248_pghistory_backfill.py | 85 ++++ dojo/filters.py | 2 +- .../management/commands/pghistory_backfill.py | 35 +- .../commands/pghistory_backfill_fast.py | 399 +----------------- .../commands/pghistory_backfill_simple.py | 6 +- dojo/templates/dojo/action_history.html | 4 +- 8 files changed, 495 insertions(+), 418 deletions(-) create mode 100644 dojo/db_migrations/0248_pghistory_backfill.py diff --git 
a/docs/content/en/open_source/upgrading/2.53.md b/docs/content/en/open_source/upgrading/2.53.md index e72751787ea..103d0fe0d3e 100644 --- a/docs/content/en/open_source/upgrading/2.53.md +++ b/docs/content/en/open_source/upgrading/2.53.md @@ -31,7 +31,13 @@ The switch to `django-pghistory` provides several advantages: ### Migration Notes - A one-time data migration will take place to populate the `django-pghistory` tables with the initial snapshot of the tracked models. +- The migration is designed to be fail-safe: if it fails for some reason, it will continue where it left off. +- The migration can also be performed up front via + - `docker compose exec uwsgi bash -c "python manage.py pghistory_backfill_fast"`, or + - `docker compose exec uwsgi bash -c "python manage.py pghistory_backfill_simple"`, or + - `docker compose exec uwsgi bash -c "python manage.py pghistory_backfill"` +The backfill migration is not mandatory to succeed. If it fails for some reason, the only side effect will be that the first auditlog diff will contain all fields of an object instead just the changed fields. --- Check the [Release Notes](https://github.com/DefectDojo/django-DefectDojo/releases/tag/2.53.0) for the complete contents of this release. diff --git a/dojo/auditlog.py b/dojo/auditlog.py index db6a5529135..78eff1b91e0 100644 --- a/dojo/auditlog.py +++ b/dojo/auditlog.py @@ -7,13 +7,14 @@ import logging import os import sys +import time import pghistory from dateutil.relativedelta import relativedelta from django.apps import apps from django.conf import settings from django.core.management import call_command -from django.db import models +from django.db import connection, models from django.utils import timezone logger = logging.getLogger(__name__) @@ -171,7 +172,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), exclude=["password"], # add some indexes manually so we don't have to define a customer phistory Event model with overridden fields. 
meta={ @@ -187,7 +188,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -201,7 +202,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -215,7 +216,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -229,7 +230,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -243,7 +244,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -257,7 +258,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -271,7 +272,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -285,7 +286,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -299,7 +300,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "indexes": [ models.Index(fields=["pgh_created_at"]), @@ -313,7 +314,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), exclude=["password"], meta={ "indexes": [ @@ -328,7 +329,7 @@ def register_django_pghistory_models(): pghistory.InsertEvent(), 
pghistory.UpdateEvent(condition=pghistory.AnyChange(exclude_auto=True)), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), exclude=["header_name", "header_value"], meta={ "indexes": [ @@ -369,7 +370,7 @@ class Meta: pghistory.track( pghistory.InsertEvent(), pghistory.DeleteEvent(), - pghistory.ManualEvent(label="initial_import"), + pghistory.ManualEvent(label="initial_backfill"), meta={ "db_table": "dojo_finding_reviewersevent", "indexes": [ @@ -449,3 +450,350 @@ def configure_audit_system(): if log_enabled: logger.info("Audit logging configured: django-pghistory") + + +# Backfill functions for pghistory tables +def get_excluded_fields(model_name): + """Get the list of excluded fields for a specific model from pghistory configuration.""" + # Define excluded fields for each model (matching auditlog.py) + excluded_fields_map = { + "Dojo_User": ["password"], + "Product": ["updated"], # This is the key change + "Cred_User": ["password"], + "Notification_Webhooks": ["header_name", "header_value"], + } + return excluded_fields_map.get(model_name, []) + + +def get_table_names(model_name): + """Get the source table name and event table name for a model.""" + # Handle special cases for table naming + if model_name == "Dojo_User": + table_name = "dojo_dojo_user" + event_table_name = "dojo_dojo_userevent" + elif model_name == "Product_Type": + table_name = "dojo_product_type" + event_table_name = "dojo_product_typeevent" + elif model_name == "Finding_Group": + table_name = "dojo_finding_group" + event_table_name = "dojo_finding_groupevent" + elif model_name == "Risk_Acceptance": + table_name = "dojo_risk_acceptance" + event_table_name = "dojo_risk_acceptanceevent" + elif model_name == "Finding_Template": + table_name = "dojo_finding_template" + event_table_name = "dojo_finding_templateevent" + elif model_name == "Cred_User": + table_name = "dojo_cred_user" + event_table_name = "dojo_cred_userevent" + elif model_name == "Notification_Webhooks": + table_name = "dojo_notification_webhooks" + event_table_name = "dojo_notification_webhooksevent" + elif model_name == "FindingReviewers": + # M2M through table: Django creates dojo_finding_reviewers for Finding.reviewers + table_name = "dojo_finding_reviewers" + event_table_name = "dojo_finding_reviewersevent" + else: + table_name = f"dojo_{model_name.lower()}" + event_table_name = f"dojo_{model_name.lower()}event" + return table_name, event_table_name + + +def check_tables_exist(table_name, event_table_name): + """Check if both source and event tables exist.""" + with connection.cursor() as cursor: + cursor.execute(""" + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = %s + ) + """, [table_name]) + table_exists = cursor.fetchone()[0] + + cursor.execute(""" + SELECT EXISTS ( + SELECT FROM information_schema.tables + WHERE table_name = %s + ) + """, [event_table_name]) + event_table_exists = cursor.fetchone()[0] + + return table_exists, event_table_exists + + +def process_model_backfill( + model_name, + batch_size=10000, + *, + dry_run=False, + progress_callback=None, +): + """ + Process a single model's backfill using PostgreSQL COPY. + + Args: + model_name: Name of the model to backfill + batch_size: Number of records to process in each batch + dry_run: If True, only show what would be done without creating events + progress_callback: Optional callable that receives (message, style) tuples + for progress updates. 
If None, uses logger.info + + Returns: + tuple: (processed_count, records_per_second) + + """ + if progress_callback is None: + def progress_callback(msg, style=None): + logger.info(msg) + + try: + table_name, event_table_name = get_table_names(model_name) + + # Check if tables exist + table_exists, event_table_exists = check_tables_exist(table_name, event_table_name) + + if not table_exists: + progress_callback(f" Table {table_name} not found") + return 0, 0.0 + + if not event_table_exists: + progress_callback( + f" Event table {event_table_name} not found. " + f"Is {model_name} tracked by pghistory?", + "ERROR", + ) + return 0, 0.0 + + # Get total count using raw SQL + with connection.cursor() as cursor: + cursor.execute(f"SELECT COUNT(*) FROM {table_name}") + total_count = cursor.fetchone()[0] + + if total_count == 0: + progress_callback(f" No records found for {model_name}") + return 0, 0.0 + + progress_callback(f" Found {total_count:,} records") + + # Get excluded fields + excluded_fields = get_excluded_fields(model_name) + + # Check if records already have initial_backfill events using raw SQL + with connection.cursor() as cursor: + cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_backfill'") + existing_count = cursor.fetchone()[0] + + # Get records that need backfill using raw SQL + with connection.cursor() as cursor: + cursor.execute(f""" + SELECT COUNT(*) FROM {table_name} t + WHERE NOT EXISTS ( + SELECT 1 FROM {event_table_name} e + WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_backfill' + ) + """) + backfill_count = cursor.fetchone()[0] + + # Log the breakdown + progress_callback(f" Records with initial_backfill events: {existing_count:,}") + progress_callback(f" Records needing initial_backfill events: {backfill_count:,}") + + if backfill_count == 0: + progress_callback(f" ✓ All {total_count:,} records already have initial_backfill events", "SUCCESS") + return total_count, 0.0 + + if dry_run: + progress_callback(f" Would process {backfill_count:,} records using COPY...") + return backfill_count, 0.0 + + # Get event table columns using raw SQL (excluding auto-generated pgh_id) + with connection.cursor() as cursor: + cursor.execute(""" + SELECT column_name + FROM information_schema.columns + WHERE table_name = %s AND column_name != 'pgh_id' + ORDER BY ordinal_position + """, [event_table_name]) + event_columns = [row[0] for row in cursor.fetchall()] + + # Get all IDs that need backfill first + with connection.cursor() as cursor: + cursor.execute(f""" + SELECT t.id FROM {table_name} t + WHERE NOT EXISTS ( + SELECT 1 FROM {event_table_name} e + WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_backfill' + ) + ORDER BY t.id + """) + ids_to_process = [row[0] for row in cursor.fetchall()] + + if not ids_to_process: + progress_callback(" No records need backfill") + return 0, 0.0 + + # Process records in batches using raw SQL + processed = 0 + batch_start_time = time.time() + model_start_time = time.time() # Track model start time + + # Get column names for the source table + with connection.cursor() as cursor: + cursor.execute(""" + SELECT column_name + FROM information_schema.columns + WHERE table_name = %s + ORDER BY ordinal_position + """, [table_name]) + source_columns = [row[0] for row in cursor.fetchall()] + + # Filter out excluded fields from source columns + source_columns = [col for col in source_columns if col not in excluded_fields] + + # Find the index of the 'id' column for pgh_obj_id mapping + try: + id_column_index = 
source_columns.index("id") + except ValueError: + # If id is excluded (shouldn't happen), fall back to first column + id_column_index = 0 + progress_callback(" Warning: 'id' column not found in source columns, using first column", "WARNING") + + # Process in batches + consecutive_failures = 0 + max_failures = 3 + + for i in range(0, len(ids_to_process), batch_size): + batch_ids = ids_to_process[i:i + batch_size] + + # Log progress every 10 batches + if i > 0 and i % (batch_size * 10) == 0: + progress_callback(f" Processing batch starting at index {i:,}...") + + # Get batch of records using raw SQL with specific IDs + columns_str = ", ".join(source_columns) + placeholders = ", ".join(["%s"] * len(batch_ids)) + query = f""" + SELECT {columns_str} FROM {table_name} t + WHERE t.id IN ({placeholders}) + ORDER BY t.id + """ + + with connection.cursor() as cursor: + cursor.execute(query, batch_ids) + batch_rows = cursor.fetchall() + + if not batch_rows: + progress_callback(f" No records found for batch at index {i}") + continue + + # Use PostgreSQL COPY + try: + # Use PostgreSQL COPY with psycopg3 syntax + with connection.cursor() as cursor: + # Get the underlying raw cursor to bypass Django's wrapper + raw_cursor = cursor.cursor + # Use the copy method (psycopg3 syntax) + copy_sql = f"COPY {event_table_name} ({', '.join(event_columns)}) FROM STDIN WITH (FORMAT text, DELIMITER E'\\t')" + + # Use psycopg3 copy syntax as per documentation + # Prepare data as list of tuples for write_row() + records = [] + for row in batch_rows: + row_data = [] + + # Create a mapping of source columns to values + source_values = {} + for idx, value in enumerate(row): + field_name = source_columns[idx] + source_values[field_name] = value + + # Build row data in the order of event_columns + for col in event_columns: + if col == "pgh_created_at": + row_data.append(timezone.now()) + elif col == "pgh_label": + row_data.append("initial_backfill") + elif col == "pgh_obj_id": + # Use the id column index instead of assuming position + row_data.append(row[id_column_index] if row[id_column_index] is not None else None) + elif col == "pgh_context_id": + row_data.append(None) # Empty for backfilled events + elif col in source_values: + row_data.append(source_values[col]) + else: + row_data.append(None) # Default NULL value + + records.append(tuple(row_data)) + + # Use COPY with write_row() as per psycopg3 docs + with raw_cursor.copy(copy_sql) as copy: + for record in records: + copy.write_row(record) + progress_callback(" COPY operation completed using write_row") + + # Commit the transaction to persist the data + raw_cursor.connection.commit() + + # Debug: Check if data was inserted + raw_cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_backfill'") + count = raw_cursor.fetchone()[0] + progress_callback(f" Records in event table after batch: {count}") + + batch_processed = len(batch_rows) + processed += batch_processed + consecutive_failures = 0 # Reset failure counter on success + + # Calculate timing + batch_end_time = time.time() + batch_duration = batch_end_time - batch_start_time + batch_records_per_second = batch_processed / batch_duration if batch_duration > 0 else 0 + + # Log progress + progress = (processed / backfill_count) * 100 + progress_callback( + f" Processed {processed:,}/{backfill_count:,} records ({progress:.1f}%) - " + f"Last batch: {batch_duration:.2f}s ({batch_records_per_second:.1f} records/sec)", + ) + + batch_start_time = time.time() # Reset for next batch + + except 
Exception as e: + consecutive_failures += 1 + logger.error(f"Bulk insert failed for {model_name} batch: {e}") + progress_callback(f" Bulk insert failed: {e}", "ERROR") + # Log more details about the error + progress_callback(f" Processed {processed:,} records before failure") + + if consecutive_failures >= max_failures: + progress_callback(f" Too many consecutive failures ({consecutive_failures}), stopping processing", "ERROR") + break + + # Continue with next batch instead of breaking + continue + + # Calculate total timing + model_end_time = time.time() + total_duration = model_end_time - model_start_time + records_per_second = processed / total_duration if total_duration > 0 else 0 + + progress_callback( + f" ✓ Completed {model_name}: {processed:,} records in {total_duration:.2f}s " + f"({records_per_second:.1f} records/sec)", + "SUCCESS", + ) + except Exception as e: + progress_callback(f" ✗ Failed to process {model_name}: {e}", "ERROR") + logger.exception(f"Error processing {model_name}") + return 0, 0.0 + else: + return processed, records_per_second + + +def get_tracked_models(): + """Get the list of models tracked by pghistory.""" + return [ + "Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group", + "Product_Type", "Product", "Test", "Risk_Acceptance", + "Finding_Template", "Cred_User", "Notification_Webhooks", + "FindingReviewers", # M2M through table for Finding.reviewers + ] diff --git a/dojo/db_migrations/0248_pghistory_backfill.py b/dojo/db_migrations/0248_pghistory_backfill.py new file mode 100644 index 00000000000..e6bc1de0f06 --- /dev/null +++ b/dojo/db_migrations/0248_pghistory_backfill.py @@ -0,0 +1,85 @@ +# Generated manually for pghistory initial backfill + +import logging + +from django.conf import settings +from django.db import migrations + +from dojo.auditlog import ( + get_tracked_models, + process_model_backfill, +) + +logger = logging.getLogger(__name__) + + +def backfill_pghistory_tables(apps, schema_editor): + """ + Backfill pghistory tables with initial snapshots of existing records. + + This migration is fail-safe: if it fails for some reason, it will continue + where it left off on the next run, as it only processes records that don't + already have initial_backfill events. + """ + # Skip if auditlog is not enabled + if not settings.ENABLE_AUDITLOG: + logger.info("pghistory is not enabled. Skipping backfill.") + return + + # Check if we can use COPY (PostgreSQL only) + if settings.DATABASES["default"]["ENGINE"] != "django.db.backends.postgresql": + logger.warning( + "COPY operations only available with PostgreSQL. " + "Skipping backfill. 
Use the pghistory_backfill command instead.", + ) + return + + # Progress callback for migration logging + def progress_callback(msg, style=None): + """Progress callback that logs to Django's logger.""" + if style == "ERROR": + logger.error(msg) + elif style == "WARNING": + logger.warning(msg) + elif style == "SUCCESS": + logger.info(msg) + else: + logger.info(msg) + + # Get all tracked models + tracked_models = get_tracked_models() + + logger.info(f"Starting pghistory backfill for {len(tracked_models)} model(s)...") + + total_processed = 0 + for model_name in tracked_models: + logger.info(f"Processing {model_name}...") + try: + processed, _ = process_model_backfill( + model_name=model_name, + batch_size=10000, + dry_run=False, + progress_callback=progress_callback, + ) + total_processed += processed + except Exception as e: + logger.error(f"Failed to backfill {model_name}: {e}", exc_info=True) + # Continue with other models even if one fails + continue + + logger.info(f"Pghistory backfill complete: Processed {total_processed:,} records") + + +class Migration(migrations.Migration): + + dependencies = [ + ("dojo", "0247_findingreviewers_findingreviewersevent_and_more"), + ] + + operations = [ + migrations.RunPython( + backfill_pghistory_tables, + reverse_code=migrations.RunPython.noop, + ), + ] + diff --git a/dojo/filters.py b/dojo/filters.py index ebd8b023fb8..2dae6c156a4 100644 --- a/dojo/filters.py +++ b/dojo/filters.py @@ -3559,7 +3559,7 @@ class PgHistoryFilter(DojoFilter): ("insert", "Insert"), ("update", "Update"), ("delete", "Delete"), - ("initial_import", "Initial Import"), + ("initial_backfill", "Initial Backfill"), ], ) diff --git a/dojo/management/commands/pghistory_backfill.py b/dojo/management/commands/pghistory_backfill.py index 0d7789d435d..751799831a2 100644 --- a/dojo/management/commands/pghistory_backfill.py +++ b/dojo/management/commands/pghistory_backfill.py @@ -37,12 +37,7 @@ def add_arguments(self, parser): parser.add_argument( "--log-queries", action="store_true", - help="Enable database query logging (default: enabled)", - ) - parser.add_argument( - "--no-log-queries", - action="store_true", - help="Disable database query logging", + help="Enable database query logging (default: disabled)", ) def get_excluded_fields(self, model_name): @@ -149,15 +144,11 @@ def handle(self, *args, **options): return # Enable database query logging based on options - # Default to enabled unless explicitly disabled - enable_query_logging = not options.get("no_log_queries") + # Default to disabled unless explicitly enabled + enable_query_logging = options.get("log_queries", False) if enable_query_logging: self.enable_db_logging() - else: - self.stdout.write( - self.style.WARNING("Database query logging disabled"), - ) # Models that are tracked by pghistory tracked_models = [ @@ -220,23 +211,23 @@ def handle(self, *args, **options): ) continue - # Get IDs of records that already have initial_import events - existing_initial_import_ids = set( - EventModel.objects.filter(pgh_label="initial_import").values_list("pgh_obj_id", flat=True), + # Get IDs of records that already have initial_backfill events + existing_initial_backfill_ids = set( + EventModel.objects.filter(pgh_label="initial_backfill").values_list("pgh_obj_id", flat=True), ) - # Filter to only get records that don't have initial_import events - records_needing_backfill = Model.objects.exclude(id__in=existing_initial_import_ids) + # Filter to only get records that don't have initial_backfill events + records_needing_backfill = 
Model.objects.exclude(id__in=existing_initial_backfill_ids) backfill_count = records_needing_backfill.count() - existing_count = len(existing_initial_import_ids) + existing_count = len(existing_initial_backfill_ids) # Log the breakdown - self.stdout.write(f" Records with initial_import events: {existing_count:,}") - self.stdout.write(f" Records needing initial_import events: {backfill_count:,}") + self.stdout.write(f" Records with initial_backfill events: {existing_count:,}") + self.stdout.write(f" Records needing initial_backfill events: {backfill_count:,}") if backfill_count == 0: self.stdout.write( - self.style.SUCCESS(f" ✓ All {total_count:,} records already have initial_import events"), + self.style.SUCCESS(f" ✓ All {total_count:,} records already have initial_backfill events"), ) processed = total_count continue @@ -284,7 +275,7 @@ def handle(self, *args, **options): # Add pghistory-specific fields event_data.update({ - "pgh_label": "initial_import", + "pgh_label": "initial_backfill", "pgh_obj": instance, # ForeignKey to the original object "pgh_context": None, # No context for backfilled events }) diff --git a/dojo/management/commands/pghistory_backfill_fast.py b/dojo/management/commands/pghistory_backfill_fast.py index ac83168aad6..23682ad6c62 100644 --- a/dojo/management/commands/pghistory_backfill_fast.py +++ b/dojo/management/commands/pghistory_backfill_fast.py @@ -4,14 +4,16 @@ This command creates initial snapshots for all existing records in tracked models using PostgreSQL COPY for maximum performance. """ -import io import logging import time from django.conf import settings from django.core.management.base import BaseCommand -from django.db import connection -from django.utils import timezone + +from dojo.auditlog import ( + get_tracked_models, + process_model_backfill, +) logger = logging.getLogger(__name__) @@ -39,377 +41,28 @@ def add_arguments(self, parser): parser.add_argument( "--log-queries", action="store_true", - help="Enable database query logging (default: enabled)", + help="Enable database query logging (default: disabled)", ) - parser.add_argument( - "--no-log-queries", - action="store_true", - help="Disable database query logging", - ) - - def get_excluded_fields(self, model_name): - """Get the list of excluded fields for a specific model from pghistory configuration.""" - # Define excluded fields for each model (matching auditlog.py) - excluded_fields_map = { - "Dojo_User": ["password"], - "Product": ["updated"], # This is the key change - "Cred_User": ["password"], - "Notification_Webhooks": ["header_name", "header_value"], - } - return excluded_fields_map.get(model_name, []) def process_model_with_copy(self, model_name, batch_size, dry_run): """Process a single model using COPY operations with raw SQL.""" - try: - # Get table names using raw SQL - # Handle special cases for table naming - if model_name == "Dojo_User": - table_name = "dojo_dojo_user" - event_table_name = "dojo_dojo_userevent" - elif model_name == "Product_Type": - table_name = "dojo_product_type" - event_table_name = "dojo_product_typeevent" - elif model_name == "Finding_Group": - table_name = "dojo_finding_group" - event_table_name = "dojo_finding_groupevent" - elif model_name == "Risk_Acceptance": - table_name = "dojo_risk_acceptance" - event_table_name = "dojo_risk_acceptanceevent" - elif model_name == "Finding_Template": - table_name = "dojo_finding_template" - event_table_name = "dojo_finding_templateevent" - elif model_name == "Cred_User": - table_name = "dojo_cred_user" - 
event_table_name = "dojo_cred_userevent" - elif model_name == "Notification_Webhooks": - table_name = "dojo_notification_webhooks" - event_table_name = "dojo_notification_webhooksevent" + def progress_callback(msg, style=None): + """Progress callback that uses self.stdout.write with styling.""" + if style == "SUCCESS": + self.stdout.write(self.style.SUCCESS(msg)) + elif style == "ERROR": + self.stdout.write(self.style.ERROR(msg)) + elif style == "WARNING": + self.stdout.write(self.style.WARNING(msg)) else: - table_name = f"dojo_{model_name.lower()}" - event_table_name = f"dojo_{model_name.lower()}event" - - # Check if tables exist - with connection.cursor() as cursor: - cursor.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_name = %s - ) - """, [table_name]) - table_exists = cursor.fetchone()[0] - - cursor.execute(""" - SELECT EXISTS ( - SELECT FROM information_schema.tables - WHERE table_name = %s - ) - """, [event_table_name]) - event_table_exists = cursor.fetchone()[0] - - if not table_exists: - self.stdout.write(f" Table {table_name} not found") - return 0, 0.0 - - if not event_table_exists: - self.stdout.write( - self.style.ERROR( - f" Event table {event_table_name} not found. " - f"Is {model_name} tracked by pghistory?", - ), - ) - return 0, 0.0 - - # Get total count using raw SQL - with connection.cursor() as cursor: - cursor.execute(f"SELECT COUNT(*) FROM {table_name}") - total_count = cursor.fetchone()[0] - - if total_count == 0: - self.stdout.write(f" No records found for {model_name}") - return 0, 0.0 - - self.stdout.write(f" Found {total_count:,} records") - - # Get excluded fields - excluded_fields = self.get_excluded_fields(model_name) - - # Check if records already have initial_import events using raw SQL - with connection.cursor() as cursor: - cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_import'") - existing_count = cursor.fetchone()[0] - - # Get records that need backfill using raw SQL - with connection.cursor() as cursor: - cursor.execute(f""" - SELECT COUNT(*) FROM {table_name} t - WHERE NOT EXISTS ( - SELECT 1 FROM {event_table_name} e - WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import' - ) - """) - backfill_count = cursor.fetchone()[0] - - # Log the breakdown - self.stdout.write(f" Records with initial_import events: {existing_count:,}") - self.stdout.write(f" Records needing initial_import events: {backfill_count:,}") - - if backfill_count == 0: - self.stdout.write( - self.style.SUCCESS(f" ✓ All {total_count:,} records already have initial_import events"), - ) - return total_count, 0.0 - - if dry_run: - self.stdout.write(f" Would process {backfill_count:,} records using COPY...") - return backfill_count, 0.0 - - # Get event table columns using raw SQL (excluding auto-generated pgh_id) - with connection.cursor() as cursor: - cursor.execute(""" - SELECT column_name - FROM information_schema.columns - WHERE table_name = %s AND column_name != 'pgh_id' - ORDER BY ordinal_position - """, [event_table_name]) - event_columns = [row[0] for row in cursor.fetchall()] - - # Get all IDs that need backfill first - with connection.cursor() as cursor: - cursor.execute(f""" - SELECT t.id FROM {table_name} t - WHERE NOT EXISTS ( - SELECT 1 FROM {event_table_name} e - WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import' - ) - ORDER BY t.id - """) - ids_to_process = [row[0] for row in cursor.fetchall()] - - if not ids_to_process: - self.stdout.write(" No records need backfill") - return 0, 0.0 - 
- # Process records in batches using raw SQL - processed = 0 - batch_start_time = time.time() - model_start_time = time.time() # Track model start time - - # Get column names for the source table - with connection.cursor() as cursor: - cursor.execute(""" - SELECT column_name - FROM information_schema.columns - WHERE table_name = %s - ORDER BY ordinal_position - """, [table_name]) - source_columns = [row[0] for row in cursor.fetchall()] - - # Filter out excluded fields from source columns - source_columns = [col for col in source_columns if col not in excluded_fields] - - # Process in batches - consecutive_failures = 0 - max_failures = 3 - - for i in range(0, len(ids_to_process), batch_size): - batch_ids = ids_to_process[i:i + batch_size] - - # Log progress every 10 batches - if i > 0 and i % (batch_size * 10) == 0: - self.stdout.write(f" Processing batch starting at index {i:,}...") - - # Get batch of records using raw SQL with specific IDs - columns_str = ", ".join(source_columns) - placeholders = ", ".join(["%s"] * len(batch_ids)) - query = f""" - SELECT {columns_str} FROM {table_name} t - WHERE t.id IN ({placeholders}) - ORDER BY t.id - """ - - with connection.cursor() as cursor: - cursor.execute(query, batch_ids) - batch_rows = cursor.fetchall() - - if not batch_rows: - self.stdout.write(f" No records found for batch at index {i}") - continue - - # Use PostgreSQL COPY as described in the article - try: - # Prepare data for COPY using a custom file-like object - class FileLikeObject: - def __init__(self): - self.data = io.BytesIO() - - def write(self, data): - return self.data.write(data) - - def read(self, size=-1): - return self.data.read(size) - - def seek(self, pos): - return self.data.seek(pos) - - def tell(self): - return self.data.tell() - - def __len__(self): - return len(self.data.getvalue()) - - def getvalue(self): - return self.data.getvalue() - - copy_buffer = FileLikeObject() - - for row in batch_rows: - row_data = [] - - # Create a mapping of source columns to values - source_values = {} - for idx, value in enumerate(row): - field_name = source_columns[idx] - # Convert value to string for COPY - if value is None: - source_values[field_name] = "" - elif isinstance(value, bool): - source_values[field_name] = "t" if value else "f" - elif hasattr(value, "isoformat"): # datetime objects - source_values[field_name] = value.isoformat() - else: - source_values[field_name] = str(value) - - # Build row data in the order of event_columns - for col in event_columns: - if col == "pgh_created_at": - row_data.append(timezone.now().isoformat()) - elif col == "pgh_label": - row_data.append("initial_import") - elif col == "pgh_obj_id": - row_data.append(str(row[0]) if row[0] is not None else "") # Assuming first column is id - elif col == "pgh_context_id": - row_data.append("") # Empty for backfilled events - elif col in source_values: - row_data.append(source_values[col]) - else: - row_data.append("") # Default empty value - - # Write tab-separated row to buffer as bytes - copy_buffer.write(("\t".join(row_data) + "\n").encode("utf-8")) - - copy_buffer.seek(0) - - # Debug: Show what we're about to copy - self.stdout.write(f" Batch {i // batch_size + 1}: Writing to table: {event_table_name}") - - # Use PostgreSQL COPY with psycopg3 syntax - with connection.cursor() as cursor: - # Get the underlying raw cursor to bypass Django's wrapper - raw_cursor = cursor.cursor - # Use the copy method (psycopg3 syntax) - copy_sql = f"COPY {event_table_name} ({', '.join(event_columns)}) FROM STDIN WITH 
(FORMAT text, DELIMITER E'\\t')" - - try: - # Use psycopg3 copy syntax as per documentation - # Prepare data as list of tuples for write_row() - records = [] - for row in batch_rows: - row_data = [] - - # Create a mapping of source columns to values - source_values = {} - for idx, value in enumerate(row): - field_name = source_columns[idx] - source_values[field_name] = value - - # Build row data in the order of event_columns - for col in event_columns: - if col == "pgh_created_at": - row_data.append(timezone.now()) - elif col == "pgh_label": - row_data.append("initial_import") - elif col == "pgh_obj_id": - row_data.append(row[0]) # Assuming first column is id - elif col == "pgh_context_id": - row_data.append(None) # Empty for backfilled events - elif col in source_values: - row_data.append(source_values[col]) - else: - row_data.append(None) # Default NULL value - - records.append(tuple(row_data)) - - # Use COPY with write_row() as per psycopg3 docs - with raw_cursor.copy(copy_sql) as copy: - for record in records: - copy.write_row(record) - self.stdout.write(" COPY operation completed using write_row") - - # Commit the transaction to persist the data - raw_cursor.connection.commit() - - # Debug: Check if data was inserted - raw_cursor.execute(f"SELECT COUNT(*) FROM {event_table_name} WHERE pgh_label = 'initial_import'") - count = raw_cursor.fetchone()[0] - self.stdout.write(f" Records in event table after batch: {count}") - - except Exception as copy_error: - self.stdout.write(f" COPY error: {copy_error}") - # Try to get more details about the error - raw_cursor.execute("SELECT * FROM pg_stat_activity WHERE state = 'active'") - self.stdout.write(f" Active queries: {raw_cursor.fetchall()}") - raise - - batch_processed = len(batch_rows) - processed += batch_processed - consecutive_failures = 0 # Reset failure counter on success - - # Calculate timing - batch_end_time = time.time() - batch_duration = batch_end_time - batch_start_time - batch_records_per_second = batch_processed / batch_duration if batch_duration > 0 else 0 - - # Log progress - progress = (processed / backfill_count) * 100 - self.stdout.write(f" Processed {processed:,}/{backfill_count:,} records ({progress:.1f}%) - " - f"Last batch: {batch_duration:.2f}s ({batch_records_per_second:.1f} records/sec)") - - batch_start_time = time.time() # Reset for next batch - - except Exception as e: - consecutive_failures += 1 - logger.error(f"Bulk insert failed for {model_name} batch: {e}") - self.stdout.write(f" Bulk insert failed: {e}") - # Log more details about the error - self.stdout.write(f" Processed {processed:,} records before failure") - - if consecutive_failures >= max_failures: - self.stdout.write(f" Too many consecutive failures ({consecutive_failures}), stopping processing") - break - - # Continue with next batch instead of breaking - continue - - # Calculate total timing - model_end_time = time.time() - total_duration = model_end_time - model_start_time - records_per_second = processed / total_duration if total_duration > 0 else 0 - - self.stdout.write( - self.style.SUCCESS( - f" ✓ Completed {model_name}: {processed:,} records in {total_duration:.2f}s " - f"({records_per_second:.1f} records/sec)", - ), - ) + self.stdout.write(msg) - return processed, records_per_second # noqa: TRY300 - - except Exception as e: - self.stdout.write( - self.style.ERROR(f" ✗ Failed to process {model_name}: {e}"), - ) - logger.exception(f"Error processing {model_name}") - return 0, 0.0 + return process_model_backfill( + model_name=model_name, + 
batch_size=batch_size, + dry_run=dry_run, + progress_callback=progress_callback, + ) def enable_db_logging(self): """Enable database query logging for this command.""" @@ -476,7 +129,7 @@ def handle(self, *args, **options): return # Enable database query logging based on options - enable_query_logging = not options.get("no_log_queries") + enable_query_logging = options.get("log_queries") if enable_query_logging: self.enable_db_logging() @@ -486,12 +139,7 @@ def handle(self, *args, **options): ) # Models that are tracked by pghistory - tracked_models = [ - "Dojo_User", "Endpoint", "Engagement", "Finding", "Finding_Group", - "Product_Type", "Product", "Test", "Risk_Acceptance", - "Finding_Template", "Cred_User", "Notification_Webhooks", - "FindingReviewers", # M2M through table for Finding.reviewers - ] + tracked_models = get_tracked_models() specific_model = options.get("model") if specific_model: @@ -518,7 +166,6 @@ def handle(self, *args, **options): self.stdout.write(f"Starting backfill for {len(tracked_models)} model(s) using PostgreSQL COPY...") for model_name in tracked_models: - time.time() self.stdout.write(f"\nProcessing {model_name}...") processed, _ = self.process_model_with_copy( diff --git a/dojo/management/commands/pghistory_backfill_simple.py b/dojo/management/commands/pghistory_backfill_simple.py index 0203b5506a7..ebc4d452473 100644 --- a/dojo/management/commands/pghistory_backfill_simple.py +++ b/dojo/management/commands/pghistory_backfill_simple.py @@ -119,7 +119,7 @@ def process_model_simple(self, model_name, batch_size, dry_run): SELECT COUNT(*) FROM {table_name} t WHERE NOT EXISTS ( SELECT 1 FROM {event_table_name} e - WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import' + WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_backfill' ) """) backfill_count = cursor.fetchone()[0] @@ -165,7 +165,7 @@ def process_model_simple(self, model_name, batch_size, dry_run): if col == "pgh_created_at": select_columns.append("NOW() as pgh_created_at") elif col == "pgh_label": - select_columns.append("'initial_import' as pgh_label") + select_columns.append("'initial_backfill' as pgh_label") elif col == "pgh_obj_id": select_columns.append("t.id as pgh_obj_id") elif col == "pgh_context_id": @@ -181,7 +181,7 @@ def process_model_simple(self, model_name, batch_size, dry_run): SELECT t.id FROM {table_name} t WHERE NOT EXISTS ( SELECT 1 FROM {event_table_name} e - WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_import' + WHERE e.pgh_obj_id = t.id AND e.pgh_label = 'initial_backfill' ) ORDER BY t.id """) diff --git a/dojo/templates/dojo/action_history.html b/dojo/templates/dojo/action_history.html index f3867024943..904347b7c11 100644 --- a/dojo/templates/dojo/action_history.html +++ b/dojo/templates/dojo/action_history.html @@ -70,8 +70,8 @@

{{ h.pgh_obj_id|default:"N/A" }} - {% if h.pgh_label == "initial_import" %} - Initial Import + {% if h.pgh_label == "initial_backfill" %} + Initial Backfill {% elif h.pgh_diff %}
{% for field, values in h.pgh_diff.items %} From 60bf2b7cb8819f871d4b07bbaa35215ba191587f Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Thu, 6 Nov 2025 20:52:28 +0100 Subject: [PATCH 7/8] rebase migrations --- ...eviewers_findingreviewersevent_and_more.py | 64 ++++++++++++++ dojo/db_migrations/0249_pghistory_backfill.py | 85 +++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 dojo/db_migrations/0248_findingreviewers_findingreviewersevent_and_more.py create mode 100644 dojo/db_migrations/0249_pghistory_backfill.py diff --git a/dojo/db_migrations/0248_findingreviewers_findingreviewersevent_and_more.py b/dojo/db_migrations/0248_findingreviewers_findingreviewersevent_and_more.py new file mode 100644 index 00000000000..f06f0d3e12e --- /dev/null +++ b/dojo/db_migrations/0248_findingreviewers_findingreviewersevent_and_more.py @@ -0,0 +1,64 @@ +# Generated by Django 5.1.13 on 2025-11-01 17:04 + +import django.db.models.deletion +import pgtrigger.compiler +import pgtrigger.migrations +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("dojo", "0247_remove_finding_insert_insert_and_more") + ] + + operations = [ + migrations.CreateModel( + name="FindingReviewers", + fields=[ + ], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("dojo.finding_reviewers",), + ), + migrations.CreateModel( + name="FindingReviewersEvent", + fields=[ + ("pgh_id", models.AutoField(primary_key=True, serialize=False)), + ("pgh_created_at", models.DateTimeField(auto_now_add=True)), + ("pgh_label", models.TextField(help_text="The event label.")), + ("id", models.IntegerField()), + ("dojo_user", models.ForeignKey(db_constraint=False, db_index=False, db_tablespace="", on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", related_query_name="+", to="dojo.dojo_user")), + ("finding", models.ForeignKey(db_constraint=False, db_index=False, db_tablespace="", on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", related_query_name="+", to="dojo.finding")), + ("pgh_context", models.ForeignKey(db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", to="pghistory.context")), + ("pgh_obj", models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, related_name="events", to="dojo.findingreviewers")), + ], + options={ + "abstract": False, + "db_table": "dojo_finding_reviewersevent", + }, + ), + migrations.AddIndex( + model_name="findingreviewersevent", + index=models.Index(fields=["pgh_created_at"], name="dojo_findin_pgh_cre_d5e5b4_idx"), + ), + migrations.AddIndex( + model_name="findingreviewersevent", + index=models.Index(fields=["pgh_label"], name="dojo_findin_pgh_lab_5517f9_idx"), + ), + migrations.AddIndex( + model_name="findingreviewersevent", + index=models.Index(fields=["pgh_context_id"], name="dojo_findin_pgh_con_06229b_idx"), + ), + pgtrigger.migrations.AddTrigger( + model_name="findingreviewers", + trigger=pgtrigger.compiler.Trigger(name="insert_insert", sql=pgtrigger.compiler.UpsertTriggerSql(func='INSERT INTO "dojo_finding_reviewersevent" ("dojo_user_id", "finding_id", "id", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id") VALUES (NEW."dojo_user_id", NEW."finding_id", NEW."id", _pgh_attach_context(), NOW(), \'insert\', NEW."id"); RETURN NULL;', hash="5c1fd440159e49c929122cbb590f96983a1c934e", operation="INSERT", pgid="pgtrigger_insert_insert_0808c", table="dojo_finding_reviewers", when="AFTER")), + ), + 
pgtrigger.migrations.AddTrigger( + model_name="findingreviewers", + trigger=pgtrigger.compiler.Trigger(name="delete_delete", sql=pgtrigger.compiler.UpsertTriggerSql(func='INSERT INTO "dojo_finding_reviewersevent" ("dojo_user_id", "finding_id", "id", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id") VALUES (OLD."dojo_user_id", OLD."finding_id", OLD."id", _pgh_attach_context(), NOW(), \'delete\', OLD."id"); RETURN NULL;', hash="23a4e01eaea469f708679392a6a92a6e16b21181", operation="DELETE", pgid="pgtrigger_delete_delete_40083", table="dojo_finding_reviewers", when="AFTER")), + ), + ] diff --git a/dojo/db_migrations/0249_pghistory_backfill.py b/dojo/db_migrations/0249_pghistory_backfill.py new file mode 100644 index 00000000000..117f8603e81 --- /dev/null +++ b/dojo/db_migrations/0249_pghistory_backfill.py @@ -0,0 +1,85 @@ +# Generated manually for pghistory initial backfill + +import logging + +from django.conf import settings +from django.db import migrations + +from dojo.auditlog import ( + get_tracked_models, + process_model_backfill, +) + +logger = logging.getLogger(__name__) + + +def backfill_pghistory_tables(apps, schema_editor): + """ + Backfill pghistory tables with initial snapshots of existing records. + + This migration is fail-safe: if it fails for some reason, it will continue + where it left off on the next run, as it only processes records that don't + already have initial_backfill events. + """ + # Skip if auditlog is not enabled + if not settings.ENABLE_AUDITLOG: + logger.info("pghistory is not enabled. Skipping backfill.") + return + + # Check if we can use COPY (PostgreSQL only) + if settings.DATABASES["default"]["ENGINE"] != "django.db.backends.postgresql": + logger.warning( + "COPY operations only available with PostgreSQL. " + "Skipping backfill. 
Use the pghistory_backfill command instead.", + ) + return + + # Progress callback for migration logging + def progress_callback(msg, style=None): + """Progress callback that logs to Django's logger.""" + if style == "ERROR": + logger.error(msg) + elif style == "WARNING": + logger.warning(msg) + elif style == "SUCCESS": + logger.info(msg) + else: + logger.info(msg) + + # Get all tracked models + tracked_models = get_tracked_models() + + logger.info(f"Starting pghistory backfill for {len(tracked_models)} model(s)...") + + total_processed = 0 + for model_name in tracked_models: + logger.info(f"Processing {model_name}...") + try: + processed, _ = process_model_backfill( + model_name=model_name, + batch_size=10000, + dry_run=False, + progress_callback=progress_callback, + ) + total_processed += processed + except Exception as e: + logger.error(f"Failed to backfill {model_name}: {e}", exc_info=True) + # Continue with other models even if one fails + continue + + logger.info(f"Pghistory backfill complete: Processed {total_processed:,} records") + + +class Migration(migrations.Migration): + + dependencies = [ + ("dojo", "0248_findingreviewers_findingreviewersevent_and_more"), + ] + + operations = [ + migrations.RunPython( + backfill_pghistory_tables, + reverse_code=migrations.RunPython.noop, + ), + ] + From a418a2f7ffd660db652dbbaf82945254152e0fe1 Mon Sep 17 00:00:00 2001 From: Valentijn Scholten Date: Thu, 6 Nov 2025 21:13:11 +0100 Subject: [PATCH 8/8] rebase migrations --- ...eviewers_findingreviewersevent_and_more.py | 64 -------------- dojo/db_migrations/0248_pghistory_backfill.py | 85 ------------------- 2 files changed, 149 deletions(-) delete mode 100644 dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py delete mode 100644 dojo/db_migrations/0248_pghistory_backfill.py diff --git a/dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py b/dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py deleted file mode 100644 index 1da2d3e1739..00000000000 --- a/dojo/db_migrations/0247_findingreviewers_findingreviewersevent_and_more.py +++ /dev/null @@ -1,64 +0,0 @@ -# Generated by Django 5.1.13 on 2025-11-01 17:04 - -import django.db.models.deletion -import pgtrigger.compiler -import pgtrigger.migrations -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("dojo", "0246_endpoint_idx_ep_product_lower_host_and_more") - ] - - operations = [ - migrations.CreateModel( - name="FindingReviewers", - fields=[ - ], - options={ - "proxy": True, - "indexes": [], - "constraints": [], - }, - bases=("dojo.finding_reviewers",), - ), - migrations.CreateModel( - name="FindingReviewersEvent", - fields=[ - ("pgh_id", models.AutoField(primary_key=True, serialize=False)), - ("pgh_created_at", models.DateTimeField(auto_now_add=True)), - ("pgh_label", models.TextField(help_text="The event label.")), - ("id", models.IntegerField()), - ("dojo_user", models.ForeignKey(db_constraint=False, db_index=False, db_tablespace="", on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", related_query_name="+", to="dojo.dojo_user")), - ("finding", models.ForeignKey(db_constraint=False, db_index=False, db_tablespace="", on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", related_query_name="+", to="dojo.finding")), - ("pgh_context", models.ForeignKey(db_constraint=False, null=True, on_delete=django.db.models.deletion.DO_NOTHING, related_name="+", to="pghistory.context")), - ("pgh_obj", 
models.ForeignKey(db_constraint=False, on_delete=django.db.models.deletion.DO_NOTHING, related_name="events", to="dojo.findingreviewers")), - ], - options={ - "abstract": False, - "db_table": "dojo_finding_reviewersevent", - }, - ), - migrations.AddIndex( - model_name="findingreviewersevent", - index=models.Index(fields=["pgh_created_at"], name="dojo_findin_pgh_cre_d5e5b4_idx"), - ), - migrations.AddIndex( - model_name="findingreviewersevent", - index=models.Index(fields=["pgh_label"], name="dojo_findin_pgh_lab_5517f9_idx"), - ), - migrations.AddIndex( - model_name="findingreviewersevent", - index=models.Index(fields=["pgh_context_id"], name="dojo_findin_pgh_con_06229b_idx"), - ), - pgtrigger.migrations.AddTrigger( - model_name="findingreviewers", - trigger=pgtrigger.compiler.Trigger(name="insert_insert", sql=pgtrigger.compiler.UpsertTriggerSql(func='INSERT INTO "dojo_finding_reviewersevent" ("dojo_user_id", "finding_id", "id", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id") VALUES (NEW."dojo_user_id", NEW."finding_id", NEW."id", _pgh_attach_context(), NOW(), \'insert\', NEW."id"); RETURN NULL;', hash="5c1fd440159e49c929122cbb590f96983a1c934e", operation="INSERT", pgid="pgtrigger_insert_insert_0808c", table="dojo_finding_reviewers", when="AFTER")), - ), - pgtrigger.migrations.AddTrigger( - model_name="findingreviewers", - trigger=pgtrigger.compiler.Trigger(name="delete_delete", sql=pgtrigger.compiler.UpsertTriggerSql(func='INSERT INTO "dojo_finding_reviewersevent" ("dojo_user_id", "finding_id", "id", "pgh_context_id", "pgh_created_at", "pgh_label", "pgh_obj_id") VALUES (OLD."dojo_user_id", OLD."finding_id", OLD."id", _pgh_attach_context(), NOW(), \'delete\', OLD."id"); RETURN NULL;', hash="23a4e01eaea469f708679392a6a92a6e16b21181", operation="DELETE", pgid="pgtrigger_delete_delete_40083", table="dojo_finding_reviewers", when="AFTER")), - ), - ] diff --git a/dojo/db_migrations/0248_pghistory_backfill.py b/dojo/db_migrations/0248_pghistory_backfill.py deleted file mode 100644 index e6bc1de0f06..00000000000 --- a/dojo/db_migrations/0248_pghistory_backfill.py +++ /dev/null @@ -1,85 +0,0 @@ -# Generated manually for pghistory initial backfill - -import logging - -from django.conf import settings -from django.db import migrations - -from dojo.auditlog import ( - get_tracked_models, - process_model_backfill, -) - -logger = logging.getLogger(__name__) - - -def backfill_pghistory_tables(apps, schema_editor): - """ - Backfill pghistory tables with initial snapshots of existing records. - - This migration is fail-safe: if it fails for some reason, it will continue - where it left off on the next run, as it only processes records that don't - already have initial_backfill events. - """ - # Skip if auditlog is not enabled - if not settings.ENABLE_AUDITLOG: - logger.info("pghistory is not enabled. Skipping backfill.") - return - - # Check if we can use COPY (PostgreSQL only) - if settings.DATABASES["default"]["ENGINE"] != "django.db.backends.postgresql": - logger.warning( - "COPY operations only available with PostgreSQL. " - "Skipping backfill. 
Use the pghistory_backfill command instead.", - ) - return - - # Progress callback for migration logging - def progress_callback(msg, style=None): - """Progress callback that logs to Django's logger.""" - if style == "ERROR": - logger.error(msg) - elif style == "WARNING": - logger.warning(msg) - elif style == "SUCCESS": - logger.info(msg) - else: - logger.info(msg) - - # Get all tracked models - tracked_models = get_tracked_models() - - logger.info(f"Starting pghistory backfill for {len(tracked_models)} model(s)...") - - total_processed = 0 - for model_name in tracked_models: - logger.info(f"Processing {model_name}...") - try: - processed, _ = process_model_backfill( - model_name=model_name, - batch_size=10000, - dry_run=False, - progress_callback=progress_callback, - ) - total_processed += processed - except Exception as e: - logger.error(f"Failed to backfill {model_name}: {e}", exc_info=True) - # Continue with other models even if one fails - continue - - logger.info(f"Pghistory backfill complete: Processed {total_processed:,} records") - - -class Migration(migrations.Migration): - - dependencies = [ - ("dojo", "0247_findingreviewers_findingreviewersevent_and_more"), - ] - - operations = [ - migrations.RunPython( - backfill_pghistory_tables, - reverse_code=migrations.RunPython.noop, - ), - ] -