From d5f00e572b6b634bd1ef2f271c8710542be6db27 Mon Sep 17 00:00:00 2001 From: Anton Krytskyi Date: Thu, 23 Oct 2025 17:49:12 +0300 Subject: [PATCH] force archive fixes --- admin/nodes/views.py | 30 ++++++++++++------------ osf/management/commands/force_archive.py | 24 +++++++++++++++---- website/archiver/tasks.py | 27 +++++++++++++++++++++ 3 files changed, 62 insertions(+), 19 deletions(-) diff --git a/admin/nodes/views.py b/admin/nodes/views.py index 74321c8f908..72f529a7e0e 100644 --- a/admin/nodes/views.py +++ b/admin/nodes/views.py @@ -52,6 +52,7 @@ from scripts.approve_registrations import approve_past_pendings from website import settings, search +from website.archiver.tasks import force_archive class NodeMixin(PermissionRequiredMixin): @@ -723,7 +724,7 @@ class CheckArchiveStatusRegistrationsView(NodeMixin, View): def get(self, request, *args, **kwargs): # Prevents circular imports that cause admin app to hang at startup - from osf.management.commands.force_archive import check + from osf.management.commands.force_archive import check, DEFAULT_PERMISSIBLE_ADDONS registration = self.get_object() @@ -731,8 +732,11 @@ def get(self, request, *args, **kwargs): messages.success(request, f"Registration {registration._id} is archived.") return redirect(self.get_success_url()) + addons = set(registration.registered_from.get_addon_names()) + addons.update(DEFAULT_PERMISSIBLE_ADDONS) + try: - archive_status = check(registration) + archive_status = check(registration, permissible_addons=addons) messages.success(request, archive_status) except RegistrationStuckError as exc: messages.error(request, str(exc)) @@ -753,7 +757,7 @@ class ForceArchiveRegistrationsView(NodeMixin, View): def post(self, request, *args, **kwargs): # Prevents circular imports that cause admin app to hang at startup - from osf.management.commands.force_archive import verify, archive, DEFAULT_PERMISSIBLE_ADDONS + from osf.management.commands.force_archive import verify, DEFAULT_PERMISSIBLE_ADDONS registration = self.get_object() force_archive_params = request.POST @@ -778,18 +782,14 @@ def post(self, request, *args, **kwargs): if dry_mode: messages.success(request, f"Registration {registration._id} can be archived.") else: - try: - archive( - registration, - permissible_addons=addons, - allow_unconfigured=allow_unconfigured, - skip_collisions=skip_collision, - delete_collisions=delete_collision, - ) - messages.success(request, 'Registration archive process has finished.') - except Exception as exc: - messages.error(request, f'This registration cannot be archived due to {exc.__class__.__name__}: {str(exc)}. ' - f'If the problem persists get a developer to fix it.') + force_archive_task = force_archive.delay( + str(registration._id), + permissible_addons=list(addons), + allow_unconfigured=allow_unconfigured, + skip_collisions=skip_collision, + delete_collisions=delete_collision, + ) + messages.success(request, f'Registration archive process has started. Task id: {force_archive_task.id}.') return redirect(self.get_success_url()) diff --git a/osf/management/commands/force_archive.py b/osf/management/commands/force_archive.py index e2667325c15..1535a8edd81 100644 --- a/osf/management/commands/force_archive.py +++ b/osf/management/commands/force_archive.py @@ -36,10 +36,13 @@ from addons.osfstorage.models import OsfStorageFile, OsfStorageFolder, OsfStorageFileNode from framework import sentry from framework.exceptions import HTTPError +from osf import features from osf.models import AbstractNode, Node, NodeLog, Registration, BaseFileNode from osf.models.files import TrashedFileNode +from osf.utils.requests import get_current_request from osf.exceptions import RegistrationStuckRecoverableException, RegistrationStuckBrokenException from api.base.utils import waterbutler_api_url_for +from api.waffle.utils import flag_is_active from scripts import utils as script_utils from website.archiver import ARCHIVER_SUCCESS from website.settings import ARCHIVE_TIMEOUT_TIMEDELTA, ARCHIVE_PROVIDER, COOKIE_NAME @@ -149,9 +152,11 @@ def complete_archive_target(reg, addon_short_name): def perform_wb_copy(reg, node_settings, delete_collisions=False, skip_collisions=False): src, dst, user = reg.archive_job.info() - if dst.files.filter(name=node_settings.archive_folder_name.replace('/', '-')).exists(): + dst_storage = dst.get_addon('osfstorage') + archive_name = node_settings.archive_folder_name.replace('/', '-') + if dst_storage.get_root().children.filter(name=archive_name).exists(): if not delete_collisions and not skip_collisions: - raise Exception('Archive folder for {} already exists. Investigate manually and rerun with either --delete-collisions or --skip-collisions') + raise Exception(f'Archive folder for {archive_name} already exists. Investigate manually and rerun with either --delete-collisions or --skip-collisions') if delete_collisions: archive_folder = dst.files.exclude(type='osf.trashedfolder').get(name=node_settings.archive_folder_name.replace('/', '-')) logger.info(f'Removing {archive_folder}') @@ -393,12 +398,23 @@ def archive(registration, *args, permissible_addons=DEFAULT_PERMISSIBLE_ADDONS, logger.info(f'Preparing to archive {reg._id}') for short_name in permissible_addons: node_settings = reg.registered_from.get_addon(short_name) + if not node_settings and short_name != 'osfstorage' and flag_is_active(get_current_request(), features.ENABLE_GV): + # get_addon() returns None for addons when archive is running inside of + # the celery task. In this case, try to get addon settings from the GV + try: + from website.archiver.tasks import get_addon_from_gv + node_settings = get_addon_from_gv(reg.registered_from, short_name, reg.registered_from.creator) + except Exception as e: + logger.warning(f'Could not load {short_name} from GV: {e}') + if not hasattr(node_settings, '_get_file_tree'): # Excludes invalid or None-type + logger.warning(f"Skipping {short_name} for {registration._id}.") continue if not node_settings.configured: if not allow_unconfigured: raise Exception(f'{reg._id}: {short_name} on {reg.registered_from._id} is not configured. If this is permissible, re-run with `--allow-unconfigured`.') + logger.warning(f"{short_name} is not configured for {registration._id}.") continue if not reg.archive_job.get_target(short_name) or reg.archive_job.get_target(short_name).status == ARCHIVER_SUCCESS: continue @@ -486,7 +502,7 @@ def verify_registrations(registration_ids, permissible_addons): else: SKIPPED.append(reg) -def check(reg): +def check(reg, *args, **kwargs): """Check registration status. Raise exception if registration stuck.""" logger.info(f'Checking {reg._id}') if reg.is_deleted: @@ -503,7 +519,7 @@ def check(reg): still_archiving = not archive_tree_finished if still_archiving and root_job.datetime_initiated < expired_if_before: logger.warning(f'Registration {reg._id} is stuck in archiving') - if verify(reg): + if verify(reg, *args, **kwargs): raise RegistrationStuckRecoverableException(f'Registration {reg._id} is stuck and verified recoverable') else: raise RegistrationStuckBrokenException(f'Registration {reg._id} is stuck and verified broken') diff --git a/website/archiver/tasks.py b/website/archiver/tasks.py index 42e5bfb568b..d650e54e392 100644 --- a/website/archiver/tasks.py +++ b/website/archiver/tasks.py @@ -35,6 +35,7 @@ from osf.models import ( ArchiveJob, AbstractNode, + Registration, DraftRegistration, ) from osf import features @@ -370,3 +371,29 @@ def archive_success(self, dst_pk, job_pk): dst.sanction.ask(dst.get_active_contributors_recursive(unique_users=True)) dst.update_search() + + +@celery_app.task(bind=True) +def force_archive(self, registration_id, permissible_addons, allow_unconfigured=False, skip_collisions=False, delete_collisions=False): + from osf.management.commands.force_archive import archive + + create_app_context() + + try: + registration = AbstractNode.load(registration_id) + if not registration or not isinstance(registration, Registration): + return f'Registration {registration_id} not found' + + archive( + registration, + permissible_addons=set(permissible_addons), + allow_unconfigured=allow_unconfigured, + skip_collisions=skip_collisions, + delete_collisions=delete_collisions, + ) + return f'Registration {registration_id} archive completed' + + except Exception as exc: + sentry.log_message(f'Archive task failed for {registration_id}: {exc}') + sentry.log_exception(exc) + return f'{exc.__class__.__name__}: {str(exc)}'