From b2c5e55c4f026ea945d957bb648a93c9c35bcf6c Mon Sep 17 00:00:00 2001 From: Roland Sommer Date: Thu, 5 May 2022 08:10:37 +0200 Subject: [PATCH 1/3] Allow unixtime-style timestamp extraction Some backup solutions prefer using UNIX timestamps for tagging backup files or directories. In order to enable the rotation of such files/directories some extra handling is added to enable scanning for UNIX timestamps in file- or directory-names. --- README.rst | 5 ++-- rotate_backups/__init__.py | 59 +++++++++++++++++++++++++++++--------- rotate_backups/cli.py | 5 ++-- 3 files changed, 51 insertions(+), 18 deletions(-) diff --git a/README.rst b/README.rst index d23d416..ae6a4b5 100644 --- a/README.rst +++ b/README.rst @@ -155,8 +155,9 @@ intended you have no right to complain ;-). usage of the ``-H``, ``--hourly`` option for details about ``COUNT``." "``-t``, ``--timestamp-pattern=PATTERN``","Customize the regular expression pattern that is used to match and extract timestamps from filenames. ``PATTERN`` is expected to be a Python compatible - regular expression that must define the named capture groups 'year', - 'month' and 'day' and may define 'hour', 'minute' and 'second'." + regular expression that must define a named capture group 'unixtime' or the + named capture groups 'year', 'month' and 'day' and may define 'hour', + 'minute' and 'second'." "``-I``, ``--include=PATTERN``","Only process backups that match the shell pattern given by ``PATTERN``. This argument can be repeated. Make sure to quote ``PATTERN`` so the shell doesn't expand the pattern before it's received by rotate-backups." diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index d144b6b..8d41b71 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -447,6 +447,16 @@ def strict(self): """ return True + @mutable_property + def _is_unixtime(self): + """ + Is the given pattern used to extract a unix timestamp? 
+ + This private property reflects if the given regex is used to extract + a unix timestamp from file- or directory names. + """ + return False + @mutable_property def timestamp_pattern(self): """ @@ -458,8 +468,9 @@ def timestamp_pattern(self): :func:`re.compile()` documentation for details). The regular expression pattern is expected to be a Python compatible - regular expression that defines the named capture groups 'year', - 'month' and 'day' and optionally 'hour', 'minute' and 'second'. + regular expression that defines the named capture group 'unixtime' or + the named capture groups 'year', 'month' and 'day' and optionally + 'hour', 'minute' and 'second'. String values are automatically coerced to compiled regular expressions by calling :func:`~humanfriendly.coerce_pattern()`, in this case only @@ -476,10 +487,15 @@ def timestamp_pattern(self): def timestamp_pattern(self, value): """Coerce the value of :attr:`timestamp_pattern` to a compiled regular expression.""" pattern = coerce_pattern(value, re.VERBOSE) - for component, required in SUPPORTED_DATE_COMPONENTS: - if component not in pattern.groupindex and required: - raise ValueError("Pattern is missing required capture group! (%s)" % component) - set_property(self, 'timestamp_pattern', pattern) + if "unixtime" in pattern.groupindex: + set_property(self, 'timestamp_pattern', pattern) + self._is_unixtime = True + else: + for component, required in SUPPORTED_DATE_COMPONENTS: + if component not in pattern.groupindex and required: + raise ValueError("Pattern is missing required capture group! 
(%s)" % component) + set_property(self, 'timestamp_pattern', pattern) + self._is_unixtime = False def rotate_concurrent(self, *locations, **kw): """ @@ -678,15 +694,30 @@ def match_to_datetime(self, match): """ kw = {} captures = match.groupdict() - for component, required in SUPPORTED_DATE_COMPONENTS: - value = captures.get(component) - if value: - kw[component] = int(value, 10) - elif required: - raise ValueError("Missing required date component! (%s)" % component) + if self._is_unixtime: + base = int(match.groupdict().get("unixtime")) + # Try seconds- and milliseconds-precision timestamps. + for value in (base, base / 1000): + try: + timestamp = datetime.datetime.fromtimestamp(value) + break + except ValueError: + timestamp = None + if timestamp is None: + logger.notice("Ignoring %s due to invalid date (%s).", value, match.group()) else: - kw[component] = 0 - return datetime.datetime(**kw) + logger.verbose("Extracted timestamp %r from %r", timestamp, value) + return timestamp + else: + for component, required in SUPPORTED_DATE_COMPONENTS: + value = captures.get(component) + if value: + kw[component] = int(value, 10) + elif required: + raise ValueError("Missing required date component! (%s)" % component) + else: + kw[component] = 0 + return datetime.datetime(**kw) def group_backups(self, backups): """ diff --git a/rotate_backups/cli.py b/rotate_backups/cli.py index c7746fe..f1bedf6 100644 --- a/rotate_backups/cli.py +++ b/rotate_backups/cli.py @@ -73,8 +73,9 @@ Customize the regular expression pattern that is used to match and extract timestamps from filenames. PATTERN is expected to be a Python compatible - regular expression that must define the named capture groups 'year', - 'month' and 'day' and may define 'hour', 'minute' and 'second'. + regular expression that must define the named capture group 'unixtime' or + the named capture groups 'year', 'month' and 'day' and may define 'hour', + 'minute' and 'second'. 
-I, --include=PATTERN From 8238cef19f6a0528567aa150d14af069b7398600 Mon Sep 17 00:00:00 2001 From: Roland Sommer Date: Thu, 5 May 2022 08:10:37 +0200 Subject: [PATCH 2/3] Add simple test for UNIX timestamp extraction --- rotate_backups/__init__.py | 2 +- rotate_backups/tests.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index 8d41b71..037abea 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -704,7 +704,7 @@ def match_to_datetime(self, match): except ValueError: timestamp = None if timestamp is None: - logger.notice("Ignoring %s due to invalid date (%s).", value, match.group()) + raise ValueError("%r could not be extracted as unix timestamp") else: logger.verbose("Extracted timestamp %r from %r", timestamp, value) return timestamp diff --git a/rotate_backups/tests.py b/rotate_backups/tests.py index c3b8078..5a8b966 100644 --- a/rotate_backups/tests.py +++ b/rotate_backups/tests.py @@ -145,6 +145,21 @@ def test_argument_validation(self): returncode, output = run_cli(main, '-n', '/root') assert returncode != 0 + def test_timestamp_dates(self): + """Make sure filenames with unix timestamps don't cause an exception.""" + with TemporaryDirectory(prefix='rotate-backups-', suffix='-test-suite') as root: + file_with_valid_date = os.path.join(root, 'snapshot-1612396800061.tar.gz') + file_with_invalid_date = os.path.join(root, 'snapshot-1807311501019237.tar.gz') + for filename in file_with_valid_date, file_with_invalid_date: + touch(filename) + program = RotateBackups( + rotation_scheme=dict(monthly='always'), + timestamp_pattern=r"-(?P<unixtime>\d+)\.tar\.gz" + ) + backups = program.collect_backups(root) + assert len(backups) == 1 + assert backups[0].pathname == file_with_valid_date + def test_invalid_dates(self): """Make sure filenames with invalid dates don't cause an exception.""" with TemporaryDirectory(prefix='rotate-backups-', suffix='-test-suite') as root: 
From d775f3c1cd962f41a5f7359cb046fd2b0270cdd6 Mon Sep 17 00:00:00 2001 From: Roland Sommer Date: Thu, 5 May 2022 08:11:50 +0200 Subject: [PATCH 3/3] Add a short example for UNIX timestamp extraction --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index ae6a4b5..c431e5f 100644 --- a/README.rst +++ b/README.rst @@ -64,6 +64,12 @@ Features (?P<second>\d{2})? )? + If your files are for example suffixed with UNIX timestamps, you can specify a + regular expression exposing a named capture group ``unixtime`` like this:: + + # Use UNIX timestamps + (?P<unixtime>\d+) + **All actions are logged** Log messages are saved to the system log (e.g. ``/var/log/syslog``) so you can retrace what happened when something seems to have gone wrong.