diff --git a/rotate_backups/__init__.py b/rotate_backups/__init__.py index d144b6b..5d50cb9 100644 --- a/rotate_backups/__init__.py +++ b/rotate_backups/__init__.py @@ -228,6 +228,8 @@ def load_config_file(configuration_file=None, expand=True): # 'timestamp-pattern' configuration file option has a value set. if items.get('timestamp-pattern'): options['timestamp_pattern'] = items['timestamp-pattern'] + if items.get('stat-timestamp'): + options['stat_timestamp'] = items['stat-timestamp'] # Expand filename patterns? if expand and location.have_wildcards: logger.verbose("Expanding filename pattern %s on %s ..", location.directory, location.context) @@ -260,8 +262,134 @@ def rotate_backups(directory, rotation_scheme, **options): program.rotate_backups(directory) -class RotateBackups(PropertyManager): +class Match: + """An interface-like class for a date match.""" + + def match_to_datetime(self) -> datetime.datetime: + """Return a datetime from the Match.""" + pass + + +class Matcher: + """An interface-like class for a date-matching scheme.""" + + def search(self, location: str, entry: str) -> Match: + """Process an entry in a location and return a Match.""" + pass + + +class FilenameMatch(Match): + """A match based on a filename pattern.""" + + match: re.Match = None + + def __init__(self, match: re.Match): + """Make a Match from a regular expression match.""" + self.match = match + + def match_to_datetime(self) -> datetime.datetime: + """ + Convert the regular expression match to a :class:`~datetime.datetime` value. + + :returns: A :class:`~datetime.datetime` value. + :raises: :exc:`exceptions.ValueError` when a required date component is + not captured by the pattern, the captured value is an empty + string or the captured value cannot be interpreted as a + base-10 integer. + + .. seealso:: :data:`SUPPORTED_DATE_COMPONENTS` + """ + kw = {} + captures = self.match.groupdict() + for component, required in SUPPORTED_DATE_COMPONENTS: + value = captures.get(component) + if value: + kw[component] = int(value, 10) + elif required: + raise ValueError("Missing required date component! (%s)" % component) + else: + kw[component] = 0 + return datetime.datetime(**kw) + + +class FilenameMatcher(Matcher, PropertyManager): + """A date-matching scheme based on filenames.""" + + def search(self, location: str, entry: str) -> FilenameMatch: + """Apply the pattern to the entry's name, and return a Match if found.""" + if match := self.timestamp_pattern.search(entry): + return FilenameMatch(match) + + @mutable_property + def timestamp_pattern(self): + """ + The pattern used to extract timestamps from filenames (defaults to :data:`TIMESTAMP_PATTERN`). + + The value of this property is a compiled regular expression object. + Callers can provide their own compiled regular expression which + makes it possible to customize the compilation flags (see the + :func:`re.compile()` documentation for details). + + The regular expression pattern is expected to be a Python compatible + regular expression that defines the named capture groups 'year', + 'month' and 'day' and optionally 'hour', 'minute' and 'second'. + + String values are automatically coerced to compiled regular expressions + by calling :func:`~humanfriendly.coerce_pattern()`, in this case only + the :data:`re.VERBOSE` flag is used. + + If the caller provides a custom pattern it will be validated + to confirm that the pattern contains named capture groups + corresponding to each of the required date components + defined by :data:`SUPPORTED_DATE_COMPONENTS`. + """ + return TIMESTAMP_PATTERN + + @timestamp_pattern.setter + def timestamp_pattern(self, value): + """Coerce the value of :attr:`timestamp_pattern` to a compiled regular expression.""" + pattern = coerce_pattern(value, re.VERBOSE) + for component, required in SUPPORTED_DATE_COMPONENTS: + if component not in pattern.groupindex and required: + raise ValueError("Pattern is missing required capture group! (%s)" % component) + set_property(self, 'timestamp_pattern', pattern) + +class FilestatMatch(Match): + """A match based on a filename statistics.""" + + entry: str = None + field: str = None + + def __init__(self, entry: str, timestamp='mtime'): + """Make a Match from a field's stat attribute.""" + self.entry = entry + self.field = f'st_{timestamp}' + + def match_to_datetime(self) -> datetime.datetime: + """Return a datetime from the file's stat field.""" + return datetime.datetime.fromtimestamp( + getattr(os.stat(self.entry), self.field) + ) + + +class FilestatMatcher(Matcher): + """A date-matching scheme based on file statistics.""" + + timestamp: str = None + + def __init__(self, timestamp: str = 'mtime'): + """Make a Matcher based on a file's stat attribute.""" + self.timestamp = timestamp + + def search(self, location: str, entry: str) -> FilestatMatch: + """Return the file's stat attribute.""" + return FilestatMatch( + os.path.join(location.directory, entry), + self.timestamp) + + +class RotateBackups(PropertyManager): """Python API for the ``rotate-backups`` program.""" def __init__(self, rotation_scheme, **options): @@ -418,6 +546,19 @@ def rotation_scheme(self): in the dictionary. """ + @mutable_property + def stat_timestamp(self): + """Whether to use the files' mtime instead of parsing their name.""" + return isinstance(self.match, FilestatMatcher) + + @stat_timestamp.setter + def stat_timestamp(self, value): + if value: + logger.info("Using file mtime to determine file date") + self.matcher = FilestatMatcher() + elif isinstance(self.match, FilestatMatcher): + del self.matcher + @mutable_property def strict(self): """ @@ -447,39 +588,31 @@ def strict(self): """ return True - @mutable_property + @property def timestamp_pattern(self): - """ - The pattern used to extract timestamps from filenames (defaults to :data:`TIMESTAMP_PATTERN`). - - The value of this property is a compiled regular expression object. - Callers can provide their own compiled regular expression which - makes it possible to customize the compilation flags (see the - :func:`re.compile()` documentation for details). - - The regular expression pattern is expected to be a Python compatible - regular expression that defines the named capture groups 'year', - 'month' and 'day' and optionally 'hour', 'minute' and 'second'. - - String values are automatically coerced to compiled regular expressions - by calling :func:`~humanfriendly.coerce_pattern()`, in this case only - the :data:`re.VERBOSE` flag is used. - - If the caller provides a custom pattern it will be validated - to confirm that the pattern contains named capture groups - corresponding to each of the required date components - defined by :data:`SUPPORTED_DATE_COMPONENTS`. - """ - return TIMESTAMP_PATTERN + """Pattern to use to extract a timestamp from a filename.""" + if not isinstance(self.matcher, FilenameMatcher): + raise ValueError('Current matcher is not a FilenameMatcher') + return self.matcher.timestamp_pattern @timestamp_pattern.setter def timestamp_pattern(self, value): - """Coerce the value of :attr:`timestamp_pattern` to a compiled regular expression.""" - pattern = coerce_pattern(value, re.VERBOSE) - for component, required in SUPPORTED_DATE_COMPONENTS: - if component not in pattern.groupindex and required: - raise ValueError("Pattern is missing required capture group! (%s)" % component) - set_property(self, 'timestamp_pattern', pattern) + """Pattern to use to extract a timestamp from a filename.""" + if not isinstance(self.matcher, FilenameMatcher): + raise ValueError('Current matcher is not a FilenameMatcher') + self.matcher.timestamp_pattern = value + + @cached_property + def matcher(self): + """Matcher to use to extract a timestamp from a file.""" + return FilenameMatcher(timestamp_pattern=TIMESTAMP_PATTERN) + + @matcher.setter + def matcher(self, matcher): + """Matcher to use to extract a timestamp from a file.""" + if not isinstance(matcher, Matcher): + raise ValueError(f'{matcher} is not a Matcher') + set_property(self, 'matcher', matcher) def rotate_concurrent(self, *locations, **kw): """ @@ -642,8 +775,9 @@ def collect_backups(self, location): location = coerce_location(location) logger.info("Scanning %s for backups ..", location) location.ensure_readable(self.force) + for entry in natsort(location.context.list_entries(location.directory)): - match = self.timestamp_pattern.search(entry) + match = self.matcher.search(location, entry) if match: if self.exclude_list and any(fnmatch.fnmatch(entry, p) for p in self.exclude_list): logger.verbose("Excluded %s (it matched the exclude list).", entry) @@ -653,7 +787,7 @@ def collect_backups(self, location): try: backups.append(Backup( pathname=os.path.join(location.directory, entry), - timestamp=self.match_to_datetime(match), + timestamp=match.match_to_datetime(), )) except ValueError as e: logger.notice("Ignoring %s due to invalid date (%s).", entry, e) @@ -663,31 +797,6 @@ def collect_backups(self, location): logger.info("Found %i timestamped backups in %s.", len(backups), location) return sorted(backups) - def match_to_datetime(self, match): - """ - Convert a regular expression match to a :class:`~datetime.datetime` value. - - :param match: A regular expression match object. - :returns: A :class:`~datetime.datetime` value. - :raises: :exc:`exceptions.ValueError` when a required date component is - not captured by the pattern, the captured value is an empty - string or the captured value cannot be interpreted as a - base-10 integer. - - .. seealso:: :data:`SUPPORTED_DATE_COMPONENTS` - """ - kw = {} - captures = match.groupdict() - for component, required in SUPPORTED_DATE_COMPONENTS: - value = captures.get(component) - if value: - kw[component] = int(value, 10) - elif required: - raise ValueError("Missing required date component! (%s)" % component) - else: - kw[component] = 0 - return datetime.datetime(**kw) - def group_backups(self, backups): """ Group backups collected by :func:`collect_backups()` by rotation frequencies. diff --git a/rotate_backups/cli.py b/rotate_backups/cli.py index c7746fe..6d89b4a 100644 --- a/rotate_backups/cli.py +++ b/rotate_backups/cli.py @@ -168,6 +168,11 @@ readable and/or writable for the current user (or the user logged in to a remote system over SSH). + -s, --stat-timestamp + + Use mtime stat timestamps, instead of filenames, to determine the + date of each file. + -S, --syslog=CHOICE Explicitly enable or disable system logging instead of letting the program @@ -241,12 +246,12 @@ def main(): selected_locations = [] # Parse the command line arguments. try: - options, arguments = getopt.getopt(sys.argv[1:], 'M:H:d:w:m:y:t:I:x:jpri:c:C:uS:fnvqh', [ + options, arguments = getopt.getopt(sys.argv[1:], 'M:H:d:w:m:y:t:I:x:jpri:c:C:usS:fnvqh', [ 'minutely=', 'hourly=', 'daily=', 'weekly=', 'monthly=', 'yearly=', 'timestamp-pattern=', 'include=', 'exclude=', 'parallel', 'prefer-recent', 'relaxed', 'ionice=', 'config=', - 'removal-command=', 'use-sudo', 'syslog=', 'force', - 'dry-run', 'verbose', 'quiet', 'help', + 'removal-command=', 'use-sudo', 'stat-timestamp', 'syslog=', + 'force', 'dry-run', 'verbose', 'quiet', 'help', ]) for option, value in options: if option in ('-M', '--minutely'): @@ -284,6 +289,8 @@ def main(): kw['removal_command'] = removal_command elif option in ('-u', '--use-sudo'): use_sudo = True + elif option in ('-s', '--stat-timestamp'): + kw['stat_timestamp'] = True elif option in ('-S', '--syslog'): use_syslog = coerce_boolean(value) elif option in ('-f', '--force'): diff --git a/rotate_backups/tests.py b/rotate_backups/tests.py index c3b8078..9ba7d75 100644 --- a/rotate_backups/tests.py +++ b/rotate_backups/tests.py @@ -11,6 +11,7 @@ import datetime import logging import os +import time # External dependencies. from executor import ExternalCommandFailed @@ -21,6 +22,7 @@ # The module we're testing. from rotate_backups import ( RotateBackups, + FilenameMatcher, coerce_location, coerce_retention_period, load_config_file, @@ -217,6 +219,55 @@ def test_rotate_backups(self): backups_that_were_preserved = set(os.listdir(root)) assert backups_that_were_preserved == expected_to_be_preserved + def test_rotate_backups_mtime(self): + """Test the :func:`.rotate_backups()` function.""" + # These are the backups expected to be preserved. After each backup + # I've noted which rotation scheme it falls in and the number of + # preserved backups within that rotation scheme (counting up as we + # progress through the backups sorted by date). + expected_to_be_preserved = set([ + '2013-10-10@20:07', # monthly (1), yearly (1) + '2013-11-01@20:06', # monthly (2) + '2013-12-01@20:07', # monthly (3) + '2014-01-01@20:07', # monthly (4), yearly (2) + '2014-02-01@20:05', # monthly (5) + '2014-03-01@20:04', # monthly (6) + '2014-04-01@20:03', # monthly (7) + '2014-05-01@20:06', # monthly (8) + '2014-06-01@20:01', # monthly (9) + '2014-06-09@20:01', # weekly (1) + '2014-06-16@20:02', # weekly (2) + '2014-06-23@20:04', # weekly (3) + '2014-06-26@20:04', # daily (1) + '2014-06-27@20:02', # daily (2) + '2014-06-28@20:02', # daily (3) + '2014-06-29@20:01', # daily (4) + '2014-06-30@20:03', # daily (5), weekly (4) + '2014-07-01@20:02', # daily (6), monthly (10) + '2014-07-02@20:03', # hourly (1), daily (7) + ]) + with TemporaryDirectory(prefix='rotate-backups-', suffix='-test-suite') as root: + # Specify the rotation scheme and options through a configuration file. + config_file = os.path.join(root, 'rotate-backups.ini') + subdir = os.path.join(root, 'mtime') + parser = configparser.RawConfigParser() + parser.add_section(subdir) + parser.set(subdir, 'hourly', '24') + parser.set(subdir, 'daily', '7') + parser.set(subdir, 'weekly', '4') + parser.set(subdir, 'monthly', '12') + parser.set(subdir, 'yearly', 'always') + parser.set(subdir, 'ionice', 'idle') + parser.set(subdir, 'stat-timestamp', 'yes') + with open(config_file, 'w') as handle: + parser.write(handle) + self.create_sample_backup_set(root) + map = self.apply_mtime_and_rename(root, subdir) + run_cli(main, '--verbose', '--config=%s' % config_file) + backups_that_were_preserved = set(os.listdir(subdir)) + assert backups_that_were_preserved == set([map[e] + for e in expected_to_be_preserved]) + def test_rotate_concurrent(self): """Test the :func:`.rotate_concurrent()` function.""" # These are the backups expected to be preserved @@ -506,6 +557,21 @@ def create_sample_backup_set(self, root): for name in SAMPLE_BACKUP_SET: os.mkdir(os.path.join(root, name)) + def apply_mtime_and_rename(self, root, subdir): + """Extract mtime from filename, update file stat, rename and return a map of old to new name""" + os.mkdir(subdir) + fm = FilenameMatcher() + map = {} + for name in os.listdir(root): + map[name] = name + if match := fm.search(root, name): + file = os.path.join(root, name) + t = time.mktime(match.match_to_datetime().timetuple()) + os.utime(file, times=(t, t)) + os.rename(file, os.path.join(subdir, f'{t}s')) + map[name] = f'{t}s' + return map + @contextlib.contextmanager def readonly_directory(pathname):