Skip to content

Commit 0805ff1

Browse files
Merge pull request #107 from timvaillancourt/MCB_1.0-zbackup
MCB_1.0: ZBackup support
2 parents f4d00fe + 48054d2 commit 0805ff1

File tree

11 files changed

+275
-42
lines changed

11 files changed

+275
-42
lines changed

README.rst

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@ Features
1515
- Auto-discovers healthy members for backup by considering replication
1616
lag, replication 'priority' and by preferring 'hidden' members.
1717
- Creates cluster-consistent backups across many separate shards
18-
- Archives and compresses backups (*inline compression with mongodump
19-
3.2+*)
2018
- Transparent restore process (*just add --oplogReplay flag to your
2119
mongorestore command*)
20+
- Archiving and compression of backups
21+
- Block de-duplication and optional AES encryption at rest via `ZBackup <http://zbackup.org/>`__
22+
archiving method
2223
- AWS S3 Secure/HTTPS Multipart backup uploads (*optional*)
2324
- `Nagios NSCA <https://sourceforge.net/p/nagios/nsca>`__ push
2425
notification support (*optional*)
26+
- Modular backup, archiving, upload and notification components
2527
- Multi-threaded, single executable
2628

2729
Current Limitations
@@ -137,6 +139,36 @@ Run as Docker Container (Experimental)
137139
$ make docker
138140
$ docker run -t mongodb_consistent_backup <mongodb_consistent_backup-flags>
139141

142+
143+
ZBackup Archiving (Optional)
144+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
145+
146+
`ZBackup <http://zbackup.org/>`__ *(with LZMA compression)* is an optional archive method for mongodb_consistent_backup. This archive method significantly reduces disk usage for backups via deduplication and compression.
147+
148+
ZBackup offers block de-duplication and compression of backups and optionally supports AES-128 encryption at rest. The ZBackup archive method causes backups to be stored via ZBackup at archive time.
149+
150+
To enable, ZBackup must be installed on your system and the 'archive.method' config file variable *(or the --archive.method flag)* must be set to 'zbackup'. Backups are stored in a repository directory named *mongodb-consistent-backup_zbackup* and must be restored using a 'zbackup restore' command.
151+
152+
**Install on CentOS/RHEL**
153+
154+
::
155+
156+
$ yum install zbackup
157+
158+
**Install on Debian/Ubuntu**
159+
160+
::
161+
162+
$ apt-get install zbackup
163+
164+
ZBackup data is stored in a repository directory named *mongodb-consistent-backup_zbackup* and must be restored using a 'zbackup restore ...' command.
165+
166+
**Get Backup from ZBackup Repo**
167+
168+
::
169+
170+
$ zbackup restore --password-file /etc/zbackup.passwd /mnt/backup/default/mongodb-consistent-backup_zbackup/backups/20170424_0000.tar
171+
140172
Roadmap
141173
~~~~~~~
142174

conf/mongodb-consistent-backup.example.conf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@ production:
3232
# tar:
3333
# compression: [none|gzip] (default: gzip, none if backup is compressed)
3434
# threads: [1+] (default: 1 per CPU)
35+
# zbackup:
36+
# binary: [path] (default: /usr/bin/zbackup)
37+
# cache_mb: [mb] (default: 128)
38+
# compression: [lzma] (only lzma is supported)
39+
# password_file: [path] (default: none)
40+
# threads: [1+] (default: 1 per CPU)
3541
notify:
3642
method: none
3743
# nsca:

mongodb_consistent_backup/Archive/Archive.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from mongodb_consistent_backup.Archive.Tar import Tar
2+
from mongodb_consistent_backup.Archive.Zbackup import Zbackup
23
from mongodb_consistent_backup.Pipeline import Stage
34

45

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import os
2+
import logging
3+
4+
from select import select
5+
from subprocess import Popen, PIPE, call
6+
7+
from mongodb_consistent_backup.Errors import OperationError
8+
from mongodb_consistent_backup.Pipeline import Task
9+
10+
11+
class Zbackup(Task):
    """Archive task that stores a backup in a ZBackup repository.

    Each entry of the backup directory (except the metadata directory) is
    streamed through ``tar`` into ``zbackup backup``. The repository lives in
    ``<backup.location>/<backup.name>/mongodb-consistent-backup_zbackup`` and
    is created on first use. When ``archive.zbackup.password_file`` is set,
    the password file is passed to every zbackup invocation; otherwise the
    ``--non-encrypted`` flag is used.
    """

    def __init__(self, manager, config, timer, base_dir, backup_dir, **kwargs):
        """Read the ZBackup settings from *config* and prepare the repository.

        Raises:
            OperationError: if the repository directory exists but is not a
                ZBackup repository, or if creating a new repository fails.
        """
        super(Zbackup, self).__init__(self.__class__.__name__, manager, config, timer, base_dir, backup_dir, **kwargs)
        self.backup_name = self.config.backup.name
        self.backup_time = os.path.basename(self.backup_dir)
        self.zbackup_binary = self.config.archive.zbackup.binary
        self.zbackup_cache_mb = self.config.archive.zbackup.cache_mb
        self.zbackup_passwd_file = self.config.archive.zbackup.password_file

        # NOTE(review): the thread count set here is reported in the run() log
        # line but is not added to the zbackup command line built by
        # get_commands() - confirm whether that is intended.
        if self.config.archive.zbackup.threads and self.config.archive.zbackup.threads > 0:
            self.threads(self.config.archive.zbackup.threads)

        # only lzma compression supported (for now)
        self.compression_method = 'lzma'
        self.compression_supported = ['lzma']

        self.zbackup_dir = os.path.join(self.config.backup.location, self.backup_name, "mongodb-consistent-backup_zbackup")
        self.zbackup_backups = os.path.join(self.zbackup_dir, "backups")
        # Kept for compatibility; the per-directory paths actually written are
        # built in get_commands().
        self.zbackup_backup_path = os.path.join(self.zbackup_backups, "%s.tar" % self.backup_time)
        self.zbackup_bundles = os.path.join(self.zbackup_dir, "bundles")
        self.zbackup_info = os.path.join(self.zbackup_dir, "info")
        self.backup_meta_dir = "mongodb-consistent-backup_META"

        # Encryption follows the configuration, not whether the repository
        # happens to be created by this run: an existing repository combined
        # with a password file must still be driven with --password-file.
        self.encrypted = bool(self.zbackup_passwd_file)
        self._zbackup = None
        self._tar = None
        self._version = None

        self.init()

    def is_zbackup_init(self):
        """Return True if the repository dir has the 'info' file and the 'backups' and 'bundles' dirs."""
        return (
            os.path.isfile(self.zbackup_info) and
            os.path.isdir(self.zbackup_backups) and
            os.path.isdir(self.zbackup_bundles)
        )

    def init(self):
        """Validate an existing repository directory or create a new one with 'zbackup init'.

        Raises:
            OperationError: if the directory exists but does not look like a
                ZBackup repository, or if 'zbackup init' fails.
        """
        if os.path.isdir(self.zbackup_dir):
            if not self.is_zbackup_init():
                raise OperationError("ZBackup dir: %s is not a zbackup storage directory!" % self.zbackup_dir)
            logging.info("Found existing ZBackup storage dir at: %s (encrypted: %s)" % (self.zbackup_dir, self.encrypted))
            return

        try:
            cmd_line = [self.zbackup_binary]
            if self.encrypted:
                cmd_line.extend(["--password-file", self.zbackup_passwd_file, "init", self.zbackup_dir])
                logging.info("Using ZBackup AES encryption with password file: %s" % self.zbackup_passwd_file)
            else:
                cmd_line.extend(["--non-encrypted", "init", self.zbackup_dir])
            logging.warning("Initializing new ZBackup storage directory at: %s (encrypted: %s)" % (self.zbackup_dir, self.encrypted))
            logging.debug("Using ZBackup command: '%s'" % cmd_line)
            exit_code = call(cmd_line)
            if exit_code != 0:
                raise OperationError("ZBackup initialization failed! Exit code: %i" % exit_code)
        except Exception as e:
            raise OperationError("Error creating ZBackup storage directory! Error: %s" % e)

    def version(self):
        """Return the ZBackup version string, or None if it cannot be determined.

        The version is parsed from the first line that 'zbackup --help' writes
        to stderr: the line must start with 'ZBackup', contain 'version ' and
        end in a three-part dotted version. The result is cached.

        Raises:
            OperationError: on any unexpected error other than OSError
                (OSError, e.g. a missing binary, yields None).
        """
        if self._version:
            return self._version
        try:
            cmd = Popen([self.zbackup_binary, "--help"], stderr=PIPE)
            _, stderr = cmd.communicate()
            if stderr:
                line = stderr.split("\n")[0]
                if line.startswith("ZBackup") and "version " in line:
                    version = line.split(" ")[-1]
                    if len(version.split(".")) == 3:
                        self._version = version
                        return self._version
            return None
        except OSError:
            return None
        except Exception as e:
            raise OperationError("Could not gather ZBackup version: %s" % e)

    def has_zbackup(self):
        """Return True if a ZBackup version could be detected via version()."""
        return bool(self.version())

    def _terminate_children(self):
        """Terminate the zbackup and tar subprocesses if they are still running."""
        children = (
            ("ZBackup command", self._zbackup),
            ("ZBackup tar command", self._tar),
        )
        for label, proc in children:
            if proc and proc.poll() is None:
                logging.debug("Stopping running %s" % label)
                try:
                    proc.terminate()
                except OSError:
                    # The process exited between poll() and terminate().
                    pass

    def close(self, exit_code=None, frame=None):
        """Stop any running subprocesses; the two arguments are accepted and ignored.

        Does nothing if the task is already marked as stopped.
        """
        del exit_code
        del frame
        if not self.stopped:
            self._terminate_children()
            self.stopped = True

    def poll(self, timeout=1):
        """Wait up to *timeout* seconds for zbackup stderr output and log one line of it."""
        try:
            readable, _, _ = select([self._zbackup.stderr.fileno()], [], [], timeout)
        except ValueError:
            # fileno() raises ValueError once the pipe has been closed.
            return
        if readable:
            line = self._zbackup.stderr.readline()
            if line:
                logging.info(line.rstrip())

    def wait(self):
        """Block until the current tar | zbackup pipeline has finished.

        While tar runs, zbackup's stderr is relayed to the log. Once tar has
        exited successfully, zbackup's stdin is closed (via communicate()) so
        that it sees end-of-input, and its exit code is checked.

        Raises:
            OperationError: if tar or zbackup exits with a non-zero code, or
                if an unexpected error occurs while waiting.
        """
        try:
            tar_done = False
            while self._zbackup.stderr and self._tar.stderr:
                self.poll()
                if tar_done:
                    # communicate() closes our copy of zbackup's stdin and
                    # waits for zbackup to exit.
                    _, remaining = self._zbackup.communicate()
                    for line in (remaining or "").splitlines():
                        if line:
                            logging.info(line.rstrip())
                    if self._zbackup.poll() is not None:
                        if self._zbackup.returncode != 0:
                            raise OperationError("ZBackup exited with code: %i!" % self._zbackup.returncode)
                        logging.info("ZBackup completed successfully with exit code: %i" % self._zbackup.returncode)
                        break
                elif self._tar.poll() is not None:
                    if self._tar.returncode == 0:
                        logging.debug("ZBackup tar command completed successfully with exit code: %i" % self._tar.returncode)
                        tar_done = True
                    else:
                        raise OperationError("ZBackup archiving failed on tar command with exit code: %i" % self._tar.returncode)
        except OperationError:
            # Already a precise failure; do not relabel it as a read error.
            raise
        except Exception as e:
            raise OperationError("Error reading ZBackup output: %s" % e)

    def get_commands(self, base_dir, sub_dir):
        """Build the tar and zbackup argument lists for one backup sub-directory.

        Returns:
            A (tar, zbackup) tuple of argument lists. The tar command is given
            '--remove-files'; zbackup is pointed at
            '<repo>/backups/<backup_time>.<sub_dir>.tar'.
        """
        tar = ["tar", "--remove-files", "-C", base_dir, "-c", sub_dir]
        zbackup = [self.zbackup_binary, "--cache-size", "%imb" % self.zbackup_cache_mb, "--compression", self.compression()]
        zbackup_path = os.path.join(self.zbackup_backups, "%s.%s.tar" % (self.backup_time, sub_dir))
        if self.encrypted:
            zbackup.extend(["--password-file", self.zbackup_passwd_file, "backup", zbackup_path])
        else:
            zbackup.extend(["--non-encrypted", "backup", zbackup_path])
        return tar, zbackup

    def run(self):
        """Archive every entry of the backup directory (except the metadata dir) into ZBackup.

        Raises:
            OperationError: if the ZBackup binary cannot be found or any
                tar/zbackup pipeline fails.
        """
        if not self.has_zbackup():
            raise OperationError("Cannot find ZBackup at %s!" % self.zbackup_binary)

        try:
            logging.info("Starting ZBackup version: %s (options: compression=%s, encryption=%s, threads=%i, cache_mb=%i)" %
                (self.version(), self.compression(), self.encrypted, self.threads(), self.zbackup_cache_mb)
            )
            self.running = True
            try:
                for sub_dir in os.listdir(self.backup_dir):
                    if sub_dir == self.backup_meta_dir:
                        continue
                    logging.info("Running ZBackup for path: %s" % os.path.join(self.backup_dir, sub_dir))
                    tar_cmd, zbkp_cmd = self.get_commands(self.backup_dir, sub_dir)
                    logging.debug("Running ZBackup tar command: %s" % tar_cmd)
                    logging.debug("Running ZBackup command: %s" % zbkp_cmd)
                    self._zbackup = Popen(zbkp_cmd, stdin=PIPE, stderr=PIPE)
                    # NOTE(review): tar's stderr pipe is never read; a very
                    # chatty tar could fill it and stall - confirm acceptable.
                    self._tar = Popen(tar_cmd, stdout=self._zbackup.stdin, stderr=PIPE)
                    self.wait()
            except Exception as e:
                raise OperationError("Could not execute ZBackup: %s" % e)
            logging.info("Completed running all ZBackups")
            self.completed = True
        finally:
            # On failure a child may still be running; stop it here because
            # close() does nothing once 'stopped' is set below.
            self._terminate_children()
            self.running = False
            self.stopped = True
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from Zbackup import Zbackup
2+
3+
4+
def config(parser):
    """Register the 'archive.zbackup.*' options on *parser* and return it."""
    # (dest, add_argument keyword arguments), in registration order.
    zbackup_options = (
        ("archive.zbackup.binary", dict(
            help="Path to ZBackup binary (default: /usr/bin/zbackup)",
            default='/usr/bin/zbackup',
            type=str,
        )),
        ("archive.zbackup.cache_mb", dict(
            help="Megabytes of RAM to use as a cache for ZBackup (default: 128)",
            default=128,
            type=int,
        )),
        ("archive.zbackup.compression", dict(
            help="Type of compression to use with ZBackup (default: lzma)",
            default='lzma',
            choices=['lzma'],
            type=str,
        )),
        ("archive.zbackup.password_file", dict(
            help="Path to ZBackup backup password file, enables AES encryption (default: none)",
            default=None,
            type=str,
        )),
        ("archive.zbackup.threads", dict(
            help="Number of threads to use for ZBackup (default: 1-per-CPU)",
            default=0,
            type=int,
        )),
    )
    for option_dest, option_kwargs in zbackup_options:
        # The flag is the dest name prefixed with "--".
        parser.add_argument("--" + option_dest, dest=option_dest, **option_kwargs)
    return parser

mongodb_consistent_backup/Archive/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33

44
def config(parser):
5-
parser.add_argument("--archive.method", dest="archive.method", help="Archiver method (default: tar)", default='tar', choices=['tar','none'])
5+
parser.add_argument("--archive.method", dest="archive.method", help="Archiver method (default: tar)", default='tar', choices=['tar','zbackup','none'])
66
return parser

mongodb_consistent_backup/Backup/Mongodump/Mongodump.py

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ def __init__(self, manager, config, timer, base_dir, backup_dir, replsets, shard
2626
self.replsets = replsets
2727
self.sharding = sharding
2828

29-
self.version = 'unknown'
30-
self.threads_max = 16
31-
self.config_replset = False
32-
self.dump_threads = []
33-
self.states = {}
34-
self._summary = {}
29+
self.compression_supported = ['none', 'gzip']
30+
self.version = 'unknown'
31+
self.threads_max = 16
32+
self.config_replset = False
33+
self.dump_threads = []
34+
self.states = {}
35+
self._summary = {}
3536

3637
if self.config.backup.mongodump.threads and self.config.backup.mongodump.threads > 0:
3738
self.threads(self.config.backup.mongodump.threads)
@@ -162,12 +163,14 @@ def run(self):
162163
return self._summary
163164

164165
def close(self):
165-
logging.info("Stopping all mongodump threads")
166-
if len(self.dump_threads) > 0:
167-
for thread in self.dump_threads:
168-
thread.terminate()
169-
try:
170-
self.timer.stop(self.timer_name)
171-
except:
172-
pass
173-
logging.info("Stopped all mongodump threads")
166+
if not self.stopped:
167+
logging.info("Stopping all mongodump threads")
168+
if len(self.dump_threads) > 0:
169+
for thread in self.dump_threads:
170+
thread.terminate()
171+
try:
172+
self.timer.stop(self.timer_name)
173+
except:
174+
pass
175+
logging.info("Stopped all mongodump threads")
176+
self.stopped = True

mongodb_consistent_backup/Oplog/Resolver/Resolver.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,9 @@ def __init__(self, manager, config, timer, base_dir, backup_dir, tailed_oplogs,
3030
self.tailed_oplogs = tailed_oplogs
3131
self.backup_oplogs = backup_oplogs
3232

33-
self.resolver_summary = {}
34-
self.resolver_state = {}
33+
self.compression_supported = ['none', 'gzip']
34+
self.resolver_summary = {}
35+
self.resolver_state = {}
3536

3637
try:
3738
self._pool = Pool(processes=self.threads(None, 2))

mongodb_consistent_backup/Oplog/Tailer/Tailer.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,9 @@ def __init__(self, manager, config, timer, base_dir, backup_dir, replsets):
2323
self.status_secs = self.config.oplog.tailer.status_interval
2424
self.replsets = replsets
2525

26-
self.shards = {}
27-
self._summary = {}
26+
self.compression_supported = ['none', 'gzip']
27+
self.shards = {}
28+
self._summary = {}
2829

2930
def summary(self):
3031
return self._summary

mongodb_consistent_backup/Pipeline/Task.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,16 +23,17 @@ def __init__(self, task_name, manager, config, timer, base_dir, backup_dir, **kw
2323
self.completed = False
2424
self.exit_code = 255
2525

26-
self.thread_count = None
27-
self.cpu_count = cpu_count()
28-
self.compression_method = 'none'
29-
self.timer_name = self.__class__.__name__
26+
self.thread_count = None
27+
self.cpu_count = cpu_count()
28+
self.compression_method = 'none'
29+
self.compression_supported = ['none']
30+
self.timer_name = self.__class__.__name__
3031

3132
signal(SIGINT, SIG_IGN)
3233
signal(SIGTERM, self.close)
3334

3435
def compression(self, method=None):
35-
if method:
36+
if method and method in self.compression_supported:
3637
self.compression_method = parse_method(method)
3738
logging.info("Setting %s compression method: %s" % (self.task_name, self.compression_method))
3839
return parse_method(self.compression_method)

0 commit comments

Comments
 (0)