From a648df57f761d3d319228684fafe7c49b63e6b60 Mon Sep 17 00:00:00 2001 From: Aaron LI Date: Wed, 17 Jan 2018 00:32:06 +0800 Subject: Move backup.py and dar_backup.py to backup directory --- backup/backup.py | 274 ++++++++++++++++++++++++++++++++++++++++++++ backup/dar-backup.py | 315 +++++++++++++++++++++++++++++++++++++++++++++++++++ bin/backup.py | 273 -------------------------------------------- bin/dar_backup.py | 315 --------------------------------------------------- 4 files changed, 589 insertions(+), 588 deletions(-) create mode 100755 backup/backup.py create mode 100755 backup/dar-backup.py delete mode 100755 bin/backup.py delete mode 100755 bin/dar_backup.py diff --git a/backup/backup.py b/backup/backup.py new file mode 100755 index 0000000..8a72cc5 --- /dev/null +++ b/backup/backup.py @@ -0,0 +1,274 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2017 Aaron LI +# MIT license +# +# 2017-05-11 +# + +""" +Backup the specified files & directories with the metadata (e.g., ownership, +permission) preserved as much as possible. + +The extended attributes (i.e., xattr) are only available on Linux only. + + +Example configuration (YAML format) +----------------------------------- +src_root : / +sources : [] +dest_root : /backup +dest_remove : [] +syspath : [] +----------------------------------- + +References +---------- +""" + +import os +import sys +import stat +import argparse +import subprocess +import logging +from fnmatch import fnmatch +from datetime import datetime + +try: + from yaml import load as yaml_load +except ImportError: + from ruamel.yaml import safe_load as yaml_load + + +logging.basicConfig(level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S") +progname = os.path.basename(sys.argv[0]) +logger = logging.getLogger(progname) + +# Protected system paths +# The destination backup path is checked against these paths, and cannot +# located where matches these paths, thus to avoid the system files being +# overwritten or removed in accidence. +syspath = set(["/", "/home", "/media", "/mnt", "/root", + "/bin/*", "/boot/*", "/dev/*", "/etc/*", "/lib/*", "/lib64/*", + "/proc/*", "/sbin/*", "/sys/*", "/usr/*", "/var/*"]) + + +class Backup: + """ + Backup files with metadata preserved. + """ + def __init__(self, configfile, dryrun=False, debug=False): + self.configfile = configfile + self.config = yaml_load(open(configfile)) + logger.info("Loaded configuration file: %s" % configfile) + src_root = self.config.get("src_root", "/") + if os.path.isabs(src_root): + self.src_root = src_root + logger.info("Source root directory: %s" % self.src_root) + else: + raise ValueError("Source root must be an absolute path") + self.syspath = syspath.union(self.config.get("syspath", [])) + logger.info("Protected system paths: {0}".format(self.syspath)) + dest_root = os.path.expanduser(self.config["dest_root"]) + logger.info("Check backup destination against protected paths ...") + self.dest_root = self.check_dest(dest_root) + logger.info("Backup destination: %s" % self.dest_root) + self.dryrun = dryrun + logger.info("Dry run mode: %s" % dryrun) + self.debug = debug + logger.info("Show DEBUG information: %s" % debug) + + def check_dest(self, dest): + """ + Check the given destination backup directory against the protected + system paths. + """ + dest = dest.rstrip("/") + if not os.path.isabs(dest): + raise ValueError("Destination must be an absolute path") + for sp in self.syspath: + if fnmatch(dest, sp): + raise ValueError("Destination cannot under " + "protected paths: %s" % dest) + return dest + + def backup(self): + """ + Do backup sources to the destination. + + The necessary destination directories are created before copying + files. + """ + if not os.path.exists(self.dest_root): + logger.info("Create destination root: " % self.dest_root) + self.mkdir(self.dest_root, dryrun=self.dryrun) + for p in self.config["sources"]: + src = os.path.join(self.src_root, p.lstrip("/")) + dest = os.path.join(self.dest_root, p.lstrip("/")) + if os.path.isdir(src): + src_dir = src + dest_dir = dest + else: + src_dir = os.path.dirname(src) + dest_dir = os.path.dirname(dest) + self.mkdirs(dest_dir, ref=src_dir, dryrun=self.dryrun) + self.copy(src, dest, dryrun=self.dryrun, debug=self.debug) + + def cleanup(self): + """ + Remove the obsolete files/directories from the destination. + """ + for p in self.config.get("dest_remove", []): + path = os.path.join(self.dest_root, p.lstrip("/")) + self.remove(path, dest_root=self.dest_root, + dryrun=self.dryrun, debug=self.debug) + + @staticmethod + def copy(src, dest, dryrun=False, debug=False): + """ + Copy file/directory using `rsync` with metadata preserved, and to + keep directory contents in sync. + + Use `rsync --version` to check the available capabilities. + """ + if os.path.isdir(src): + src = src.rstrip("/") + "/" + dest = dest.rstrip("/") + "/" + logger.info("Copy & sync: %s -> %s" % (src, dest)) + args = ["--archive", "--hard-links", "--numeric-ids", + "--delete", "--delete-after"] + if os.uname().sysname == "Linux": + args += ["--acls", "--xattrs"] + if debug: + args += ["--verbose"] + cmd = ["rsync"] + args + [src, dest] + if not dryrun: + subprocess.check_call(cmd) + + @staticmethod + def remove(path, dest_root, dryrun=False, debug=False): + """ + Remove the specified file/directory using `rm -rf`, to clean + up the destination backup. + + The specified path must locate under the `dest_root` for safety. + """ + if not fnmatch(path, dest_root+"/*"): + raise ValueError("Not allowed to remove file/directory " + "outside destination: %s" % path) + if not os.path.exists(path): + return + logger.info("Remove: %s" % path) + args = ["-r", "-f"] + if debug: + args += ["-v"] + cmd = ["rm"] + args + [path] + if not dryrun: + subprocess.check_call(cmd) + + @classmethod + def mkdirs(cls, dest, ref=None, dryrun=False): + """ + Recursively create the destination directories. + """ + head, tail = os.path.split(dest) + head_ref, tail_ref = os.path.split(ref) + if not tail: + head, tail = os.path.split(head) + head_ref, tail_ref = os.path.split(head_ref) + if head and tail and not os.path.exists(head): + cls.mkdirs(head, ref=head_ref, dryrun=dryrun) + if tail == os.path.curdir: + # `xxx/newdir/.` exists if `xxx/newdir` exists + return + cls.mkdir(dest, ref=ref, dryrun=dryrun) + + @classmethod + def mkdir(cls, dest, ref=None, dryrun=False): + """ + Make the destination directory with the same metadata w.r.t. the + reference directory. + + Parameters + ---------- + dest : str + The destination directory to be created + ref : str, optional + The reference directory, whose metadata will be mirrored to + the destination directory. + dryrun : bool + If True, do not actually create the directory + """ + if os.path.exists(dest): + return + logger.info("Create directory: %s" % dest) + if not dryrun: + os.mkdir(dest) + if ref and not dryrun: + cls.copystat(ref, dest) + + @classmethod + def copystat(cls, src, dest, copy_own=True, copy_xattr=True): + """ + Copy all stat info (mode bits, atime, mtime, flags) from `src` to + `dest`. If `copy_own=True`, the uid and gid are also copied. + If `copy_xattr=True`, the extended attributes are also copied + (only available on Linux). + """ + st = os.stat(src) + mode = stat.S_IMODE(st.st_mode) + os.chmod(dest, mode=mode) + os.utime(dest, ns=(st.st_atime_ns, st.st_mtime_ns)) + if hasattr(st, "st_flags"): + os.chflags(dest, flags=st.st_flags) + if copy_own: + os.chown(dest, uid=st.st_uid, gid=st.st_gid) + if copy_xattr: + cls.copyxattr(src, dest) + + @staticmethod + def copyxattr(src, dest): + """ + Copy the extended attributes (xattr) from `src` to `dest`. + + NOTE: xattr only available on Linux. + """ + if not hasattr(os, "listxattr"): + return + for name in os.listxattr(src): + value = os.getxattr(src, name) + os.setxattr(dest, name, value) + + +def main(): + parser = argparse.ArgumentParser( + description="Backup files preserving metadata") + parser.add_argument("-n", "--dry-run", dest="dryrun", action="store_true", + help="dry run, do not perform actual action") + parser.add_argument("-q", "--quiet", dest="quiet", action="store_true", + help="be quiet") + parser.add_argument("-d", "--debug", dest="debug", action="store_true", + help="show verbose debug information") + parser.add_argument("config", help="configuration file") + args = parser.parse_args() + + if args.quiet and not args.dryrun: + logging.basicConfig(level=logging.WARNING) + if args.debug: + logging.basicConfig(level=logging.DEBUG) + + now = datetime.now() + logger.info("=== %s @ %s ===" % (" ".join(sys.argv), now.isoformat())) + if args.dryrun: + logger.info("*** DRY RUN ***") + backup = Backup(args.config, dryrun=args.dryrun, debug=args.debug) + backup.backup() + backup.cleanup() + logger.info("=== Backup Finished! @ %s ===" % datetime.now().isoformat()) + + +if __name__ == "__main__": + main() diff --git a/backup/dar-backup.py b/backup/dar-backup.py new file mode 100755 index 0000000..47ff815 --- /dev/null +++ b/backup/dar-backup.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2017 Aaron LI +# MIT license +# +# 2017-03-08 + +""" +Full/differential backup using 'dar' with 'par2'. + +This script accepts a configuration file, which is an simple YAML file +and defines all necessary options for 'dar'. +The 'dar'-created archives are stored under the same directory as the +configuration file, and 'par2' is used to create redundancy data for +possible repairs at the same time, and finally the archives are +tested/verified by 'dar'. + + +Example configuration file for system/root backup +------------------------------------------------- +# recommended format: -root--{date} +name: "office-root-opensuse-{date}" +fs_root: "/" +is_system: true +prune: [] +exclude: [] +exclude_compression: [] + + +Example configuration file for user home backup +----------------------------------------------- +# recommended format: -home--{date} +name: "office-home-aly-{date}" +fs_root: "/home/aly" +prune: [".cache",] +exclude: [] +exclude_compression: [] + + +Credit/references +----------------- +[1] http://www.halfgaar.net/backing-up-unix +[2] http://dar.linux.free.fr/doc/man/dar.html +[3] https://github.com/Parchive/par2cmdline +[4] http://dar.linux.free.fr/doc/samples/dar_par_create.duc +[5] http://dar.linux.free.fr/doc/samples/dar_par_test.duc +""" + +import os +import sys +import argparse +import subprocess +import logging +from datetime import datetime +from glob import glob + +import yaml + + +progname = os.path.basename(sys.argv[0]) +logger = logging.getLogger(progname) + + +class DarSettings: + """ + dar settings + """ + # directory (relative path) for storing the isolated catalogs + catalog_path = "catalogs" + + # date format for output archive name + date_fmt = "%Y%m%dT%H%M" + + # Default settings + args_common = [ + "--min-digits", "3,3,3", + "--noconf", # do not try to read /etc/darrc or ~/.darrc + ] + args_default = [ + "--alter=atime", # do not preserve atime + "--alter=no-case", # case insensitive mode + "--alter=glob", # glob expression mode (instead of regex mode) + "--alter=binary", # use 2^10 instead of 10^3 + "--compression=bzip2", + "--empty-dir", # create empty directories for the excluded + "--hash", "sha512", # calculate the hash of slices on the fly + "--no-overwrite", + "--no-mount-points", # stay in the same filesystem + ] + size_slice = "2G" + exclude = set(["*~", ".*~", ".~*"]) + exclude_compression = set([ + "*.7z", "*.ape", "*.avi", "*.bz2", "*.deb", "*.exe", "*.flac", + "*.flv", "*.gz", "*.iso", "*.jar", "*.jpg", "*.jpeg", + "*.m4a", "*.m4v", "*.mkv", "*.mov", "*.mp3", "*.mp4", "*.ogg", + "*.rar", "*.rpm", "*.tar.bz2", "*.tar.gz", "*.tar.xz", + "*.tbz", "*.tgz", "*.txz", "*.wmv", "*.xz", "*.zip", + ]) + prune = set() + verbose = set() + + # Parchive + redundancy = 5 # 5% of redundancy for par2 + cmd_par_create = ("par2 create -r{redundancy} -n1 " + "'%p/%b.%N.par2' '%p/%b.%N.%e'") + cmd_par_test = "par2 verify '%p/%b.%N.par2'" + + # Whether it is a backup of system root + is_system = False + prune_system = set(["dev/pts", "dev/shm", "home", "lost+found", + "media", "mnt", "proc", "run", "sys", + "tmp", "var/cache", "var/tmp"]) + + def __init__(self, configfile, verbose=False, dry_run=False): + if verbose: + self.verbose = set(["treated", "messages"]) + else: + self.verbose = set() + if dry_run: + self.args_common += ["--dry-run"] + + self.path = os.path.dirname(os.path.abspath(configfile)) + settings = yaml.load(open(configfile)) + self.merge_settings(settings) + + def merge_settings(self, settings): + self.name = settings["name"] + self.fs_root = settings["fs_root"] + self.is_system = settings.get("is_system", self.is_system) + self.date_fmt = settings.get("date_fmt", self.date_fmt) + self.catalog_path = settings.get("catalog_path", self.catalog_path) + self.size_slice = settings.get("size_slice", self.size_slice) + self.redundancy = settings.get("redundancy", self.redundancy) + self.verbose = self.verbose.union(settings.get("verbose", set())) + self.prune = self.prune.union(settings.get("prune", set())) + self.exclude = self.exclude.union(settings.get("exclude", set())) + self.exclude_compression = self.exclude_compression.union( + settings.get("exclude_compression", set())) + + def archive_name(self, date=None): + # Return the archive name with date substituted + if date is None: + date = datetime.now().strftime(self.date_fmt) + name = os.path.join(self.path, self.name.format(date=date)) + return name + + def last_archive_name(self): + # Get the last archive (backup data or isolated catalog) + # used as the reference archive for a differential backup. + name = self.last_backup() + if name is None: + name = self.last_catalog() + return name + + def last_backup(self): + # Return the name of last backup if found, otherwise, return None + backup_glob = self.archive_name(date="*") + ".dar" + try: + backup = sorted(glob(backup_glob))[-1] + name = os.path.splitext(os.path.splitext(backup)[0])[0] + logger.info("Found last backup: {0}".format(name)) + return name + except IndexError: + logger.info("No existing backups found!") + return None + + def last_catalog(self): + # Return the name of last isolated catalog if found, otherwise, + # return None + basename = os.path.basename(self.archive_name(date="*")) + ".dar" + catalog_glob = os.path.join(self.path, self.catalog_path, basename) + try: + catalog = sorted(glob(catalog_glob))[-1] + name = os.path.splitext(os.path.splitext(catalog)[0])[0] + logger.info("Found last catalog: {0}".format(name)) + return name + except IndexError: + logger.info("No existing catalogs found!") + return None + + @property + def args_create(self): + cmd_par = self.cmd_par_create.format(redundancy=self.redundancy) + args = self.args_default + [ + "--execute", cmd_par, + "--fs-root", self.fs_root, + "--slice", self.size_slice, + ] + args += ["--verbose=%s" % item for item in self.verbose] + for item in self.exclude: + args += ["--exclude", item] + for item in self.exclude_compression: + args += ["-Z", item] + for item in self.prune: + args += ["--prune", item] + if self.is_system: + for item in self.prune_system: + args += ["--prune", item] + logger.info("args_create: {0}".format(args)) + return args + + @property + def args_test(self): + args = ["--execute", self.cmd_par_test] + args += ["--verbose=%s" % item for item in self.verbose] + logger.info("args_test: {0}".format(args)) + return args + + +class DarBackup: + """ + dar backup with settings + """ + def __init__(self, settings): + self.settings = settings + self.is_system = settings.is_system + self.path = settings.path + self.catalog_path = settings.catalog_path + self.archive_name = settings.archive_name() + self.last_archive_name = settings.last_archive_name() + self.args_common = settings.args_common + self.args_create = settings.args_create + self.args_test = settings.args_test + + def run(self, dry_run=False): + if self.is_system and (not self.is_running_as_root()): + raise RuntimeError("Running as root to backup the system root") + + logger.info("Output archive name: {0}".format(self.archive_name)) + if self.last_archive_name is not None: + self.backup_diff() + else: + self.backup_full() + + if not dry_run: + self.test_backup() + self.isolate_catalog() + + def backup_full(self): + logger.info("Start full backup ...") + cmd = [ + "dar", + "--create", self.archive_name, + ] + self.args_common + self.args_create + logger.info("Command: {0}".format(cmd)) + subprocess.run(cmd, check=True) + logger.info("Full backup: DONE!") + + def backup_diff(self): + logger.info("Start differential backup ...") + cmd = [ + "dar", + "--create", self.archive_name, + "--ref", self.last_archive_name, + ] + self.args_common + self.args_create + logger.info("Command: {0}".format(cmd)) + subprocess.run(cmd, check=True) + logger.info("Differential backup: DONE!") + + def test_backup(self): + logger.info("Test created backup ...") + cmd = [ + "dar", + "--test", self.archive_name, + ] + self.args_common + self.args_test + logger.info("Command: {0}".format(cmd)) + subprocess.run(cmd, check=True) + logger.info("Test backup: DONE!") + + def isolate_catalog(self): + logger.info("Isolate the catalog from backup ...") + catalog_dir = os.path.join(self.path, self.catalog_path) + if not os.path.exists(catalog_dir): + os.mkdir(catalog_dir) + catalog = os.path.join(catalog_dir, + os.path.basename(self.archive_name)) + logger.info("Output catalog: {0}".format(catalog)) + cmd = [ + "dar", + "--isolate", catalog, + "--ref", self.archive_name, + ] + self.args_common + logger.info("Command: {0}".format(cmd)) + subprocess.run(cmd, check=True) + logger.info("Isolate backup catalog: DONE!") + + @staticmethod + def is_running_as_root(): + return os.getuid() == 0 + + +def main(): + parser = argparse.ArgumentParser( + description="Backup system/data using dar and par2") + parser.add_argument("-c", "--config", dest="config", required=True, + help="configuration file for dar and archive. " + + "NOTE: the backup archive will be placed under " + + "the same directory as this configuration file") + parser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", + help="dry run, do not perform any action") + parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", + help="show verbose information") + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.INFO) + + settings = DarSettings(args.config, verbose=args.verbose, + dry_run=args.dry_run) + dar = DarBackup(settings) + dar.run(dry_run=args.dry_run) + + +if __name__ == "__main__": + main() diff --git a/bin/backup.py b/bin/backup.py deleted file mode 100755 index f204f13..0000000 --- a/bin/backup.py +++ /dev/null @@ -1,273 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2017 Aaron LI -# MIT license -# -# 2017-05-11 - -""" -Backup the specified files & directories with the metadata (e.g., ownership, -permission) preserved as much as possible. - -The extended attributes (i.e., xattr) are only available on Linux only. - - -Example configuration (YAML format) ------------------------------------ -src_root : / -sources : [] -dest_root : /backup -dest_remove : [] -syspath : [] ------------------------------------ - -References ----------- -""" - -import os -import sys -import stat -import argparse -import subprocess -import logging -from fnmatch import fnmatch -from datetime import datetime - -try: - from yaml import load as yaml_load -except ImportError: - from ruamel.yaml import safe_load as yaml_load - - -logging.basicConfig(level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", - datefmt="%Y-%m-%dT%H:%M:%S") -progname = os.path.basename(sys.argv[0]) -logger = logging.getLogger(progname) - -# Protected system paths -# The destination backup path is checked against these paths, and cannot -# located where matches these paths, thus to avoid the system files being -# overwritten or removed in accidence. -syspath = set(["/", "/home", "/media", "/mnt", "/root", - "/bin/*", "/boot/*", "/dev/*", "/etc/*", "/lib/*", "/lib64/*", - "/proc/*", "/sbin/*", "/sys/*", "/usr/*", "/var/*"]) - - -class Backup: - """ - Backup files with metadata preserved. - """ - def __init__(self, configfile, dryrun=False, debug=False): - self.configfile = configfile - self.config = yaml_load(open(configfile)) - logger.info("Loaded configuration file: %s" % configfile) - src_root = self.config.get("src_root", "/") - if os.path.isabs(src_root): - self.src_root = src_root - logger.info("Source root directory: %s" % self.src_root) - else: - raise ValueError("Source root must be an absolute path") - self.syspath = syspath.union(self.config.get("syspath", [])) - logger.info("Protected system paths: {0}".format(self.syspath)) - dest_root = os.path.expanduser(self.config["dest_root"]) - logger.info("Check backup destination against protected paths ...") - self.dest_root = self.check_dest(dest_root) - logger.info("Backup destination: %s" % self.dest_root) - self.dryrun = dryrun - logger.info("Dry run mode: %s" % dryrun) - self.debug = debug - logger.info("Show DEBUG information: %s" % debug) - - def check_dest(self, dest): - """ - Check the given destination backup directory against the protected - system paths. - """ - dest = dest.rstrip("/") - if not os.path.isabs(dest): - raise ValueError("Destination must be an absolute path") - for sp in self.syspath: - if fnmatch(dest, sp): - raise ValueError("Destination cannot under " - "protected paths: %s" % dest) - return dest - - def backup(self): - """ - Do backup sources to the destination. - - The necessary destination directories are created before copying - files. - """ - if not os.path.exists(self.dest_root): - logger.info("Create destination root: " % self.dest_root) - self.mkdir(self.dest_root, dryrun=self.dryrun) - for p in self.config["sources"]: - src = os.path.join(self.src_root, p.lstrip("/")) - dest = os.path.join(self.dest_root, p.lstrip("/")) - if os.path.isdir(src): - src_dir = src - dest_dir = dest - else: - src_dir = os.path.dirname(src) - dest_dir = os.path.dirname(dest) - self.mkdirs(dest_dir, ref=src_dir, dryrun=self.dryrun) - self.copy(src, dest, dryrun=self.dryrun, debug=self.debug) - - def cleanup(self): - """ - Remove the obsolete files/directories from the destination. - """ - for p in self.config.get("dest_remove", []): - path = os.path.join(self.dest_root, p.lstrip("/")) - self.remove(path, dest_root=self.dest_root, - dryrun=self.dryrun, debug=self.debug) - - @staticmethod - def copy(src, dest, dryrun=False, debug=False): - """ - Copy file/directory using `rsync` with metadata preserved, and to - keep directory contents in sync. - - Use `rsync --version` to check the available capabilities. - """ - if os.path.isdir(src): - src = src.rstrip("/") + "/" - dest = dest.rstrip("/") + "/" - logger.info("Copy & sync: %s -> %s" % (src, dest)) - args = ["--archive", "--hard-links", "--numeric-ids", - "--delete", "--delete-after"] - if os.uname().sysname == "Linux": - args += ["--acls", "--xattrs"] - if debug: - args += ["--verbose"] - cmd = ["rsync"] + args + [src, dest] - if not dryrun: - subprocess.check_call(cmd) - - @staticmethod - def remove(path, dest_root, dryrun=False, debug=False): - """ - Remove the specified file/directory using `rm -rf`, to clean - up the destination backup. - - The specified path must locate under the `dest_root` for safety. - """ - if not fnmatch(path, dest_root+"/*"): - raise ValueError("Not allowed to remove file/directory " - "outside destination: %s" % path) - if not os.path.exists(path): - return - logger.info("Remove: %s" % path) - args = ["-r", "-f"] - if debug: - args += ["-v"] - cmd = ["rm"] + args + [path] - if not dryrun: - subprocess.check_call(cmd) - - @classmethod - def mkdirs(cls, dest, ref=None, dryrun=False): - """ - Recursively create the destination directories. - """ - head, tail = os.path.split(dest) - head_ref, tail_ref = os.path.split(ref) - if not tail: - head, tail = os.path.split(head) - head_ref, tail_ref = os.path.split(head_ref) - if head and tail and not os.path.exists(head): - cls.mkdirs(head, ref=head_ref, dryrun=dryrun) - if tail == os.path.curdir: - # `xxx/newdir/.` exists if `xxx/newdir` exists - return - cls.mkdir(dest, ref=ref, dryrun=dryrun) - - @classmethod - def mkdir(cls, dest, ref=None, dryrun=False): - """ - Make the destination directory with the same metadata w.r.t. the - reference directory. - - Parameters - ---------- - dest : str - The destination directory to be created - ref : str, optional - The reference directory, whose metadata will be mirrored to - the destination directory. - dryrun : bool - If True, do not actually create the directory - """ - if os.path.exists(dest): - return - logger.info("Create directory: %s" % dest) - if not dryrun: - os.mkdir(dest) - if ref and not dryrun: - cls.copystat(ref, dest) - - @classmethod - def copystat(cls, src, dest, copy_own=True, copy_xattr=True): - """ - Copy all stat info (mode bits, atime, mtime, flags) from `src` to - `dest`. If `copy_own=True`, the uid and gid are also copied. - If `copy_xattr=True`, the extended attributes are also copied - (only available on Linux). - """ - st = os.stat(src) - mode = stat.S_IMODE(st.st_mode) - os.chmod(dest, mode=mode) - os.utime(dest, ns=(st.st_atime_ns, st.st_mtime_ns)) - if hasattr(st, "st_flags"): - os.chflags(dest, flags=st.st_flags) - if copy_own: - os.chown(dest, uid=st.st_uid, gid=st.st_gid) - if copy_xattr: - cls.copyxattr(src, dest) - - @staticmethod - def copyxattr(src, dest): - """ - Copy the extended attributes (xattr) from `src` to `dest`. - - NOTE: xattr only available on Linux. - """ - if not hasattr(os, "listxattr"): - return - for name in os.listxattr(src): - value = os.getxattr(src, name) - os.setxattr(dest, name, value) - - -def main(): - parser = argparse.ArgumentParser( - description="Backup files preserving metadata") - parser.add_argument("-n", "--dry-run", dest="dryrun", action="store_true", - help="dry run, do not perform actual action") - parser.add_argument("-q", "--quiet", dest="quiet", action="store_true", - help="be quiet") - parser.add_argument("-d", "--debug", dest="debug", action="store_true", - help="show verbose debug information") - parser.add_argument("config", help="configuration file") - args = parser.parse_args() - - if args.quiet and not args.dryrun: - logging.basicConfig(level=logging.WARNING) - if args.debug: - logging.basicConfig(level=logging.DEBUG) - - now = datetime.now() - logger.info("=== %s @ %s ===" % (" ".join(sys.argv), now.isoformat())) - if args.dryrun: - logger.info("*** DRY RUN ***") - backup = Backup(args.config, dryrun=args.dryrun, debug=args.debug) - backup.backup() - backup.cleanup() - logger.info("=== Backup Finished! @ %s ===" % datetime.now().isoformat()) - - -if __name__ == "__main__": - main() diff --git a/bin/dar_backup.py b/bin/dar_backup.py deleted file mode 100755 index 47ff815..0000000 --- a/bin/dar_backup.py +++ /dev/null @@ -1,315 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright (c) 2017 Aaron LI -# MIT license -# -# 2017-03-08 - -""" -Full/differential backup using 'dar' with 'par2'. - -This script accepts a configuration file, which is an simple YAML file -and defines all necessary options for 'dar'. -The 'dar'-created archives are stored under the same directory as the -configuration file, and 'par2' is used to create redundancy data for -possible repairs at the same time, and finally the archives are -tested/verified by 'dar'. - - -Example configuration file for system/root backup -------------------------------------------------- -# recommended format: -root--{date} -name: "office-root-opensuse-{date}" -fs_root: "/" -is_system: true -prune: [] -exclude: [] -exclude_compression: [] - - -Example configuration file for user home backup ------------------------------------------------ -# recommended format: -home--{date} -name: "office-home-aly-{date}" -fs_root: "/home/aly" -prune: [".cache",] -exclude: [] -exclude_compression: [] - - -Credit/references ------------------ -[1] http://www.halfgaar.net/backing-up-unix -[2] http://dar.linux.free.fr/doc/man/dar.html -[3] https://github.com/Parchive/par2cmdline -[4] http://dar.linux.free.fr/doc/samples/dar_par_create.duc -[5] http://dar.linux.free.fr/doc/samples/dar_par_test.duc -""" - -import os -import sys -import argparse -import subprocess -import logging -from datetime import datetime -from glob import glob - -import yaml - - -progname = os.path.basename(sys.argv[0]) -logger = logging.getLogger(progname) - - -class DarSettings: - """ - dar settings - """ - # directory (relative path) for storing the isolated catalogs - catalog_path = "catalogs" - - # date format for output archive name - date_fmt = "%Y%m%dT%H%M" - - # Default settings - args_common = [ - "--min-digits", "3,3,3", - "--noconf", # do not try to read /etc/darrc or ~/.darrc - ] - args_default = [ - "--alter=atime", # do not preserve atime - "--alter=no-case", # case insensitive mode - "--alter=glob", # glob expression mode (instead of regex mode) - "--alter=binary", # use 2^10 instead of 10^3 - "--compression=bzip2", - "--empty-dir", # create empty directories for the excluded - "--hash", "sha512", # calculate the hash of slices on the fly - "--no-overwrite", - "--no-mount-points", # stay in the same filesystem - ] - size_slice = "2G" - exclude = set(["*~", ".*~", ".~*"]) - exclude_compression = set([ - "*.7z", "*.ape", "*.avi", "*.bz2", "*.deb", "*.exe", "*.flac", - "*.flv", "*.gz", "*.iso", "*.jar", "*.jpg", "*.jpeg", - "*.m4a", "*.m4v", "*.mkv", "*.mov", "*.mp3", "*.mp4", "*.ogg", - "*.rar", "*.rpm", "*.tar.bz2", "*.tar.gz", "*.tar.xz", - "*.tbz", "*.tgz", "*.txz", "*.wmv", "*.xz", "*.zip", - ]) - prune = set() - verbose = set() - - # Parchive - redundancy = 5 # 5% of redundancy for par2 - cmd_par_create = ("par2 create -r{redundancy} -n1 " - "'%p/%b.%N.par2' '%p/%b.%N.%e'") - cmd_par_test = "par2 verify '%p/%b.%N.par2'" - - # Whether it is a backup of system root - is_system = False - prune_system = set(["dev/pts", "dev/shm", "home", "lost+found", - "media", "mnt", "proc", "run", "sys", - "tmp", "var/cache", "var/tmp"]) - - def __init__(self, configfile, verbose=False, dry_run=False): - if verbose: - self.verbose = set(["treated", "messages"]) - else: - self.verbose = set() - if dry_run: - self.args_common += ["--dry-run"] - - self.path = os.path.dirname(os.path.abspath(configfile)) - settings = yaml.load(open(configfile)) - self.merge_settings(settings) - - def merge_settings(self, settings): - self.name = settings["name"] - self.fs_root = settings["fs_root"] - self.is_system = settings.get("is_system", self.is_system) - self.date_fmt = settings.get("date_fmt", self.date_fmt) - self.catalog_path = settings.get("catalog_path", self.catalog_path) - self.size_slice = settings.get("size_slice", self.size_slice) - self.redundancy = settings.get("redundancy", self.redundancy) - self.verbose = self.verbose.union(settings.get("verbose", set())) - self.prune = self.prune.union(settings.get("prune", set())) - self.exclude = self.exclude.union(settings.get("exclude", set())) - self.exclude_compression = self.exclude_compression.union( - settings.get("exclude_compression", set())) - - def archive_name(self, date=None): - # Return the archive name with date substituted - if date is None: - date = datetime.now().strftime(self.date_fmt) - name = os.path.join(self.path, self.name.format(date=date)) - return name - - def last_archive_name(self): - # Get the last archive (backup data or isolated catalog) - # used as the reference archive for a differential backup. - name = self.last_backup() - if name is None: - name = self.last_catalog() - return name - - def last_backup(self): - # Return the name of last backup if found, otherwise, return None - backup_glob = self.archive_name(date="*") + ".dar" - try: - backup = sorted(glob(backup_glob))[-1] - name = os.path.splitext(os.path.splitext(backup)[0])[0] - logger.info("Found last backup: {0}".format(name)) - return name - except IndexError: - logger.info("No existing backups found!") - return None - - def last_catalog(self): - # Return the name of last isolated catalog if found, otherwise, - # return None - basename = os.path.basename(self.archive_name(date="*")) + ".dar" - catalog_glob = os.path.join(self.path, self.catalog_path, basename) - try: - catalog = sorted(glob(catalog_glob))[-1] - name = os.path.splitext(os.path.splitext(catalog)[0])[0] - logger.info("Found last catalog: {0}".format(name)) - return name - except IndexError: - logger.info("No existing catalogs found!") - return None - - @property - def args_create(self): - cmd_par = self.cmd_par_create.format(redundancy=self.redundancy) - args = self.args_default + [ - "--execute", cmd_par, - "--fs-root", self.fs_root, - "--slice", self.size_slice, - ] - args += ["--verbose=%s" % item for item in self.verbose] - for item in self.exclude: - args += ["--exclude", item] - for item in self.exclude_compression: - args += ["-Z", item] - for item in self.prune: - args += ["--prune", item] - if self.is_system: - for item in self.prune_system: - args += ["--prune", item] - logger.info("args_create: {0}".format(args)) - return args - - @property - def args_test(self): - args = ["--execute", self.cmd_par_test] - args += ["--verbose=%s" % item for item in self.verbose] - logger.info("args_test: {0}".format(args)) - return args - - -class DarBackup: - """ - dar backup with settings - """ - def __init__(self, settings): - self.settings = settings - self.is_system = settings.is_system - self.path = settings.path - self.catalog_path = settings.catalog_path - self.archive_name = settings.archive_name() - self.last_archive_name = settings.last_archive_name() - self.args_common = settings.args_common - self.args_create = settings.args_create - self.args_test = settings.args_test - - def run(self, dry_run=False): - if self.is_system and (not self.is_running_as_root()): - raise RuntimeError("Running as root to backup the system root") - - logger.info("Output archive name: {0}".format(self.archive_name)) - if self.last_archive_name is not None: - self.backup_diff() - else: - self.backup_full() - - if not dry_run: - self.test_backup() - self.isolate_catalog() - - def backup_full(self): - logger.info("Start full backup ...") - cmd = [ - "dar", - "--create", self.archive_name, - ] + self.args_common + self.args_create - logger.info("Command: {0}".format(cmd)) - subprocess.run(cmd, check=True) - logger.info("Full backup: DONE!") - - def backup_diff(self): - logger.info("Start differential backup ...") - cmd = [ - "dar", - "--create", self.archive_name, - "--ref", self.last_archive_name, - ] + self.args_common + self.args_create - logger.info("Command: {0}".format(cmd)) - subprocess.run(cmd, check=True) - logger.info("Differential backup: DONE!") - - def test_backup(self): - logger.info("Test created backup ...") - cmd = [ - "dar", - "--test", self.archive_name, - ] + self.args_common + self.args_test - logger.info("Command: {0}".format(cmd)) - subprocess.run(cmd, check=True) - logger.info("Test backup: DONE!") - - def isolate_catalog(self): - logger.info("Isolate the catalog from backup ...") - catalog_dir = os.path.join(self.path, self.catalog_path) - if not os.path.exists(catalog_dir): - os.mkdir(catalog_dir) - catalog = os.path.join(catalog_dir, - os.path.basename(self.archive_name)) - logger.info("Output catalog: {0}".format(catalog)) - cmd = [ - "dar", - "--isolate", catalog, - "--ref", self.archive_name, - ] + self.args_common - logger.info("Command: {0}".format(cmd)) - subprocess.run(cmd, check=True) - logger.info("Isolate backup catalog: DONE!") - - @staticmethod - def is_running_as_root(): - return os.getuid() == 0 - - -def main(): - parser = argparse.ArgumentParser( - description="Backup system/data using dar and par2") - parser.add_argument("-c", "--config", dest="config", required=True, - help="configuration file for dar and archive. " + - "NOTE: the backup archive will be placed under " + - "the same directory as this configuration file") - parser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", - help="dry run, do not perform any action") - parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", - help="show verbose information") - args = parser.parse_args() - - if args.verbose: - logging.basicConfig(level=logging.INFO) - - settings = DarSettings(args.config, verbose=args.verbose, - dry_run=args.dry_run) - dar = DarBackup(settings) - dar.run(dry_run=args.dry_run) - - -if __name__ == "__main__": - main() -- cgit v1.2.2