#!/usr/bin/env python3
#
# Copyright (c) 2017 Aaron LI
# MIT license
#
# 2017-05-11
#
# Added to the repository as bin/backup.py by commit
# 8e07e5eef87d5d40bf371e83692853fb98f3c070 ("Add bin/backup.py",
# Aaron LI, 2017-05-13).

"""
Backup the specified files & directories with the metadata (e.g., ownership,
permission) preserved as much as possible.

The extended attributes (i.e., xattr) are only available on Linux.


Example configuration (YAML format)
-----------------------------------
src_root : /
sources : []
dest_root : /backup
dest_remove : []
syspath : []
-----------------------------------
"""

import os
import sys
import stat
import argparse
import subprocess
import logging
from fnmatch import fnmatch
from datetime import datetime


def yaml_load(stream):
    """
    Parse the YAML document in `stream` and return the resulting object.

    The YAML package (PyYAML, falling back to ruamel.yaml) is imported on
    first use, so the script can still show its usage message when no YAML
    package is installed.  The *safe* loader is used: a backup
    configuration only needs plain mappings, lists and strings, and
    `yaml.load()` without an explicit loader is rejected by PyYAML >= 6.

    Raises
    ------
    ImportError
        If neither PyYAML nor ruamel.yaml can be imported.
    """
    try:
        from yaml import safe_load
    except ImportError:
        from ruamel.yaml import safe_load
    return safe_load(stream)


logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s [%(levelname)s] %(message)s",
                    datefmt="%Y-%m-%dT%H:%M:%S")
progname = os.path.basename(sys.argv[0])
logger = logging.getLogger(progname)

# Protected system paths
# The backup destination is checked against these glob patterns and must
# not match any of them, so that system files cannot be overwritten or
# removed by accident.  A pattern of the form "/dir/*" protects "/dir"
# itself as well as everything below it.
syspath = {"/", "/home", "/media", "/mnt", "/root",
           "/bin/*", "/boot/*", "/dev/*", "/etc/*", "/lib/*", "/lib64/*",
           "/proc/*", "/sbin/*", "/sys/*", "/usr/*", "/var/*"}


class Backup:
    """
    Backup files with metadata preserved.

    Parameters
    ----------
    configfile : str
        Path to the YAML configuration file.  The keys read are
        `src_root` (default "/"), `sources`, `dest_root` (required),
        `dest_remove` and `syspath`.
    dryrun : bool
        If True, only log the actions without performing them.
    debug : bool
        If True, run the external commands in verbose mode.

    Raises
    ------
    KeyError
        If the configuration has no `dest_root`.
    ValueError
        If `src_root` or `dest_root` is not absolute, or `dest_root`
        matches a protected system path.
    """
    def __init__(self, configfile, dryrun=False, debug=False):
        self.configfile = configfile
        with open(configfile) as f:
            # An empty YAML document loads as None
            self.config = yaml_load(f) or {}
        logger.info("Loaded configuration file: %s", configfile)
        src_root = self.config.get("src_root", "/")
        if not os.path.isabs(src_root):
            raise ValueError("Source root must be an absolute path")
        self.src_root = src_root
        logger.info("Source root directory: %s", self.src_root)
        # Extra protected paths from the configuration extend the defaults
        self.syspath = syspath.union(self.config.get("syspath") or [])
        logger.info("Protected system paths: %s", self.syspath)
        dest_root = os.path.expanduser(self.config["dest_root"])
        logger.info("Check backup destination against protected paths ...")
        self.dest_root = self.check_dest(dest_root)
        logger.info("Backup destination: %s", self.dest_root)
        self.dryrun = dryrun
        logger.info("Dry run mode: %s", dryrun)
        self.debug = debug
        logger.info("Show DEBUG information: %s", debug)

    def check_dest(self, dest):
        """
        Check the given destination backup directory against the protected
        system paths.

        Returns
        -------
        str
            The normalized destination (no trailing slash, no `.` or `..`
            segments).

        Raises
        ------
        ValueError
            If `dest` is not absolute, or matches a protected path.
        """
        if not os.path.isabs(dest):
            raise ValueError("Destination must be an absolute path")
        # Normalize first so that "/backup/../etc" or "//etc" cannot slip
        # past the patterns; normpath() keeps a leading "//" on POSIX.
        dest = "/" + os.path.normpath(dest).lstrip("/")
        for sp in self.syspath:
            patterns = [sp]
            if sp.endswith("/*"):
                # "/etc/*" must also protect "/etc" itself
                patterns.append(sp[:-2])
            if any(fnmatch(dest, pat) for pat in patterns):
                raise ValueError("Destination cannot under "
                                 "protected paths: %s" % dest)
        return dest

    def backup(self):
        """
        Do backup sources to the destination.

        The necessary destination directories are created before copying
        files.
        """
        if not os.path.exists(self.dest_root):
            logger.info("Create destination root: %s", self.dest_root)
            self.mkdir(self.dest_root, dryrun=self.dryrun)
        for p in self.config["sources"]:
            relpath = p.lstrip("/")
            src = os.path.join(self.src_root, relpath)
            dest = os.path.join(self.dest_root, relpath)
            if os.path.isdir(src):
                src_dir = src
                dest_dir = dest
            else:
                # A file is copied into the mirror of its parent directory
                src_dir = os.path.dirname(src)
                dest_dir = os.path.dirname(dest)
            self.mkdirs(dest_dir, ref=src_dir, dryrun=self.dryrun)
            self.copy(src, dest, dryrun=self.dryrun, debug=self.debug)

    def cleanup(self):
        """
        Remove the obsolete files/directories from the destination.
        """
        for p in self.config.get("dest_remove") or []:
            path = os.path.join(self.dest_root, p.lstrip("/"))
            self.remove(path, dest_root=self.dest_root,
                        dryrun=self.dryrun, debug=self.debug)

    @staticmethod
    def copy(src, dest, dryrun=False, debug=False):
        """
        Copy file/directory using `rsync` with metadata preserved, and to
        keep directory contents in sync.

        Raises
        ------
        subprocess.CalledProcessError
            If `rsync` exits with a non-zero status.
        """
        if os.path.isdir(src):
            # Trailing slashes make rsync sync the directory *contents*
            src = src.rstrip("/") + "/"
            dest = dest.rstrip("/") + "/"
        logger.info("Copy: %s -> %s", src, dest)
        args = ["--archive", "--acls", "--xattrs", "--hard-links",
                "--delete", "--delete-after"]
        if debug:
            args += ["--verbose"]
        cmd = ["rsync"] + args + [src, dest]
        if not dryrun:
            subprocess.check_call(cmd)

    @staticmethod
    def remove(path, dest_root, dryrun=False, debug=False):
        """
        Remove the specified file/directory using `rm -rf`, to clean
        up the destination backup.

        The specified path must locate under the `dest_root` for safety.
        Both paths are normalized before the check, and the normalized
        path is the one that gets removed.

        Raises
        ------
        ValueError
            If `path` is not strictly below `dest_root`.
        subprocess.CalledProcessError
            If `rm` exits with a non-zero status.
        """
        root = os.path.normpath(dest_root)
        target = os.path.normpath(path)
        prefix = root.rstrip("/") + "/"
        if target == root or not target.startswith(prefix):
            raise ValueError("Not allowed to remove file/directory "
                             "outside destination: %s" % path)
        # lexists(): a dangling symlink must still be removable
        if not os.path.lexists(target):
            return
        logger.info("Remove: %s", target)
        args = ["-r", "-f"]
        if debug:
            args += ["-v"]
        cmd = ["rm"] + args + ["--", target]
        if not dryrun:
            subprocess.check_call(cmd)

    @staticmethod
    def _parent(path):
        """
        Return `(head, tail)` of `path`, ignoring one trailing slash.
        """
        head, tail = os.path.split(path)
        if not tail:
            head, tail = os.path.split(head)
        return head, tail

    @classmethod
    def mkdirs(cls, dest, ref=None, dryrun=False):
        """
        Recursively create the destination directories.

        Parameters
        ----------
        dest : str
            The destination directory; missing parents are created too.
        ref : str, optional
            The reference directory mirrored by `dest`.  Each created
            directory takes its metadata from the corresponding level of
            `ref`.  If omitted, no metadata is copied.
        dryrun : bool
            If True, do not actually create the directories.
        """
        head, tail = cls._parent(dest)
        head_ref = cls._parent(ref)[0] if ref else None
        if head and tail and not os.path.exists(head):
            cls.mkdirs(head, ref=head_ref, dryrun=dryrun)
            if tail == os.path.curdir:
                # `xxx/newdir/.` exists if `xxx/newdir` exists
                return
        cls.mkdir(dest, ref=ref, dryrun=dryrun)

    @classmethod
    def mkdir(cls, dest, ref=None, dryrun=False):
        """
        Make the destination directory with the same metadata w.r.t. the
        reference directory.

        Parameters
        ----------
        dest : str
            The destination directory to be created
        ref : str, optional
            The reference directory, whose metadata will be mirrored to
            the destination directory.
        dryrun : bool
            If True, do not actually create the directory
        """
        if os.path.exists(dest):
            return
        logger.info("Create directory: %s", dest)
        if dryrun:
            return
        os.mkdir(dest)
        if ref:
            cls.copystat(ref, dest)

    @classmethod
    def copystat(cls, src, dest, copy_own=True, copy_xattr=True):
        """
        Copy all stat info (mode bits, atime, mtime, flags) from `src` to
        `dest`.  If `copy_own=True`, the uid and gid are also copied.
        If `copy_xattr=True`, the extended attributes are also copied
        (only available on Linux).
        """
        st = os.stat(src)
        if copy_own:
            # Ownership first: chown() may clear the set-uid/set-gid bits
            os.chown(dest, uid=st.st_uid, gid=st.st_gid)
        os.chmod(dest, mode=stat.S_IMODE(st.st_mode))
        if copy_xattr:
            cls.copyxattr(src, dest)
        os.utime(dest, ns=(st.st_atime_ns, st.st_mtime_ns))
        if hasattr(st, "st_flags"):
            # Flags last: an immutable flag would block the calls above
            os.chflags(dest, flags=st.st_flags)

    @staticmethod
    def copyxattr(src, dest):
        """
        Copy the extended attributes (xattr) from `src` to `dest`.

        NOTE: xattr only available on Linux.
        """
        if not hasattr(os, "listxattr"):
            return
        for name in os.listxattr(src):
            value = os.getxattr(src, name)
            os.setxattr(dest, name, value)


def main():
    """
    Command line entry point: parse the arguments, then run the backup
    followed by the cleanup of the destination.
    """
    parser = argparse.ArgumentParser(
        description="Backup files preserving metadata")
    parser.add_argument("-n", "--dry-run", dest="dryrun", action="store_true",
                        help="dry run, do not perform actual action")
    parser.add_argument("-q", "--quiet", dest="quiet", action="store_true",
                        help="be quiet")
    parser.add_argument("-d", "--debug", dest="debug", action="store_true",
                        help="show verbose debug information")
    parser.add_argument("config", help="configuration file")
    args = parser.parse_args()

    # The root logger already has a handler (configured at import time),
    # so another logging.basicConfig() call would be silently ignored;
    # adjust the level directly instead.
    root_logger = logging.getLogger()
    if args.quiet and not args.dryrun:
        root_logger.setLevel(logging.WARNING)
    if args.debug:
        root_logger.setLevel(logging.DEBUG)

    now = datetime.now()
    logger.info("=== %s @ %s ===", " ".join(sys.argv), now.isoformat())
    if args.dryrun:
        logger.info("*** DRY RUN ***")
    backup = Backup(args.config, dryrun=args.dryrun, debug=args.debug)
    backup.backup()
    backup.cleanup()
    logger.info("=== Backup Finished! @ %s ===", datetime.now().isoformat())


if __name__ == "__main__":
    main()