aboutsummaryrefslogtreecommitdiffstats
path: root/backup/dar-backup.py
blob: 47ff815d0569c7a3766a8ce08b2000caf5e99b16 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
#!/usr/bin/env python3
#
# Copyright (c) 2017 Aaron LI
# MIT license
#
# 2017-03-08

"""
Full/differential backup using 'dar' with 'par2'.

This script accepts a configuration file, which is a simple YAML file
and defines all necessary options for 'dar'.
The 'dar'-created archives are stored under the same directory as the
configuration file, and 'par2' is used to create redundancy data for
possible repairs at the same time, and finally the archives are
tested/verified by 'dar'.


Example configuration file for system/root backup
-------------------------------------------------
# recommended format: <hostname>-root-<distro>-{date}
name: "office-root-opensuse-{date}"
fs_root: "/"
is_system: true
prune: []
exclude: []
exclude_compression: []


Example configuration file for user home backup
-----------------------------------------------
# recommended format: <hostname>-home-<user>-{date}
name: "office-home-aly-{date}"
fs_root: "/home/aly"
prune: [".cache",]
exclude: []
exclude_compression: []


Credit/references
-----------------
[1] http://www.halfgaar.net/backing-up-unix
[2] http://dar.linux.free.fr/doc/man/dar.html
[3] https://github.com/Parchive/par2cmdline
[4] http://dar.linux.free.fr/doc/samples/dar_par_create.duc
[5] http://dar.linux.free.fr/doc/samples/dar_par_test.duc
"""

import os
import sys
import argparse
import subprocess
import logging
from datetime import datetime
from glob import glob

import yaml


# Module-level logger, named after the basename of the invoked script
# (sys.argv[0]).
progname = os.path.basename(sys.argv[0])
logger = logging.getLogger(progname)


class DarSettings:
    """
    Settings for a 'dar' backup, loaded from a YAML configuration file.

    The class attributes below are the defaults.  ``merge_settings()``
    replaces the scalar ones and extends the set-valued ones with the
    values found in the configuration file.  ``path`` (where archives
    are looked up and created) is the directory that contains the
    configuration file.
    """
    # directory (relative path) for storing the isolated catalogs
    catalog_path = "catalogs"

    # date format for output archive name
    date_fmt = "%Y%m%dT%H%M"

    # Default settings
    args_common = [
        "--min-digits", "3,3,3",
        "--noconf",  # do not try to read /etc/darrc or ~/.darrc
    ]
    args_default = [
        "--alter=atime",  # do not preserve atime
        "--alter=no-case",  # case insensitive mode
        "--alter=glob",  # glob expression mode (instead of regex mode)
        "--alter=binary",  # use 2^10 instead of 10^3
        "--compression=bzip2",
        "--empty-dir",  # create empty directories for the excluded
        "--hash", "sha512",  # calculate the hash of slices on the fly
        "--no-overwrite",
        "--no-mount-points",  # stay in the same filesystem
    ]
    size_slice = "2G"
    exclude = {"*~", ".*~", ".~*"}
    exclude_compression = {
        "*.7z", "*.ape", "*.avi", "*.bz2", "*.deb", "*.exe", "*.flac",
        "*.flv", "*.gz", "*.iso", "*.jar", "*.jpg", "*.jpeg",
        "*.m4a", "*.m4v", "*.mkv", "*.mov", "*.mp3", "*.mp4", "*.ogg",
        "*.rar", "*.rpm", "*.tar.bz2", "*.tar.gz", "*.tar.xz",
        "*.tbz", "*.tgz", "*.txz", "*.wmv", "*.xz", "*.zip",
    }
    prune = set()
    verbose = set()

    # Parchive
    redundancy = 5  # 5% of redundancy for par2
    cmd_par_create = ("par2 create -r{redundancy} -n1 "
                      "'%p/%b.%N.par2' '%p/%b.%N.%e'")
    cmd_par_test = "par2 verify '%p/%b.%N.par2'"

    # Whether it is a backup of system root
    is_system = False
    prune_system = {"dev/pts", "dev/shm", "home", "lost+found",
                    "media", "mnt", "proc", "run", "sys",
                    "tmp", "var/cache", "var/tmp"}

    def __init__(self, configfile, verbose=False, dry_run=False):
        """
        Load the settings from ``configfile``.

        Parameters
        ----------
        configfile : path of the YAML configuration file
        verbose : if true, enable the "treated" and "messages" verbose
            categories of dar
        dry_run : if true, append "--dry-run" to the common arguments

        Raises
        ------
        ValueError : the configuration file does not contain a mapping
        KeyError : a mandatory key ("name", "fs_root") is missing
        """
        self.verbose = {"treated", "messages"} if verbose else set()
        if dry_run:
            # Build a new list on the instance: ``+=`` would extend the
            # class-level list in place and leak "--dry-run" into every
            # other DarSettings instance.
            self.args_common = self.args_common + ["--dry-run"]

        self.path = os.path.dirname(os.path.abspath(configfile))
        # safe_load() only builds plain YAML types, which is all this
        # configuration needs; the file is closed deterministically.
        with open(configfile) as f:
            settings = yaml.safe_load(f)
        if not isinstance(settings, dict):
            raise ValueError(
                "Configuration file {0} must contain a YAML mapping".format(
                    configfile))
        self.merge_settings(settings)

    @staticmethod
    def _as_set(value):
        # Normalize a YAML value to a set: a missing/null value gives an
        # empty set, a single string gives a one-element set (instead of
        # being split into characters), any other iterable is converted.
        if value is None:
            return set()
        if isinstance(value, str):
            return {value}
        return set(value)

    def merge_settings(self, settings):
        """
        Merge the ``settings`` mapping into this instance.

        "name" and "fs_root" are mandatory (KeyError if missing).  The
        other scalar options fall back to the current values; the
        set-valued options ("verbose", "prune", "exclude",
        "exclude_compression") are united with the current values.
        """
        self.name = settings["name"]
        self.fs_root = settings["fs_root"]
        self.is_system = settings.get("is_system", self.is_system)
        self.date_fmt = settings.get("date_fmt", self.date_fmt)
        self.catalog_path = settings.get("catalog_path", self.catalog_path)
        self.size_slice = settings.get("size_slice", self.size_slice)
        self.redundancy = settings.get("redundancy", self.redundancy)
        # ``|`` returns new sets, so the class-level defaults are never
        # modified.
        self.verbose = self.verbose | self._as_set(settings.get("verbose"))
        self.prune = self.prune | self._as_set(settings.get("prune"))
        self.exclude = self.exclude | self._as_set(settings.get("exclude"))
        self.exclude_compression = (
            self.exclude_compression |
            self._as_set(settings.get("exclude_compression")))

    def archive_name(self, date=None):
        """
        Return the archive base name (full path, without slice number
        and extension) with ``{date}`` substituted.  If ``date`` is
        None, the current local time formatted with ``date_fmt`` is used.
        """
        if date is None:
            date = datetime.now().strftime(self.date_fmt)
        return os.path.join(self.path, self.name.format(date=date))

    def last_archive_name(self):
        """
        Return the last archive (backup data, or else isolated catalog)
        to be used as the reference of a differential backup, or None
        if neither exists.
        """
        name = self.last_backup()
        if name is None:
            name = self.last_catalog()
        return name

    def _latest_archive(self, pattern, kind):
        # Return the base name of the lexicographically last file that
        # matches the glob ``pattern``, or None if nothing matches.
        # ``kind`` ("backup"/"catalog") is only used in the log messages.
        matches = sorted(glob(pattern))
        if not matches:
            logger.info("No existing {0}s found!".format(kind))
            return None
        # Strip both the ".dar" extension and the slice number.
        name = os.path.splitext(os.path.splitext(matches[-1])[0])[0]
        logger.info("Found last {0}: {1}".format(kind, name))
        return name

    def last_backup(self):
        """
        Return the name of last backup if found, otherwise, return None.
        """
        backup_glob = self.archive_name(date="*") + ".dar"
        return self._latest_archive(backup_glob, "backup")

    def last_catalog(self):
        """
        Return the name of last isolated catalog if found, otherwise,
        return None.
        """
        basename = os.path.basename(self.archive_name(date="*")) + ".dar"
        catalog_glob = os.path.join(self.path, self.catalog_path, basename)
        return self._latest_archive(catalog_glob, "catalog")

    @property
    def args_create(self):
        """
        List of dar arguments for creating an archive.

        The set-valued options are emitted in sorted order so that the
        generated (and logged) command line is the same on every run.
        """
        cmd_par = self.cmd_par_create.format(redundancy=self.redundancy)
        args = self.args_default + [
            "--execute", cmd_par,
            "--fs-root", self.fs_root,
            "--slice", self.size_slice,
        ]
        args += ["--verbose=%s" % item for item in sorted(self.verbose)]
        for item in sorted(self.exclude):
            args += ["--exclude", item]
        for item in sorted(self.exclude_compression):
            args += ["-Z", item]
        # A system backup additionally prunes the 'prune_system' paths.
        prune = self.prune
        if self.is_system:
            prune = prune | self.prune_system
        for item in sorted(prune):
            args += ["--prune", item]
        logger.info("args_create: {0}".format(args))
        return args

    @property
    def args_test(self):
        """
        List of dar arguments for testing an archive.
        """
        args = ["--execute", self.cmd_par_test]
        args += ["--verbose=%s" % item for item in sorted(self.verbose)]
        logger.info("args_test: {0}".format(args))
        return args


class DarBackup:
    """
    Run a 'dar' backup as described by a settings object.

    ``run()`` creates a differential backup when a reference archive
    (previous backup or isolated catalog) was found by the settings,
    and a full backup otherwise; unless it is a dry run, the new
    archive is then tested and its catalog isolated.
    """
    def __init__(self, settings):
        """
        Take a snapshot of the values needed from ``settings``
        (archive names and argument lists are evaluated once, here).
        """
        self.settings = settings
        self.is_system = settings.is_system
        self.path = settings.path
        self.catalog_path = settings.catalog_path
        self.archive_name = settings.archive_name()
        self.last_archive_name = settings.last_archive_name()
        self.args_common = settings.args_common
        self.args_create = settings.args_create
        self.args_test = settings.args_test

    def run(self, dry_run=False):
        """
        Perform the backup.

        Raises RuntimeError when a system backup is requested without
        root privileges, and subprocess.CalledProcessError when a
        'dar' invocation exits with a non-zero status.
        """
        if self.is_system and (not self.is_running_as_root()):
            raise RuntimeError("Running as root to backup the system root")

        logger.info("Output archive name: {0}".format(self.archive_name))
        if self.last_archive_name is not None:
            self.backup_diff()
        else:
            self.backup_full()

        # Testing and catalog isolation are skipped for a dry run.
        if not dry_run:
            self.test_backup()
            self.isolate_catalog()

    def _run_dar(self, args):
        # Log and execute 'dar' with the given argument list; a non-zero
        # exit status raises subprocess.CalledProcessError.
        cmd = ["dar"] + args
        logger.info("Command: {0}".format(cmd))
        subprocess.run(cmd, check=True)

    def _catalog_target(self):
        # Make sure the catalog directory exists and return the base
        # name (full path) of the catalog for the current archive.
        # makedirs(exist_ok=True) also creates missing parent
        # directories of a nested 'catalog_path' and does not fail if
        # the directory appears between a check and the creation.
        catalog_dir = os.path.join(self.path, self.catalog_path)
        os.makedirs(catalog_dir, exist_ok=True)
        return os.path.join(catalog_dir,
                            os.path.basename(self.archive_name))

    def backup_full(self):
        """Create a full backup archive."""
        logger.info("Start full backup ...")
        self._run_dar(["--create", self.archive_name] +
                      self.args_common + self.args_create)
        logger.info("Full backup: DONE!")

    def backup_diff(self):
        """Create a differential backup against the last archive."""
        logger.info("Start differential backup ...")
        self._run_dar(["--create", self.archive_name,
                       "--ref", self.last_archive_name] +
                      self.args_common + self.args_create)
        logger.info("Differential backup: DONE!")

    def test_backup(self):
        """Test the archive that was just created."""
        logger.info("Test created backup ...")
        self._run_dar(["--test", self.archive_name] +
                      self.args_common + self.args_test)
        logger.info("Test backup: DONE!")

    def isolate_catalog(self):
        """
        Isolate the catalog of the created archive into the catalog
        directory (created if necessary).
        """
        logger.info("Isolate the catalog from backup ...")
        catalog = self._catalog_target()
        logger.info("Output catalog: {0}".format(catalog))
        self._run_dar(["--isolate", catalog,
                       "--ref", self.archive_name] + self.args_common)
        logger.info("Isolate backup catalog: DONE!")

    @staticmethod
    def is_running_as_root():
        """Return True if the process has root privileges."""
        # The effective UID is the one used for permission checks.
        return os.geteuid() == 0


def main():
    """
    Command-line entry point: parse the options, load the settings from
    the given configuration file and run the backup.
    """
    config_help = ("configuration file for dar and archive. "
                   "NOTE: the backup archive will be placed under "
                   "the same directory as this configuration file")

    cli = argparse.ArgumentParser(
        description="Backup system/data using dar and par2")
    cli.add_argument("-c", "--config", dest="config", required=True,
                     help=config_help)
    cli.add_argument("-n", "--dry-run", dest="dry_run",
                     action="store_true",
                     help="dry run, do not perform any action")
    cli.add_argument("-v", "--verbose", dest="verbose",
                     action="store_true",
                     help="show verbose information")
    opts = cli.parse_args()

    # Informational messages are only shown in verbose mode.
    if opts.verbose:
        logging.basicConfig(level=logging.INFO)

    backup = DarBackup(DarSettings(opts.config,
                                   verbose=opts.verbose,
                                   dry_run=opts.dry_run))
    backup.run(dry_run=opts.dry_run)


# Run the backup only when this file is executed as a script.
if __name__ == "__main__":
    main()