diff options
author | Aaron LI <aaronly.me@outlook.com> | 2016-11-26 17:44:37 +0800 |
---|---|---|
committer | Aaron LI <aaronly.me@outlook.com> | 2016-11-26 17:44:37 +0800 |
commit | 3b2eaa5e704b84c1e85b6cfed14c9b70ff6839e0 (patch) | |
tree | 1a5a33503b41ca4ee8e2d36b0ce7dfc3fe7a7687 /fg21sim | |
parent | b57474b17e507328c2be458ac13a16ed4ea0e6ac (diff) | |
download | fg21sim-3b2eaa5e704b84c1e85b6cfed14c9b70ff6839e0.tar.bz2 |
utils: Add download.py with function "download_file()"
Diffstat (limited to 'fg21sim')
-rw-r--r-- | fg21sim/utils/download.py | 112 |
1 files changed, 112 insertions, 0 deletions
# Source file: fg21sim/utils/download.py
#
# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
# MIT license

"""
Utilities to download files
"""

import os
import subprocess
import logging

from .hashutil import calc_md5


logger = logging.getLogger(__name__)


def _check_filesize(filepath, size):
    """
    Check whether the file size equals the given size.

    Parameters
    ----------
    filepath : str
        Path to the input file
    size : int
        Expected/true size (in bytes) of the file

    Returns
    -------
    valid : bool
        ``True`` if the on-disk file size equals the given size,
        otherwise ``False``.

    Raises
    ------
    OSError :
        Propagated from ``os.path.getsize()`` if the file does not exist
        or is inaccessible.
    """
    size_ondisk = os.path.getsize(filepath)
    return size_ondisk == size


def _check_md5(filepath, md5):
    """
    Check whether the file MD5 digest matches the given MD5.

    The comparison is case-insensitive, so an expected digest written in
    upper-case hexadecimal still matches.

    Parameters
    ----------
    filepath : str
        Path to the input file
    md5 : str
        Expected/true MD5 digest of the file

    Returns
    -------
    valid : bool
        ``True`` if the on-disk file MD5 digest matches the given MD5,
        otherwise ``False``.
    """
    # NOTE(review): assumes calc_md5() returns the hexadecimal digest as
    # a str (it is compared against the str ``md5``) -- confirm in hashutil.
    md5_ondisk = calc_md5(filepath)
    return md5_ondisk.lower() == md5.lower()


def _discard_file(filepath):
    """
    Best-effort removal of a partial or invalid download.

    Failing to remove the file is only logged, so that the error which
    triggered the cleanup is the one the caller sees.

    Parameters
    ----------
    filepath : str
        Path to the file to remove
    """
    try:
        os.remove(filepath)
    except OSError as exc:
        logger.debug("Could not remove file %s: %s", filepath, exc)


def download_file(url, outfile=None, size=None, md5=None, clobber=False):
    """
    Download file using "wget" and validate the file size and MD5 digest.

    If the expected MD5 digest is provided, and the on-disk file has the
    same MD5 digest, then the download is skipped.

    If the output file already exists but cannot be confirmed by its MD5
    digest (unmatched digest, or no ``md5`` given), then it is removed and
    re-downloaded if ``clobber=True``, otherwise an ``IOError`` is raised.

    If the download itself fails, or the downloaded file does not pass the
    size/MD5 validation, the output file is removed before the error is
    raised, so that a later retry is not blocked by the leftover file.

    Parameters
    ----------
    url : str
        The URL from where to download the file.
    outfile : str, optional
        The path and filename for the downloaded file.
        If not provided, then use the basename of the URL.
    size : int, optional
        Expected/true size (in bytes) of the file
        If provided, then check the file size after download.
    md5 : str, optional
        Expected/true MD5 digest of the file
        If provided, then check the MD5 digest after download.
    clobber : bool, optional
        Whether to overwrite the existing file?

    Raises
    ------
    IOError :
        Output file already exists without a matching MD5 digest
        while ``clobber=False``.
    ValueError :
        No output filename can be derived from the URL, or the downloaded
        file has the wrong size or an unmatched MD5 digest.
    subprocess.CalledProcessError :
        The "wget" command exited with a non-zero status.
    """
    if outfile is None:
        outfile = os.path.basename(url)
        if not outfile:
            # E.g., the URL ends with a slash; "wget -O ''" cannot work.
            raise ValueError(
                "Cannot derive output filename from URL: {0}".format(url))
    # Check whether can skip the download
    if os.path.exists(outfile):
        if (md5 is not None) and _check_md5(outfile, md5):
            logger.info("Skip already downloaded file: %s", outfile)
            return
        elif clobber:
            os.remove(outfile)
            logger.info("Removed wrong existing file: %s", outfile)
        else:
            raise IOError("Exists wrong output file: {0}".format(outfile))
    #
    cmd = ["wget", "-O", outfile, url]
    logger.info("CMD: %s", " ".join(cmd))
    try:
        subprocess.check_call(cmd)
    except subprocess.CalledProcessError:
        # "wget -O" may leave an empty or partial output file behind,
        # which would make the next call fail with "Exists wrong output
        # file" instead of retrying the download.
        _discard_file(outfile)
        raise
    if (size is not None) and (not _check_filesize(outfile, size)):
        _discard_file(outfile)
        raise ValueError("Downloaded file has wrong size")
    if (md5 is not None) and (not _check_md5(outfile, md5)):
        _discard_file(outfile)
        raise ValueError("Downloaded file has unmatched MD5 digest")
    logger.info("Downloaded and validated file: %s", outfile)