aboutsummaryrefslogtreecommitdiffstats
path: root/fg21sim/utils/download.py
diff options
context:
space:
mode:
Diffstat (limited to 'fg21sim/utils/download.py')
-rw-r--r--fg21sim/utils/download.py112
1 files changed, 112 insertions, 0 deletions
diff --git a/fg21sim/utils/download.py b/fg21sim/utils/download.py
new file mode 100644
index 0000000..27feeed
--- /dev/null
+++ b/fg21sim/utils/download.py
@@ -0,0 +1,112 @@
+# Copyright (c) 2016 Weitian LI <liweitianux@live.com>
+# MIT license
+
+"""
+Utilities to download files
+"""
+
+import os
+import subprocess
+import logging
+
+from .hashutil import calc_md5
+
+
+logger = logging.getLogger(__name__)
+
+
+def _check_filesize(filepath, size):
+ """
+ Check whether the file size equals the given size.
+
+ Parameters
+ ----------
+ filepath : str
+ Path to the input file
+ size : int
+ Expected/true size (in bytes) of the file
+
+ Returns
+ -------
+ valid : bool
+ ``True`` if the on-disk file size equals the given size,
+ otherwise ``False``.
+ """
+ size_ondisk = os.path.getsize(filepath)
+ return size_ondisk == size
+
+
def _check_md5(filepath, md5):
    """
    Check whether the file MD5 digest matches the given MD5.

    The comparison ignores letter case and surrounding whitespace in the
    expected digest, since a hexadecimal digest denotes the same value
    regardless of case.

    Parameters
    ----------
    filepath : str
        Path to the input file
    md5 : str
        Expected/true MD5 digest of the file (hexadecimal string)

    Returns
    -------
    valid : bool
        ``True`` if the on-disk file MD5 digest matches the given MD5,
        otherwise ``False``.
    """
    # NOTE(review): assumes ``calc_md5`` returns the digest as a hexadecimal
    # ``str`` -- confirm against ``hashutil.calc_md5``.
    md5_ondisk = calc_md5(filepath).strip().lower()
    md5_expected = md5.strip().lower()
    return md5_ondisk == md5_expected
+
+
def download_file(url, outfile=None, size=None, md5=None, clobber=False):
    """
    Download file using "wget" and validate the file size and MD5 digest.

    If the expected MD5 digest is provided, and the on-disk file has the
    same MD5 digest, then the download is skipped.

    If the output file already exists but with unmatched MD5 (or no
    expected MD5 is given, so the existing file cannot be verified), then
    it is removed and re-downloaded if ``clobber=True``, otherwise an
    ``IOError`` is raised.

    If "wget" fails or is interrupted, the partially written output file
    is removed before the error propagates, so that a later call does not
    trip over the leftover file.  A file that downloads completely but
    fails the size/MD5 validation is left in place.

    Parameters
    ----------
    url : str
        The URL from where to download the file.
    outfile : str, optional
        The path and filename for the downloaded file.
        If not provided, then use the basename of the URL.
    size : int, optional
        Expected/true size (in bytes) of the file
        If provided, then check the file size after download.
    md5 : str, optional
        Expected/true MD5 digest of the file
        If provided, then check the MD5 digest after download.
    clobber : bool, optional
        Whether to overwrite the existing file?

    Raises
    ------
    ValueError :
        * The output filename is empty (e.g., not given and the URL ends
          with a slash so that it has no basename);
        * The downloaded file has a wrong size or unmatched MD5 digest.
    IOError :
        Output file that cannot be confirmed by the MD5 digest already
        exists while ``clobber=False``.
    subprocess.CalledProcessError :
        "wget" exited with a non-zero status.
    """
    if outfile is None:
        outfile = os.path.basename(url)
    if not outfile:
        # Fail early with a clear message instead of running ``wget -O ""``
        raise ValueError(
            "Cannot determine output filename for URL: {0}".format(url))
    # Check whether can skip the download
    if os.path.exists(outfile):
        if (md5 is not None) and _check_md5(outfile, md5):
            logger.info("Skip already downloaded file: %s", outfile)
            return
        elif clobber:
            os.remove(outfile)
            logger.info("Removed wrong existing file: %s", outfile)
        else:
            raise IOError("Exists wrong output file: {0}".format(outfile))
    #
    cmd = ["wget", "-O", outfile, url]
    logger.info("CMD: %s", " ".join(cmd))
    finished = False
    try:
        subprocess.check_call(cmd)
        finished = True
    finally:
        # No file existed at ``outfile`` when the download started (it was
        # either absent or removed above), so anything left there after a
        # failure/interrupt is a partial download created by this call.
        if (not finished) and os.path.isfile(outfile):
            try:
                os.remove(outfile)
            except OSError:
                # Best effort only: do not mask the original error
                logger.warning("Failed to remove partial file: %s", outfile)
    if (size is not None) and (not _check_filesize(outfile, size)):
        raise ValueError("Downloaded file has wrong size")
    if (md5 is not None) and (not _check_md5(outfile, md5)):
        raise ValueError("Downloaded file has unmatched MD5 digest")
    logger.info("Downloaded and validated file: %s", outfile)