aboutsummaryrefslogtreecommitdiffstats
path: root/cli/cleanup-maildir.py
diff options
context:
space:
mode:
Diffstat (limited to 'cli/cleanup-maildir.py')
-rwxr-xr-xcli/cleanup-maildir.py533
1 files changed, 533 insertions, 0 deletions
diff --git a/cli/cleanup-maildir.py b/cli/cleanup-maildir.py
new file mode 100755
index 0000000..e109ea4
--- /dev/null
+++ b/cli/cleanup-maildir.py
@@ -0,0 +1,533 @@
+#!/usr/bin/python -tt
+#
+# Copyright 2004-2006 Nathaniel W. Turner <nate@houseofnate.net>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation
+# files (the "Software"), to deal in the Software without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following
+# conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+"""
+USAGE
+ cleanup-maildir [OPTION].. COMMAND FOLDERNAME..
+
+DESCRIPTION
+ Cleans up old messages in FOLDERNAME; the exact action taken
+ depends on COMMAND. (See next section.)
+ Note that FOLDERNAME is a name such as 'Drafts', and the
+ corresponding maildir path is determined using the values of
+ maildir-root, folder-prefix, and folder-seperator.
+
+COMMANDS
+ archive - move old messages to subfolders based on message date
+ trash - move old message to trash folder
+ delete - permanently delete old messages
+
+OPTIONS
+ -h, --help
+ Show this help.
+ -q, --quiet
+ Suppress normal output.
+ -v, --verbose
+ Output extra information for testing.
+ -n, --trial-run
+ Do not actually touch any files; just say what would be done.
+ -a, --age=N
+ Only touch messages older than N days. Default is 14 days.
+ -k, --keep-flagged-threads
+ If any messages in a thread are flagged, do not touch them or
+ any other messages in that thread.
+ Note: the thread-detection mechanism is currently base purely on
+ a message's subject. The In-Reply-To header is not currently used.
+ -r, --keep-read
+ If any messages are flagged as READ, do not touch them.
+ -t, --trash-folder=F
+ Use F as trash folder when COMMAND is 'trash'.
+ Default is 'Trash'.
+ --archive-folder=F
+ Use F as the base for constructing archive folders. For example, if F is
+ 'Archive', messages from 2004 might be put in the folder 'Archive.2004'.
+ -d, --archive-hierarchy-depth=N
+ Specify number of subfolders in archive hierarchy; 1 is just
+ the year, 2 is year/month (default), 3 is year/month/day.
+ --maildir-root=F
+ Specifies folder that contains mail folders.
+ Default is "$HOME/Maildir".
+ --folder-seperator=str
+ Folder hierarchy seperator. Default is '.'
+ --folder-prefix=str
+ Folder prefix. Default is '.'
+
+NOTES
+ The following form is accepted for backwards compatibility, but is deprecated:
+ cleanup-maildir --mode=COMMAND [OPTION].. FOLDERNAME..
+
+EXAMPLES
+ # Archive messages in 'Sent Items' folder over 30 days old
+ cleanup-maildir --age=30 archive 'Sent Items'"
+
+ # Delete messages over 2 weeks old in 'Lists/debian-devel' folder,
+ # except messages that are part of a thread containing a flagged message.
+ cleanup-maildir --keep-flagged-threads trash 'Lists.debian-devel'
+"""
+
+__version__ = "0.3.0"
+# $Id$
+# $URL$
+
+import mailbox
+import os.path
+import os
+import rfc822
+import string
+import socket
+import time
+import logging
+import sys
+import getopt
+
+
+def mkMaildir(path):
+ """Make a Maildir structure rooted at 'path'"""
+ os.mkdir(path, 0700)
+ os.mkdir(os.path.join(path, 'tmp'), 0700)
+ os.mkdir(os.path.join(path, 'new'), 0700)
+ os.mkdir(os.path.join(path, 'cur'), 0700)
+
+
+class MaildirWriter(object):
+
+ """Deliver messages into a Maildir"""
+
+ path = None
+ counter = 0
+
+ def __init__(self, path=None):
+ """Create a MaildirWriter that manages the Maildir at 'path'
+
+ Arguments:
+ path -- if specified, used as the default Maildir for this object
+ """
+ if path != None:
+ if not os.path.isdir(path):
+ raise ValueError, 'Path does not exist: %s' % path
+ self.path = path
+ self.logger = logging.getLogger('MaildirWriter')
+
+ def deliver(self, msg, path=None):
+ """Deliver a message to a Maildir
+
+ Arguments:
+ msg -- a message object
+ path -- the path of the Maildir; if None, uses default from __init__
+ """
+ if path != None:
+ self.path = path
+ if self.path == None or not os.path.isdir(self.path):
+ raise ValueError, 'Path does not exist'
+ tryCount = 1
+ srcFile = msg.getFilePath();
+ (dstName, tmpFile, newFile, dstFile) = (None, None, None, None)
+ while 1:
+ try:
+ dstName = "%d.%d_%d.%s" % (int(time.time()), os.getpid(),
+ self.counter, socket.gethostname())
+ tmpFile = os.path.join(os.path.join(self.path, "tmp"), dstName)
+ newFile = os.path.join(os.path.join(self.path, "new"), dstName)
+ self.logger.debug("deliver: attempt copy %s to %s" %
+ (srcFile, tmpFile))
+ os.link(srcFile, tmpFile) # Copy into tmp
+ self.logger.debug("deliver: attempt link to %s" % newFile)
+ os.link(tmpFile, newFile) # Link into new
+ except OSError, (n, s):
+ self.logger.critical(
+ "deliver failed: %s (src=%s tmp=%s new=%s i=%d)" %
+ (s, srcFile, tmpFile, newFile, tryCount))
+ self.logger.info("sleeping")
+ time.sleep(2)
+ tryCount += 1
+ self.counter += 1
+ if tryCount > 10:
+ raise OSError("too many failed delivery attempts")
+ else:
+ break
+
+ # Successful delivery; increment deliver counter
+ self.counter += 1
+
+ # For the rest of this method we are acting as an MUA, not an MDA.
+
+ # Move message to cur and restore any flags
+ dstFile = os.path.join(os.path.join(self.path, "cur"), dstName)
+ if msg.getFlags() != None:
+ dstFile += ':' + msg.getFlags()
+ self.logger.debug("deliver: attempt link to %s" % dstFile)
+ os.link(newFile, dstFile)
+ os.unlink(newFile)
+
+ # Cleanup tmp file
+ os.unlink(tmpFile)
+
+
+class MessageDateError(TypeError):
+ """Indicate that the message date was invalid"""
+ pass
+
+
+class MaildirMessage(rfc822.Message):
+
+ """An email message
+
+ Has extra Maildir-specific attributes
+ """
+
+ def getFilePath(self):
+ if sys.hexversion >= 0x020500F0:
+ return self.fp._file.name
+ else:
+ return self.fp.name
+
+ def isFlagged(self):
+ """return true if the message is flagged as important"""
+ import re
+ fname = self.getFilePath()
+ if re.search(r':.*F', fname) != None:
+ return True
+ return False
+
+ def getFlags(self):
+ """return the flag part of the message's filename"""
+ parts = self.getFilePath().split(':')
+ if len(parts) == 2:
+ return parts[1]
+ return None
+
+ def isNew(self):
+ """return true if the message is marked as unread"""
+ # XXX should really be called isUnread
+ import re
+ fname = self.getFilePath()
+ if re.search(r':.*S', fname) != None:
+ return False
+ return True
+
+ def getSubject(self):
+ """get the message's subject as a unicode string"""
+
+ import email.Header
+ s = self.getheader("Subject")
+ try:
+ return u"".join(map(lambda x: x[0].decode(x[1] or 'ASCII', 'replace'),
+ email.Header.decode_header(s)))
+ except(LookupError):
+ return s
+
+ def getSubjectHash(self):
+ """get the message's subject in a "normalized" form
+
+ This currently means lowercasing and removing any reply or forward
+ indicators.
+ """
+ import re
+ import string
+ s = self.getSubject()
+ if s == None:
+ return '(no subject)'
+ return re.sub(r'^(re|fwd?):\s*', '', string.strip(s.lower()))
+
+ def getDateSent(self):
+ """Get the time of sending from the Date header
+
+ Returns a time object using time.mktime. Not very reliable, because
+ the Date header can be missing or spoofed (and often is, by spammers).
+ Throws a MessageDateError if the Date header is missing or invalid.
+ """
+ dh = self.getheader('Date')
+ if dh == None:
+ return None
+ try:
+ return time.mktime(rfc822.parsedate(dh))
+ except ValueError:
+ raise MessageDateError("message has missing or bad Date")
+ except TypeError: # gets thrown by mktime if parsedate returns None
+ raise MessageDateError("message has missing or bad Date")
+ except OverflowError:
+ raise MessageDateError("message has missing or bad Date")
+
+ def getDateRecd(self):
+ """Get the time the message was received"""
+ # XXX check that stat returns time in UTC, fix if not
+ return os.stat(self.getFilePath())[8]
+
+ def getDateSentOrRecd(self):
+ """Get the time the message was sent, fall back on time received"""
+ try:
+ d = self.getDateSent()
+ if d != None:
+ return d
+ except MessageDateError:
+ pass
+ return self.getDateRecd()
+
+ def getAge(self):
+ """Get the number of seconds since the message was received"""
+ msgTime = self.getDateRecd()
+ msgAge = time.mktime(time.gmtime()) - msgTime
+ return msgAge / (60*60*24)
+
+
+class MaildirCleaner(object):
+
+ """Clean a maildir by deleting or moving old messages"""
+
+ __trashWriter = None
+ __mdWriter = None
+ stats = {'total': 0, 'delete': 0, 'trash': 0, 'archive': 0}
+ keepSubjects = {}
+ archiveFolder = None
+ archiveHierDepth = 2
+ folderBase = None
+ folderPrefix = "."
+ folderSeperator = "."
+ keepFlaggedThreads = False
+ trashFolder = "Trash"
+ isTrialRun = False
+ keepRead = False
+
+ def __init__(self, folderBase=None):
+ """Initialize the MaildirCleaner
+
+ Arguments:
+ folderBase -- the directory in which the folders are found
+ """
+ self.folderBase = folderBase
+ self.__mdWriter = MaildirWriter()
+ self.logger = logging.getLogger('MaildirCleaner')
+ self.logger.setLevel(logging.DEBUG)
+
+ def __getTrashWriter(self):
+ if not self.__trashWriter:
+ path = os.path.join(self.folderBase, self.folderPrefix + self.trashFolder)
+ self.__trashWriter = MaildirWriter(path)
+ return self.__trashWriter
+
+ trashWriter = property(__getTrashWriter)
+
+ def scanSubjects(self, folderName):
+ """Scans for flagged subjects"""
+ self.logger.info("Scanning for flagged subjects...")
+ if (folderName == 'INBOX'):
+ path = self.folderBase
+ else:
+ path = os.path.join(self.folderBase, self.folderPrefix + folderName)
+ maildir = mailbox.Maildir(path, MaildirMessage)
+ self.keepSubjects = {}
+ for i, msg in enumerate(maildir):
+ if msg.isFlagged():
+ self.keepSubjects[msg.getSubjectHash()] = 1
+ self.logger.debug("Flagged (%d): %s", i, msg.getSubjectHash())
+ self.logger.info("Done scanning.")
+
+
+ def clean(self, mode, folderName, minAge):
+
+ """Trashes or archives messages older than minAge days
+
+ Arguments:
+ mode -- the cleaning mode. Valid modes are:
+ trash -- moves the messages to a trash folder
+ archive -- moves the messages to folders based on their date
+ delete -- deletes the messages
+ folderName -- the name of the folder on which to operate
+ This is a name like "Stuff", not a filename
+ minAge -- messages younger than minAge days are left alone
+ """
+
+ if not mode in ('trash', 'archive', 'delete'):
+ raise ValueError
+
+ if (self.keepFlaggedThreads):
+ self.scanSubjects(folderName)
+
+ archiveFolder = self.archiveFolder
+ if (archiveFolder == None):
+ if (folderName == 'INBOX'):
+ archiveFolder = ""
+ else:
+ archiveFolder = folderName
+
+ if (folderName == 'INBOX'):
+ path = self.folderBase
+ else:
+ path = os.path.join(self.folderBase, self.folderPrefix + folderName)
+
+ maildir = mailbox.Maildir(path, MaildirMessage)
+
+ fakeMsg = ""
+ if self.isTrialRun:
+ fakeMsg = "(Not really) "
+
+ # Move old messages
+ for i, msg in enumerate(maildir):
+ if self.keepFlaggedThreads == True \
+ and msg.getSubjectHash() in self.keepSubjects:
+ self.log(logging.DEBUG, "Keeping #%d (topic flagged)" % i, msg)
+ else:
+ if (msg.getAge() >= minAge) and ((not self.keepRead) or (self.keepRead and msg.isNew())):
+ if mode == 'trash':
+ self.log(logging.INFO, "%sTrashing #%d (old)" %
+ (fakeMsg, i), msg)
+ if not self.isTrialRun:
+ self.trashWriter.deliver(msg)
+ os.unlink(msg.getFilePath())
+ elif mode == 'delete':
+ self.log(logging.INFO, "%sDeleting #%d (old)" %
+ (fakeMsg, i), msg)
+ if not self.isTrialRun:
+ os.unlink(msg.getFilePath())
+ else: # mode == 'archive'
+ # Determine subfolder path
+ mdate = time.gmtime(msg.getDateSentOrRecd())
+ datePart = str(mdate[0])
+ if self.archiveHierDepth > 1:
+ datePart += self.folderSeperator \
+ + time.strftime("%m-%b", mdate)
+ if self.archiveHierDepth > 2:
+ datePart += self.folderSeperator \
+ + time.strftime("%d-%a", mdate)
+ subFolder = archiveFolder + self.folderSeperator \
+ + datePart
+ sfPath = os.path.join(self.folderBase,
+ self.folderPrefix + subFolder)
+ self.log(logging.INFO, "%sArchiving #%d to %s" %
+ (fakeMsg, i, subFolder), msg)
+ if not self.isTrialRun:
+ # Create the subfolder if needed
+ if not os.path.exists(sfPath):
+ mkMaildir(sfPath)
+ # Deliver
+ self.__mdWriter.deliver(msg, sfPath)
+ os.unlink(msg.getFilePath())
+ self.stats[mode] += 1
+ else:
+ self.log(logging.DEBUG, "Keeping #%d (fresh)" % i, msg)
+ self.stats['total'] += 1
+
+ def log(self, lvl, text, msgObj):
+ """Log some text with the subject of a message"""
+ subj = msgObj.getSubject()
+ if subj == None:
+ subj = "(no subject)"
+ self.logger.log(lvl, text + ": " + subj)
+
+
+# Defaults
+minAge = 14
+mode = None
+
+logging.basicConfig()
+logging.getLogger().setLevel(logging.DEBUG)
+logging.disable(logging.INFO - 1)
+logger = logging.getLogger('cleanup-maildir')
+cleaner = MaildirCleaner()
+
+# Read command-line arguments
+try:
+ opts, args = getopt.getopt(sys.argv[1:],
+ "hqvnrm:t:a:kd:",
+ ["help", "quiet", "verbose", "version", "mode=", "trash-folder=",
+ "age=", "keep-flagged-threads", "keep-read", "folder-seperator=",
+ "folder-prefix=", "maildir-root=", "archive-folder=",
+ "archive-hierarchy-depth=", "trial-run"])
+except getopt.GetoptError, (msg, opt):
+ logger.error("%s\n\n%s" % (msg, __doc__))
+ sys.exit(2)
+output = None
+for o, a in opts:
+ if o in ("-h", "--help"):
+ print __doc__
+ sys.exit()
+ if o in ("-q", "--quiet"):
+ logging.disable(logging.WARNING - 1)
+ if o in ("-v", "--verbose"):
+ logging.disable(logging.DEBUG - 1)
+ if o == "--version":
+ print __version__
+ sys.exit()
+ if o in ("-n", "--trial-run"):
+ cleaner.isTrialRun = True
+ if o in ("-m", "--mode"):
+ logger.warning("the --mode flag is deprecated (see --help)")
+ if a in ('trash', 'archive', 'delete'):
+ mode = a
+ else:
+ logger.error("%s is not a valid command" % a)
+ sys.exit(2)
+ if o in ("-t", "--trash-folder"):
+ cleaner.trashFolder = a
+ if o == "--archive-folder":
+ cleaner.archiveFolder = a
+ if o in ("-a", "--age"):
+ minAge = int(a)
+ if o in ("-k", "--keep-flagged-threads"):
+ cleaner.keepFlaggedThreads = True
+ if o in ("-r", "--keep-read"):
+ cleaner.keepRead = True
+ if o == "--folder-seperator":
+ cleaner.folderSeperator = a
+ if o == "--folder-prefix":
+ cleaner.folderPrefix = a
+ if o == "--maildir-root":
+ cleaner.folderBase = a
+ if o in ("-d", "--archive-hierarchy-depth"):
+ archiveHierDepth = int(a)
+ if archiveHierDepth < 1 or archiveHierDepth > 3:
+ sys.stderr.write("Error: archive hierarchy depth must be 1, " +
+ "2, or 3.\n")
+ sys.exit(2)
+ cleaner.archiveHierDepth = archiveHierDepth
+
+if not cleaner.folderBase:
+ cleaner.folderBase = os.path.join(os.environ["HOME"], "Maildir")
+if mode == None:
+ if len(args) < 1:
+ logger.error("No command specified")
+ sys.stderr.write(__doc__)
+ sys.exit(2)
+ mode = args.pop(0)
+ if not mode in ('trash', 'archive', 'delete'):
+ logger.error("%s is not a valid command" % mode)
+ sys.exit(2)
+
+if len(args) == 0:
+ logger.error("No folder(s) specified")
+ sys.stderr.write(__doc__)
+ sys.exit(2)
+
+logger.debug("Mode is " + mode)
+
+# Clean each folder
+for dir in args:
+ logger.debug("Cleaning up %s..." % dir)
+ cleaner.clean(mode, dir, minAge)
+
+logger.info('Total messages: %5d' % cleaner.stats['total'])
+logger.info('Affected messages: %5d' % cleaner.stats[mode])
+logger.info('Untouched messages: %5d' %
+ (cleaner.stats['total'] - cleaner.stats[mode]))