diff options
author | Aaron LI <aaronly.me@outlook.com> | 2016-03-31 10:49:48 +0800 |
---|---|---|
committer | Aaron LI <aaronly.me@outlook.com> | 2016-03-31 10:49:48 +0800 |
commit | a374fac3362a0204a93a3a50011366239a80fc1b (patch) | |
tree | 8b5e9646e8a7e715942b96aea8267e27157a3c36 /python/xkeywordsync.py | |
parent | d6cb7d0636c94612c667d6555001ec50dd81ea4f (diff) | |
download | atoolbox-a374fac3362a0204a93a3a50011366239a80fc1b.tar.bz2 |
add several python scripts
Diffstat (limited to 'python/xkeywordsync.py')
-rw-r--r-- | python/xkeywordsync.py | 533 |
1 files changed, 533 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Credits:
# [1] Gaute Hope: gauteh/abunchoftags
#     https://github.com/gauteh/abunchoftags/blob/master/keywsync.cc
#
# TODO:
# * Support case-insensitive tags merge
#   (ref: http://stackoverflow.com/a/1480230)
# * Accept a specified mtime, and only deal with files with newer mtime.
#
# Aaron LI
# Created: 2016-01-24
#

"""
Sync message 'X-Keywords' header with notmuch tags.

* tags-to-keywords:
  Check if the messages in the query have a matching 'X-Keywords' header
  to the list of notmuch tags.
  If not, update the 'X-Keywords' and re-write the message.

* keywords-to-tags:
  Check if the messages in the query have matching notmuch tags to the
  'X-Keywords' header.
  If not, update the tags in the notmuch database.

* merge-keywords-tags:
  Merge the 'X-Keywords' labels and notmuch tags, and update both.
"""

__version__ = "0.1.2"
__date__ = "2016-01-25"

import os
import sys
import argparse
import email
import subprocess
import tempfile

# Require Python 3.4, or install package 'enum34'
from enum import Enum

from notmuch import Database, Query

from imapUTF7 import imapUTF7Decode, imapUTF7Encode


class SyncDirection(Enum):
    """
    Synchronization direction
    """
    MERGE_KEYWORDS_TAGS = 0  # Merge 'X-Keywords' and notmuch tags and
                             # update both
    KEYWORDS_TO_TAGS = 1     # Sync 'X-Keywords' header to notmuch tags
    TAGS_TO_KEYWORDS = 2     # Sync notmuch tags to 'X-Keywords' header


class SyncMode(Enum):
    """
    Sync mode
    """
    ADD_REMOVE = 0   # Allow add & remove tags/keywords
    ADD_ONLY = 1     # Only allow add tags/keywords
    REMOVE_ONLY = 2  # Only allow remove tags/keywords


class KwMessage:
    """
    Message class to deal with 'X-Keywords' header synchronization
    with notmuch tags.

    NOTE:
    * The same message may have multiple files with different keywords
      (e.g, the same message exported under each label by Gmail)
      managed by OfflineIMAP.
      For example: a message file in OfflineIMAP synced folder of
      '[Gmail]/All Mail' have keywords ['google', 'test']; however,
      the file in synced folder 'test' of the same message only have
      keywords ['google'] without the keyword 'test'.
    * All files associated to the same message are regarded as the same.
      The keywords are extracted from all files and merged.
      And the same updated keywords are written back to all files, which
      results all files finally having the same 'X-Keywords' header.
    * You may only sync the '[Gmail]/All Mail' folder without other
      folders exported according the labels by Gmail.
    """
    # Replace some special characters before mapping keyword to tag
    enable_replace_chars = True
    chars_replace = {
        '/': '.',
    }
    # Mapping between (Gmail) keywords and notmuch tags (before ignoring tags)
    keywords_mapping = {
        '\\Inbox': 'inbox',
        '\\Important': 'important',
        '\\Starred': 'flagged',
        '\\Sent': 'sent',
        '\\Muted': 'killed',
        '\\Draft': 'draft',
        '\\Trash': 'deleted',
        '\\Junk': 'spam',
    }
    # Tags ignored from syncing
    # These tags are either internal tags or tags handled by maildir flags.
    enable_ignore_tags = True
    tags_ignored = set([
        'new', 'unread', 'attachment', 'signed', 'encrypted',
        'flagged', 'replied', 'passed', 'draft',
    ])
    # Ignore case when merging tags (not implemented yet, see the TODO)
    tags_ignorecase = True

    # Class-level defaults of the per-message result state.  Every
    # instance gets its own fresh values in `__init__()`, and
    # `merge_tags()` rebinds (never mutates in place) these attributes.
    #
    # Whether the tags updated against the message 'X-Keywords' header
    tags_updated = False
    # Added & removed tags for notmuch database against 'X-Keywords'
    tags_added = []
    tags_removed = []
    # Newly updated/merged notmuch tags against 'X-Keywords'
    tags_new = []

    # Whether the keywords updated against the notmuch tags
    keywords_updated = False
    # Added & removed tags for 'X-Keywords' against notmuch database
    tags_kw_added = []
    tags_kw_removed = []
    # Newly updated/merged tags for 'X-Keywords' against notmuch database
    tags_kw_new = []

    def __init__(self, msg, filename=None):
        """
        Wrap a notmuch message.

        Parameters:
        * msg: message object providing `get_filenames()`, `get_tags()`,
               `freeze()`, `thaw()`, `add_tag()` and `remove_tag()`
        * filename: optional filename, stored as-is (not used otherwise)
        """
        self.message = msg
        self.filename = filename
        self.allfiles = list(msg.get_filenames())
        self.tags = set(msg.get_tags())
        # Keywords read from the files, and their mapped tags; filled in
        # by `get_keywords()` and `map_keywords()`.
        self.keywords = set()
        self.tags_kw = set()
        # Per-instance result state, so nothing is shared through the
        # class-level defaults.
        self.tags_updated = False
        self.tags_added = set()
        self.tags_removed = set()
        self.tags_new = set()
        self.keywords_updated = False
        self.tags_kw_added = set()
        self.tags_kw_removed = set()
        self.tags_kw_new = set()

    def sync(self, direction, mode=SyncMode.ADD_REMOVE,
             dryrun=False, verbose=False):
        """
        Wrapper function to sync between 'X-Keywords' and notmuch tags.

        Raises ValueError if 'direction' is not a known `SyncDirection`.
        """
        if direction == SyncDirection.KEYWORDS_TO_TAGS:
            self.sync_keywords_to_tags(sync_mode=mode, dryrun=dryrun,
                                       verbose=verbose)
        elif direction == SyncDirection.TAGS_TO_KEYWORDS:
            self.sync_tags_to_keywords(sync_mode=mode, dryrun=dryrun,
                                       verbose=verbose)
        elif direction == SyncDirection.MERGE_KEYWORDS_TAGS:
            self.merge_keywords_tags(sync_mode=mode, dryrun=dryrun,
                                     verbose=verbose)
        else:
            raise ValueError("Invalid sync direction: %s" % direction)

    def sync_keywords_to_tags(self, sync_mode=SyncMode.ADD_REMOVE,
                              dryrun=False, verbose=False):
        """
        Wrapper function to sync 'X-Keywords' to notmuch tags.

        With 'dryrun' the planned changes are only printed.
        """
        self.get_keywords()
        self.map_keywords()
        self.merge_tags(sync_direction=SyncDirection.KEYWORDS_TO_TAGS,
                        sync_mode=sync_mode)
        if dryrun or verbose:
            print('* MSG: %s' % self.message)
            print(' TAG: [%s] +[%s] -[%s] => [%s]' % (
                ','.join(self.tags), ','.join(self.tags_added),
                ','.join(self.tags_removed), ','.join(self.tags_new)))
        if not dryrun:
            self.update_tags()

    def sync_tags_to_keywords(self, sync_mode=SyncMode.ADD_REMOVE,
                              dryrun=False, verbose=False):
        """
        Wrapper function to sync notmuch tags to 'X-Keywords'

        With 'dryrun' the planned changes are only printed.
        """
        self.get_keywords()
        self.map_keywords()
        self.merge_tags(sync_direction=SyncDirection.TAGS_TO_KEYWORDS,
                        sync_mode=sync_mode)
        keywords_new = self.map_tags(tags=self.tags_kw_new)
        if dryrun or verbose:
            print('* MSG: %s' % self.message)
            print('* FILES: %s' % ' ; '.join(self.allfiles))
            print(' XKW: {%s} +[%s] -[%s] => {%s}' % (
                ','.join(self.keywords), ','.join(self.tags_kw_added),
                ','.join(self.tags_kw_removed), ','.join(keywords_new)))
        if not dryrun:
            self.update_keywords(keywords_new=keywords_new)

    def merge_keywords_tags(self, sync_mode=SyncMode.ADD_REMOVE,
                            dryrun=False, verbose=False):
        """
        Wrapper function to merge 'X-Keywords' and notmuch tags

        With 'dryrun' the planned changes are only printed.
        """
        self.get_keywords()
        self.map_keywords()
        self.merge_tags(sync_direction=SyncDirection.MERGE_KEYWORDS_TAGS,
                        sync_mode=sync_mode)
        keywords_new = self.map_tags(tags=self.tags_kw_new)
        if dryrun or verbose:
            print('* MSG: %s' % self.message)
            print('* FILES: %s' % ' ; '.join(self.allfiles))
            print(' TAG: [%s] +[%s] -[%s] => [%s]' % (
                ','.join(self.tags), ','.join(self.tags_added),
                ','.join(self.tags_removed), ','.join(self.tags_new)))
            print(' XKW: {%s} +[%s] -[%s] => {%s}' % (
                ','.join(self.keywords), ','.join(self.tags_kw_added),
                ','.join(self.tags_kw_removed), ','.join(keywords_new)))
        if not dryrun:
            self.update_tags()
            self.update_keywords(keywords_new=keywords_new)

    def get_keywords(self):
        """
        Get 'X-Keywords' header from all files associated with the same
        message, decode, split and merge.

        Stores the result in `self.keywords` and returns it (a set).

        NOTE: Do NOT simply use the `message.get_header()` method, which
              cannot get the complete keywords from all files.
        """
        keywords_utf7 = []
        for fn in self.allfiles:
            # Close each message file right after parsing it.
            with open(fn, 'r') as fp:
                msg = email.message_from_file(fp)
            val = msg['X-Keywords']
            if val:
                keywords_utf7.append(val)
            else:
                print("WARNING: 'X-Keywords' header not found or empty "
                      "for file: %s" % fn, file=sys.stderr)
        keywords_utf7 = ','.join(keywords_utf7)
        if keywords_utf7 != '':
            decoded = imapUTF7Decode(keywords_utf7.encode())
            # Strip, remove duplications, and drop empty fragments
            # (e.g. from 'a,,b') which would become empty tags.
            stripped = (kw.strip() for kw in decoded.split(','))
            keywords = set(kw for kw in stripped if kw)
        else:
            keywords = set()
        self.keywords = keywords
        return keywords

    def map_keywords(self, keywords=None):
        """
        Map keywords to notmuch tags according to the mapping table.

        Uses `self.keywords` if 'keywords' is None.  Stores the result
        in `self.tags_kw` and returns it (a set).
        """
        if keywords is None:
            keywords = self.keywords
        if self.enable_replace_chars:
            # Replace specified characters in keywords
            trans = str.maketrans(self.chars_replace)
            keywords = [kw.translate(trans) for kw in keywords]
        # Map keywords to tags; unmapped keywords are used verbatim
        tags = set(self.keywords_mapping.get(kw, kw) for kw in keywords)
        self.tags_kw = tags
        return tags

    def map_tags(self, tags=None):
        """
        Map tags to keywords according to the inversed mapping table.

        Uses `self.tags` if 'tags' is None.  Stores the result in
        `self.keywords_tags` and returns it (a set).
        """
        if tags is None:
            tags = self.tags
        if self.enable_replace_chars:
            # Inversely replace specified characters in tags
            chars_replace_inv = {v: k for k, v in self.chars_replace.items()}
            trans = str.maketrans(chars_replace_inv)
            tags = [tag.translate(trans) for tag in tags]
        # Map tags to keywords; unmapped tags are used verbatim
        keywords_mapping_inv = {
            v: k for k, v in self.keywords_mapping.items()
        }
        keywords = set(keywords_mapping_inv.get(tag, tag) for tag in tags)
        self.keywords_tags = keywords
        return keywords

    def merge_tags(self, sync_direction, sync_mode=SyncMode.ADD_REMOVE,
                   tags_nm=None, tags_kw=None):
        """
        Merge the tags from notmuch database and 'X-Keywords' header,
        according to the specified sync direction and operation restriction.

        Parameters:
        * sync_direction: a `SyncDirection` member
        * sync_mode: a `SyncMode` member restricting additions/removals
        * tags_nm: notmuch tags (default: `self.tags`)
        * tags_kw: tags mapped from 'X-Keywords' (default: `self.tags_kw`)

        Updates the `tags_*` / `*_updated` attributes and returns them
        in a dictionary.

        Raises ValueError if 'sync_direction' is invalid.

        TODO: support case-insensitive set operations
        """
        # Work on set copies, so any iterable is accepted
        tags_nm = set(self.tags if tags_nm is None else tags_nm)
        tags_kw = set(self.tags_kw if tags_kw is None else tags_kw)
        if self.enable_ignore_tags:
            # Remove ignored tags before merge
            tags_nm2 = tags_nm.difference(self.tags_ignored)
            tags_kw2 = tags_kw.difference(self.tags_ignored)
        else:
            tags_nm2 = tags_nm
            tags_kw2 = tags_kw
        # Added & removed tags for notmuch database against 'X-Keywords'
        tags_added = set()
        tags_removed = set()
        # Added & removed tags for 'X-Keywords' against notmuch database
        tags_kw_added = set()
        tags_kw_removed = set()
        #
        if sync_direction == SyncDirection.KEYWORDS_TO_TAGS:
            # Sync 'X-Keywords' to notmuch tags
            tags_added = tags_kw2.difference(tags_nm2)
            tags_removed = tags_nm2.difference(tags_kw2)
        elif sync_direction == SyncDirection.TAGS_TO_KEYWORDS:
            # Sync notmuch tags to 'X-Keywords'
            tags_kw_added = tags_nm2.difference(tags_kw2)
            tags_kw_removed = tags_kw2.difference(tags_nm2)
        elif sync_direction == SyncDirection.MERGE_KEYWORDS_TAGS:
            # Merge both notmuch tags and 'X-Keywords'; a union only
            # ever adds, so both "removed" sets stay empty.
            tags_merged = tags_nm2.union(tags_kw2)
            tags_added = tags_merged.difference(tags_nm2)
            tags_kw_added = tags_merged.difference(tags_kw2)
        else:
            raise ValueError("Invalid synchronization direction")
        # Apply sync operation restriction
        self.tags_added = set()
        self.tags_removed = set()
        self.tags_kw_added = set()
        self.tags_kw_removed = set()
        tags_new = tags_nm        # Use un-ignored notmuch tags
        tags_kw_new = tags_kw     # Use un-ignored 'X-Keywords' tags
        if sync_mode != SyncMode.REMOVE_ONLY:
            self.tags_added = tags_added
            self.tags_kw_added = tags_kw_added
            tags_new = tags_new.union(tags_added)
            tags_kw_new = tags_kw_new.union(tags_kw_added)
        if sync_mode != SyncMode.ADD_ONLY:
            self.tags_removed = tags_removed
            self.tags_kw_removed = tags_kw_removed
            tags_new = tags_new.difference(tags_removed)
            tags_kw_new = tags_kw_new.difference(tags_kw_removed)
        #
        self.tags_new = tags_new
        self.tags_kw_new = tags_kw_new
        # Recompute the flags on every call, so that a previous merge
        # on the same instance cannot leave them stale.
        self.tags_updated = bool(self.tags_added or self.tags_removed)
        self.keywords_updated = bool(self.tags_kw_added or
                                     self.tags_kw_removed)
        #
        return {
            'tags_updated': self.tags_updated,
            'tags_added': self.tags_added,
            'tags_removed': self.tags_removed,
            'tags_new': self.tags_new,
            'keywords_updated': self.keywords_updated,
            'tags_kw_added': self.tags_kw_added,
            'tags_kw_removed': self.tags_kw_removed,
            'tags_kw_new': self.tags_kw_new,
        }

    def update_keywords(self, keywords_new=None, outfile=None):
        """
        Encode the keywords (default: the keywords mapped from
        `self.tags_kw_new`) and write back to all message files.

        If parameter 'outfile' specified, then write the updated message
        (read from the first file) to that file instead of overwriting.

        Does nothing unless `self.keywords_updated` is set.

        NOTE:
        * The modification time of the message file should be kept to prevent
          OfflineIMAP from treating it as a new one (and the previous a
          deleted one).
        * All files associated with the same message are updated to have
          the same 'X-Keywords' header.
        """
        if not self.keywords_updated:
            # keywords NOT updated, just skip
            return

        if keywords_new is None:
            # Derive the default from the merged tags
            keywords_new = self.map_tags(tags=self.tags_kw_new)
        #
        if outfile is not None:
            infile = self.allfiles[0:1]
            outfile = [os.path.expanduser(outfile)]
        else:
            infile = self.allfiles
            outfile = self.allfiles
        #
        for ifname, ofname in zip(infile, outfile):
            with open(ifname, 'r') as fp:
                msg = email.message_from_file(fp)
            fstat = os.stat(ifname)
            # Delete first in both cases, to avoid multiple occurrences
            del msg['X-Keywords']
            if not keywords_new:
                # No keywords left (empty list/set): drop the header
                print("WARNING: delete 'X-Keywords' header from file: %s" %
                      ifname, file=sys.stderr)
            else:
                # Update 'X-Keywords' header; sorted for a stable result
                keywords = ','.join(sorted(keywords_new))
                keywords_utf7 = imapUTF7Encode(keywords).decode()
                msg['X-Keywords'] = keywords_utf7
            # Write updated message
            with open(ofname, 'w') as fp:
                fp.write(msg.as_string())
            # Reset the timestamps
            os.utime(ofname, ns=(fstat.st_atime_ns, fstat.st_mtime_ns))

    def update_tags(self, tags_added=None, tags_removed=None):
        """
        Update notmuch tags according to keywords.

        Defaults to `self.tags_added` / `self.tags_removed`.
        Does nothing unless `self.tags_updated` is set.
        """
        if not self.tags_updated:
            # tags NOT updated, just skip
            return

        if tags_added is None:
            tags_added = self.tags_added
        if tags_removed is None:
            tags_removed = self.tags_removed
        # Use freeze/thaw for safer transactions to change tag values.
        self.message.freeze()
        for tag in tags_added:
            self.message.add_tag(tag, sync_maildir_flags=False)
        for tag in tags_removed:
            self.message.remove_tag(tag, sync_maildir_flags=False)
        self.message.thaw()


def get_notmuch_revision(dbpath=None):
    """
    Get the current revision and UUID of notmuch database.

    Runs `notmuch count --lastmod` and parses its whitespace-separated
    output, taking the second field as the UUID and the third as the
    revision.  If 'dbpath' is given, a temporary minimal notmuch config
    pointing to that database is used.

    Returns a dictionary with keys 'revision' (int) and 'uuid' (str).
    Raises `subprocess.CalledProcessError` if the command fails.
    """
    # Argument list without a shell: paths need no quoting this way.
    cmd = ['notmuch', 'count', '--lastmod']
    if dbpath:
        # The context manager removes the temporary config even if the
        # command fails.
        with tempfile.NamedTemporaryFile() as tf:
            # Create a minimal notmuch config for the specified dbpath
            config = '[database]\npath=%s\n' % os.path.expanduser(dbpath)
            tf.write(config.encode())
            tf.flush()
            cmd.insert(1, '--config=%s' % tf.name)
            output = subprocess.check_output(cmd)
    else:
        output = subprocess.check_output(cmd)
    # Extract output
    dbinfo = output.decode().split()
    return {'revision': int(dbinfo[2]), 'uuid': dbinfo[1]}


def main():
    """
    Command line entry: parse the arguments and sync every message
    matching the notmuch query.
    """
    parser = argparse.ArgumentParser(
        description="Sync message 'X-Keywords' header with notmuch tags.")
    parser.add_argument("-V", "--version", action="version",
                        version="%(prog)s " +
                                "v%s (%s)" % (__version__, __date__))
    parser.add_argument("-q", "--query", dest="query", required=True,
                        help="notmuch database query string")
    parser.add_argument("-p", "--db-path", dest="dbpath",
                        help="notmuch database path "
                             "(default to try user configuration)")
    parser.add_argument("-n", "--dry-run", dest="dryrun",
                        action="store_true", help="dry run")
    parser.add_argument("-v", "--verbose", dest="verbose",
                        action="store_true", help="show verbose information")
    # Exclusive argument group for sync mode
    exgroup1 = parser.add_mutually_exclusive_group(required=True)
    exgroup1.add_argument("-m", "--merge-keywords-tags",
                          dest="direction_merge", action="store_true",
                          help="merge 'X-Keywords' and tags and update both")
    exgroup1.add_argument("-k", "--keywords-to-tags",
                          dest="direction_keywords2tags", action="store_true",
                          help="sync 'X-Keywords' to notmuch tags")
    exgroup1.add_argument("-t", "--tags-to-keywords",
                          dest="direction_tags2keywords", action="store_true",
                          help="sync notmuch tags to 'X-Keywords'")
    # Exclusive argument group for tag operation mode
    exgroup2 = parser.add_mutually_exclusive_group(required=False)
    exgroup2.add_argument("-a", "--add-only", dest="mode_addonly",
                          action="store_true", help="only add notmuch tags")
    exgroup2.add_argument("-r", "--remove-only", dest="mode_removeonly",
                          action="store_true",
                          help="only remove notmuch tags")
    # Parse
    args = parser.parse_args()
    # Sync direction
    if args.direction_merge:
        sync_direction = SyncDirection.MERGE_KEYWORDS_TAGS
    elif args.direction_keywords2tags:
        sync_direction = SyncDirection.KEYWORDS_TO_TAGS
    elif args.direction_tags2keywords:
        sync_direction = SyncDirection.TAGS_TO_KEYWORDS
    else:
        raise ValueError("Invalid synchronization direction")
    # Sync mode
    if args.mode_addonly:
        sync_mode = SyncMode.ADD_ONLY
    elif args.mode_removeonly:
        sync_mode = SyncMode.REMOVE_ONLY
    else:
        sync_mode = SyncMode.ADD_REMOVE
    #
    if args.dbpath:
        dbpath = os.path.abspath(os.path.expanduser(args.dbpath))
    else:
        dbpath = None
    #
    db = Database(path=dbpath, create=False, mode=Database.MODE.READ_WRITE)
    try:
        q = Query(db, args.query)
        total_msgs = q.count_messages()
        msgs = q.search_messages()
        #
        if args.verbose:
            # The revision is only displayed, so query it only here.
            dbinfo = get_notmuch_revision(dbpath=dbpath)
            print("# Notmuch database path: %s" % dbpath)
            print("# Database revision: %d (uuid: %s)" %
                  (dbinfo['revision'], dbinfo['uuid']))
            print("# Query: %s" % args.query)
            print("# Sync direction: %s" % sync_direction.name)
            print("# Sync mode: %s" % sync_mode.name)
            print("# Total messages to check: %d" % total_msgs)
            print("# Dry run: %s" % args.dryrun)
        #
        for msg in msgs:
            kwmsg = KwMessage(msg)
            kwmsg.sync(direction=sync_direction, mode=sync_mode,
                       dryrun=args.dryrun, verbose=args.verbose)
    finally:
        # Always release the database, even if a message fails to sync
        db.close()


if __name__ == "__main__":
    main()

# vim: set ts=4 sw=4 tw=0 fenc= ft=python: #