#!/usr/bin/env python
# -*-*- encoding:iso-8859-15 filetype:python expandtab:yes tabstop:4 shiftwidth:4 softtabstop:4
# autoindent:yes -*-*-
#
# havard@dahle.no GPL (C) 2005-2006
#
# SYNOPSIS
#
# This script must live in both the sending and receiving ends.
# It prepares a file or a directory, chops it into smtp-server friendly
# slices and ships it away.
#
# On the other end of the channel, a cousin of the sender script
# receives the slices one by one, and pieces them together.
# It then sends a message to the sender to tell her that all went well.
#
# REQUIREMENTS
#   Python 2.6+ or Python 3
#   /usr/bin/mutt
#   /usr/bin/mail
#   split (GNU coreutils, found via $PATH)
#   tar (GNU tar, found via $PATH)
#
# INSTALLATION
#
# =Sending end=
# This is easy. Just put it in your path and say
#   smtp_copy <file-or-dir> <email-address>
#
# =Receiving end=
# Put the script somewhere, make it executable and edit a .qmail or
# .procmail recipe to push the message on to the script.
#
# ==.qmail example==
#
# |/path/to/smtp_copy
#
#
# ==.procmail example==
# ?
#
#

__version__ = "smtp copy 0.14"

import sys

__doc__ = """This program copies files or directories over SMTP.

Sending:
    %s [-v] <file-or-dir> <email-address>

Receiving:
    %s [-v] < rfc2822.msg
    (typically from within a .qmail or a .procmail recipe)

It depends on these external binaries:
    mutt, tar, split, mail
""" % (sys.argv[0], sys.argv[0])

SCRATCHDIR = "~/tmp/smtp_copy"
DEBUG = 0

try:
    import email
    import hashlib
    import os
    import re
    import shutil
    import subprocess
    import time
    from email.header import decode_header
    from email.iterators import typed_subpart_iterator, body_line_iterator
    from tempfile import mkdtemp
except ImportError:
    print("Import Error")
    sys.exit(100)

try:
    # Only exists on Python 2; the name is kept so nothing that relied on it breaks.
    from string import join
except ImportError:
    def join(words, sep=" "):
        "join(words [,sep]) -> string. Stand-in for Python 2's string.join."
        return sep.join(words)

SCRATCHDIR = os.path.expanduser(SCRATCHDIR)

# Metadata keys whose values are written with %i by SMTPArchive.get_metadata()
_INT_META_KEYS = ("timestamp", "archive_parts", "part_size")

_NOT_A_SLICE = "Why, I don't think this really is an smtp copy slice!"


class SMTPCopyError(Exception):
    "Raised when an archive cannot be created, recognised or reassembled."


def dbg(*messages):
    "dbg(msg, ...) -> None. Prints each message, prefixed, when DEBUG is set."
    if DEBUG:
        for message in messages:
            print("[SMTPCOPY]: %s" % message)


def _to_text(value, charset=None):
    "Decode Python 3 bytes to str; leave anything that already is a str alone."
    if isinstance(value, bytes) and not isinstance(value, str):
        try:
            return value.decode(charset or "utf-8", "replace")
        except LookupError:  # unknown charset named in the mail
            return value.decode("utf-8", "replace")
    return value


def _to_bytes(value):
    "Encode text to bytes so it can be written to a subprocess pipe."
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


def _reject(message):
    "Print message and exit with status 100, as the script does for bad input."
    print(message)
    sys.exit(100)


def _ensure_dir(path):
    "Create path (and parents) unless it exists; tolerate a concurrent creator."
    if os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise


def _safe_component(name):
    "Reduce name to a single path component; None if nothing usable is left."
    name = os.path.basename(name.strip())
    if name in ("", ".", ".."):
        return None
    return name


def _decode_subject(raw_subject):
    "Return the Subject header as one string with RFC 2047 words decoded."
    return "".join([_to_text(text, charset)
                    for text, charset in decode_header(raw_subject)])


def _parse_metadata(text):
    """Parse the "key:value" lines written by SMTPArchive.get_metadata().

    Only the first colon separates key from value, so values may contain
    colons. Only the known integer keys are converted to int; everything
    else (md5sum, file names, version) stays a string.
    """
    meta = {}
    for line in text.split("\n"):
        key, colon, value = line.partition(":")
        if not colon:
            continue
        key = key.strip()
        value = value.rstrip("\r")
        if key in _INT_META_KEYS and value.strip().isdigit():
            value = int(value)
        meta[key] = value
    return meta


def _find_parts(directory, archive_name):
    "Return the sorted paths of all '<archive_name>_<digits>' files in directory."
    pattern = re.compile(r"^%s_\d+$" % re.escape(archive_name))
    return sorted(os.path.join(directory, entry)
                  for entry in os.listdir(directory)
                  if pattern.match(entry))


def _tar_member_name(path):
    """Return the name GNU tar stores for path when run without -P.

    tar drops everything through the last ".." component and then any
    leading slashes; an empty result is stored as ".".
    """
    components = path.split("/")
    if ".." in components:
        last_dotdot = len(components) - 1 - components[::-1].index("..")
        components = components[last_dotdot + 1:]
    return "/".join(components).lstrip("/") or "."


def _run_pipeline(producer_cmd, consumer_cmd):
    "Run 'producer | consumer' without a shell. True if both exit with 0."
    try:
        producer = subprocess.Popen(producer_cmd, stdout=subprocess.PIPE)
    except OSError:
        return False
    try:
        consumer = subprocess.Popen(consumer_cmd, stdin=producer.stdout)
    except OSError:
        consumer = None
    # Close our copy of the pipe so the producer notices if the consumer dies.
    producer.stdout.close()
    consumer_ret = 1
    if consumer is not None:
        consumer_ret = consumer.wait()
    producer_ret = producer.wait()
    return producer_ret == 0 and consumer_ret == 0


def extract(msg):
    """extract(Email.Message) -> True or SMTPArchive.

    Email.Message must be a valid Email.Message.
    It must have the following properties (see SMTPPart.send()):
    * Subject: archive_name###part_here###parts_total###part_name
      E.g. big_doc###0###7###big_doc_0000 -> first slice of the 7-part
      archive "big_doc"
    * A text/plain part containing metadata:
        timestamp:%i
        orig_resource:%s
        archive_name:%s
        archive_parts:%i
        md5sum:%s
        part_size:%i
        version:%s
    * An application/* part as the actual slice of the archive.

    Returns SMTPArchive if the full archive has arrived. If some parts are
    still missing, returns True if the part extraction succeeded.
    Prints a message and exits with status 100 if msg is not a usable slice.
    """
    # Get essential info from subject line
    raw_subject = msg.get("Subject")
    if raw_subject is None:
        _reject(_NOT_A_SLICE)
    fields = [field.strip()
              for field in _decode_subject(raw_subject).split("###", 3)]
    if len(fields) != 4 or not fields[1].isdigit() or not fields[2].isdigit():
        _reject(_NOT_A_SLICE)
    part_here, parts_total = fields[1], fields[2]
    # The subject comes from the network: never let it name a path outside
    # of our scratch directory.
    archive_name = _safe_component(fields[0])
    part_name = _safe_component(fields[3])
    if archive_name is None or part_name is None:
        _reject(_NOT_A_SLICE)
    dbg("Received slice of %s: #%s of %s - %s" %
        (archive_name, int(part_here) + 1, parts_total, part_name))

    # the first text/plain mime part should contain metadata
    metapart = next(typed_subpart_iterator(msg, "text", "plain"), None)
    if metapart is None:
        _reject(_NOT_A_SLICE)
    meta = _parse_metadata(_to_text(metapart.get_payload(decode=True) or "",
                                    metapart.get_content_charset()))

    # check to see if this really is an smtp copy slice
    version = str(meta.get("version", "")).strip()
    if "smtp copy" not in version:
        _reject(_NOT_A_SLICE)
    meta['version'] = version
    if not str(meta.get("timestamp", "")).strip().isdigit():
        _reject(_NOT_A_SLICE)

    # Ok, let's roll up our sleeves, this is the real mackay
    if version != __version__:  # compare versions
        try:
            if float(version[10:]) > float(__version__[10:]):
                w = "newer"
            else:
                w = "older"
        except ValueError:  # version number we cannot make sense of
            w = "different"
        print("Beware! This slice was packaged by my cousin, %s, which is %s than me! (I am %s)" %
              (version, w, __version__))
        print("There may be incompabilities. Keep your cross ready")

    tmpdir = os.path.join(SCRATCHDIR, archive_name,
                          str(int(meta['timestamp'])))
    _ensure_dir(tmpdir)

    # look through "application" parts; there is really only one
    mimepart = next(typed_subpart_iterator(msg, "application"), None)
    payload = None
    if mimepart is not None:
        payload = mimepart.get_payload(decode=True)
    if payload is None:
        _reject("Crap! Could not find application/rar payload!")

    filename = os.path.join(tmpdir, part_name)
    dbg("writing part to " + filename)
    # Write under a hidden temporary name and rename, so that a concurrent
    # delivery never counts (or reassembles) a half-written slice.
    partial = os.path.join(tmpdir, ".%s.%i.partial" % (part_name, os.getpid()))
    fd = open(partial, "wb")
    try:
        fd.write(payload)
    finally:
        fd.close()
    os.rename(partial, filename)
    dbg("saved part %s in %s" % (part_name, tmpdir))

    # find all slices (including those previously extracted)
    parts_extracted = _find_parts(tmpdir, archive_name)
    if len(parts_extracted) != int(parts_total):
        return True  # more parts to come

    # this is the last part, so assemble the full archive
    archive = SMTPArchive(archive_name)
    archive.in_dir = tmpdir
    archive.meta = meta
    for part in parts_extracted:
        archive.add_part_from_file(part)
    return archive


class SMTPArchive(object):
    "A file or directory packed with tar and cut into mail-sized slices."

    part_size = 1            # in Mbytes
    in_dir = ""              # where in filesystem are the slices
    destress_interval = 10   # sleep after this many slices (0 disables)
    destress_length = 30     # sleep for how long each interval

    def __init__(self, name):
        # Take the last path component, also for "some/dir/", and make it
        # whitespace free.
        base = os.path.basename(name.rstrip("/")) or os.path.basename(name)
        self.name = base.replace(" ", "_")
        self.timestamp = int(time.time())
        # Per-instance containers: as class attributes they would be shared
        # between every archive in the process.
        self.parts = {}              # partno -> SMTPPart
        self.meta = {'md5sum': 0}

    def create(self, from_what):
        """create(file-or-dir) -> None. Creates an archive set from file-or-dir.

        Runs "tar | split" into a fresh directory below SCRATCHDIR and
        registers every slice. Raises SMTPCopyError if the path does not
        exist, a command fails or no slices were produced.
        """
        if not os.path.exists(from_what):
            raise SMTPCopyError("Baby jesus! path does not exist!")
        # Remember the path the way tar will store it, so that the receiver
        # can find the extracted resource.
        self.orig_resource = _tar_member_name(from_what)
        self.meta['md5sum'] = self.md5sum(from_what)
        _ensure_dir(SCRATCHDIR)
        tmpdir = mkdtemp(dir=SCRATCHDIR)
        self.in_dir = tmpdir
        tar_cmd = ["tar", "-zcf", "-", "--", from_what]
        split_cmd = ["split", "--numeric-suffixes", "--suffix-length=4",
                     "--bytes", "%im" % self.part_size, "-",
                     os.path.join(tmpdir, self.name) + "_"]
        dbg("rarcmd: %s | %s" % (" ".join(tar_cmd), " ".join(split_cmd)))
        if not _run_pipeline(tar_cmd, split_cmd):
            raise SMTPCopyError("Baby jesus! Trouble creating godahm archive")
        _parts = _find_parts(tmpdir, self.name)
        if not _parts:
            raise SMTPCopyError("Lord Mercy! No parts found! What happened?")
        dbg("found parts: %s " % len(_parts))
        for p in _parts:
            self.add_part_from_file(p)

    def _unpack_parts(self):
        "Feed all slices, in order, to 'tar zx'. Returns tar's exit status."
        unpackcmd = ["tar", "-zxf", "-", "-C", self.in_dir]
        dbg("unpackcmd: " + " ".join(unpackcmd))
        try:
            tar = subprocess.Popen(unpackcmd, stdin=subprocess.PIPE)
        except OSError:
            return 127
        feed_failed = False
        try:
            for partno in range(int(self.meta['archive_parts'])):
                slice_path = os.path.join(self.in_dir,
                                          self.parts[partno].filename)
                src = open(slice_path, "rb")
                try:
                    shutil.copyfileobj(src, tar.stdin)
                finally:
                    src.close()
        except (IOError, OSError):
            # unreadable slice, or tar gave up and closed the pipe
            feed_failed = True
        try:
            tar.stdin.close()
        except (IOError, OSError):
            feed_failed = True
        ret = tar.wait()
        if feed_failed and ret == 0:
            ret = 1
        return ret

    def reassemble(self):
        """reassemble() -> final_file_name. Put all slices back together.

        Raises SMTPCopyError if slices are missing, unpacking fails, the
        resource named in the metadata is not found inside in_dir, or its
        md5 sum does not match the metadata.
        """
        if not self.check_all_parts():
            raise SMTPCopyError("Holy Hologram! We don't have all parts")
        ret = self._unpack_parts()
        if ret != 0:
            raise SMTPCopyError("unrar failed with code %s" % ret)
        orig_resource = self.meta.get('orig_resource')
        if not orig_resource:
            raise SMTPCopyError("Archive not extracted, but it should've been!")
        full_path_to_archive = os.path.join(self.in_dir, orig_resource)
        dbg("looking for archve in: " + full_path_to_archive)
        # The metadata travelled by mail: refuse names that point outside
        # of the directory we unpacked into.
        base = os.path.normpath(self.in_dir)
        resolved = os.path.normpath(full_path_to_archive)
        inside = resolved == base or resolved.startswith(base + os.sep)
        if not inside or not os.path.exists(full_path_to_archive):
            raise SMTPCopyError("Archive not extracted, but it should've been!")
        # Compare as strings: the metadata side is text, and md5sum()
        # returns 0 for anything that is not a regular file.
        expected_md5 = str(self.meta.get('md5sum')).strip()
        if expected_md5 != str(self.md5sum(full_path_to_archive)):
            raise SMTPCopyError("Archive does not pass md5 sum test")
        print("All good!")
        return full_path_to_archive

    def md5sum(self, file):
        "Calculates md5 hash of file (hex string). Returns 0 for non-files."
        if not os.path.isfile(file):
            return 0
        digest = hashlib.md5()
        handle = open(file, "rb")
        try:
            while True:
                chunk = handle.read(65536)
                if not chunk:
                    break
                digest.update(chunk)
        finally:
            handle.close()
        md5 = digest.hexdigest()
        dbg("md5sum of %s: %s" % (file, md5))
        return md5

    def send(self, email_address):
        "Sends the archive slices per SMTP. Returns one result per slice."
        ret = []
        for no, part in sorted(self.parts.items()):
            if (self.destress_interval and no >= self.destress_interval
                    and no % self.destress_interval == 0):
                # pause for a bit, to not stress the system by swamping it
                # with too many slices
                dbg("Sleeping %i seconds to destress system" % self.destress_length)
                time.sleep(self.destress_length)
            ret.append(part.send(email_address))
        return ret

    def get_metadata(self):
        "Get relevant metadata for archive"
        return """
timestamp:%i
orig_resource:%s
archive_name:%s
archive_parts:%i
md5sum:%s
part_size:%i
version:%s
""" % (self.timestamp, self.orig_resource, self.name, len(self.parts),
       self.meta['md5sum'], self.part_size, __version__)

    def add_part(self, part):
        "add a part to archive set"
        self.parts[part.partno] = part

    def add_part_from_file(self, filename):
        "add a part to archive set from filename"
        return self.add_part(SMTPPart(filename, self))

    def check_all_parts(self):
        "checks that all parts are there"
        try:
            expected = int(self.meta['archive_parts'])  # how many _should_ be here
        except (KeyError, TypeError, ValueError):
            return False
        for partno in range(expected):
            part = self.parts.get(partno)
            if part is None:
                return False
            if not os.path.exists(os.path.join(self.in_dir, part.filename)):
                return False
        return True


class SMTPPart(object):
    "One slice of an SMTPArchive, stored as a file in archive.in_dir."

    filename = ""
    archive = None
    partno = None

    def __init__(self, filename, archive):
        self.filename = os.path.basename(filename)
        self.archive = archive
        # The archive name is literal text, not a pattern: escape it so that
        # names like "a(1).pdf" neither break the regex nor add groups.
        re_partno = re.search(r'%s_(\d+)$' % re.escape(self.archive.name),
                              filename)
        if re_partno is None:
            raise SMTPCopyError("I'm having trouble believing this is really a part, my young Sir!")
        self.partno = int(re_partno.group(1))

    def send(self, email_address):
        "Send part through SMTP. Returns True if mutt exited with status 0."
        print("Sending slice#%i" % self.partno)
        subject = "%s###%i###%i###%s" % (self.archive.name, self.partno,
                                         len(self.archive.parts), self.filename)
        # echo "" | mutt -F /dev/null -s subject -a "${file}" -- recipient
        # "--" ends the attachment list; current mutt versions require it.
        muttcmd = ["/usr/bin/mutt", "-F", "/dev/null", "-s", subject,
                   "-a", os.path.join(self.archive.in_dir, self.filename),
                   "--"]
        muttcmd.extend(email_address.split())
        dbg("Sending archive slice to " + email_address)
        dbg("Executing " + " ".join(muttcmd))
        try:
            mua = subprocess.Popen(muttcmd, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
            output = mua.communicate(_to_bytes(self.archive.get_metadata()))[0]
        except (IOError, OSError):
            dbg("could not run " + muttcmd[0])
            return False
        dbg("mutt says: %s" % _to_text(output))
        return mua.returncode == 0


def _send_from_cli():
    "Interactive mode: pack sys.argv[1] and mail the slices to sys.argv[2]."
    if len(sys.argv) < 3:
        print(__doc__)
        sys.exit(100)
    # we have contact! let's create an archive!
    arcname = sys.argv[1].replace(" ", "_")
    arc = SMTPArchive(arcname)
    arc.create(sys.argv[1])
    ret = arc.send(sys.argv[2])
    print("%i parts successfully sent, %i failed" %
          (len([yes for yes in ret if yes]), len([no for no in ret if not no])))


def _report_success(archive, final_file):
    "Mail a short receipt to the envelope sender given in $SENDER."
    sender = os.getenv('SENDER')
    if not sender or sender.startswith("-"):
        # no usable address; an address starting with "-" would be read by
        # mail as an option
        dbg("No usable SENDER in environment, not sending a receipt")
        return
    subj = "Archive '%s' successfully received" % archive.name
    report = """
Received archive: %s
Extracted to: %s

Thanks.
%s running on %s
""" % (archive.name, final_file, __version__, os.uname()[1])
    try:
        mua = subprocess.Popen(["/usr/bin/mail", "-s", subj, sender],
                               stdin=subprocess.PIPE)
        mua.communicate(_to_bytes(report))
    except (IOError, OSError):
        dbg("could not run /usr/bin/mail")


def _receive_from_stdin():
    "Delivery mode: read one message from stdin and store its slice."
    dbg("Receiving... ")
    if hasattr(email, "message_from_binary_file") and hasattr(sys.stdin, "buffer"):
        # Python 3: parse the raw bytes, mail is not guaranteed to be UTF-8
        part = email.message_from_binary_file(sys.stdin.buffer)
    else:
        part = email.message_from_file(sys.stdin)
    archive = extract(part)
    if isinstance(archive, SMTPArchive):  # we have received all slices
        dbg("All parts received, reassembling...")
        final_file = archive.reassemble()
        _report_success(archive, final_file)


def main():
    "Command line entry point; sends when stdin is a tty, receives otherwise."
    global DEBUG
    if "-v" in sys.argv:
        DEBUG = 1
        sys.argv.remove("-v")

    if "-h" in sys.argv or "--help" in sys.argv:
        print(__doc__)
        sys.exit(100)

    dbg("Good evening. This is %s" % __version__)

    try:
        if sys.stdin.isatty():  # interactively run
            _send_from_cli()
        else:  # we are receiving
            _receive_from_stdin()
    except SMTPCopyError as err:
        sys.stderr.write("%s\n" % err)
        sys.exit(100)
    sys.exit(0)


if __name__ == "__main__":
    main()