#!/usr/bin/env python2.4
# -*-*- encoding:iso-8859-15 filetype:python expandtab:yes tabstop:4 shiftwidth:4 softtabstop:4
# autoindent:yes -*-*-
#
# havard@dahle.no GPL (C) 2005-2006
#
# SYNOPSIS
#
# This script must live in both the sending and receiving ends.
# It prepares a file or a directory, chops it into smtp-server friendly
# slices and ships it away.
#
# On the other end of the channel, a cousin of the sender script
# receives the slices one by one, and pieces them together.
# It then sends a message to the sender to tell her that all went well.
#
# REQUIREMENTS
#   /usr/bin/mutt   (only used by the legacy SMTPPart.xsend() path)
#   /usr/bin/mail
#   /usr/bin/split
#   /usr/bin/tar
#
# INSTALLATION
#
# =Sending end=
# This is easy. Just put it in your path and say
#   smtp_copy <file-or-dir> <email-address>
#
# =Receiving end=
# Put the script somewhere, make it executable and edit a .qmail or
# .procmail recipe to push the message on to the script.
#
# ==.qmail example==
#
# |/path/to/smtp_copy
#
#
# ==.procmail example==
# ?
#
# NOTE(review): the code below deliberately avoids constructs that were
# removed after Python 2 (print statements, string exceptions, has_key,
# file(), os.popen4) and also avoids syntax newer than the interpreter named
# in the shebang.  It has not been re-tested on Python 2.4 itself - confirm
# before deploying there.

__version__ = "smtp copy 0.16"

import sys
import glob

__doc__ = """This program copies files or directories over SMTP.

Version: %s

Sending:
    %s <file-or-dir> <email-address>

Receiving:
    %s < rfc2822.msg
    (typically from within a .qmail or a .procmail recipe)

Reassembling:
    %s -r <directory-with-slices>
    (%s does that itself when the last part has been received,
    but sometimes it needs a nudge)

It depends on these external binaries:
    mutt, tar, split, mail
""" % (__version__, sys.argv[0], sys.argv[0], sys.argv[0], sys.argv[0])

SCRATCHDIR = "~/tmp/smtp_copy"
DEBUG = 0

try:
    import email
    import os
    import re
    import time
    import smtplib
    import socket
    import subprocess
    from tempfile import mkdtemp
    try:
        # Lower-case module names of the email package.
        from email.header import decode_header
        from email.iterators import typed_subpart_iterator, body_line_iterator
        from email.mime.multipart import MIMEMultipart
        from email.mime.base import MIMEBase
        from email.mime.text import MIMEText
        from email import encoders as Encoders
    except ImportError:
        # Spellings used by the original script.
        from email.Header import decode_header
        from email.Iterators import typed_subpart_iterator, body_line_iterator
        from email.MIMEMultipart import MIMEMultipart
        from email.MIMEBase import MIMEBase
        from email.MIMEText import MIMEText
        from email import Encoders
    try:
        from hashlib import md5 as _new_md5
    except ImportError:
        from md5 import new as _new_md5
    try:
        from string import join
    except ImportError:
        def join(words, sep=" "):
            """Stand-in for string.join where that function is unavailable."""
            return sep.join(words)
except ImportError:
    print("Import Error")
    sys.exit(100)

SCRATCHDIR = os.path.expanduser(SCRATCHDIR)

# Metadata fields that are converted to int when they consist of digits only.
# Everything else (file names, checksums, version) stays a string.
_INT_FIELDS = ("timestamp", "archive_parts", "part_size")


class SMTPCopyError(Exception):
    """Raised when creating, sending or reassembling an archive fails."""


def _shell_quote(text):
    """Return text wrapped in single quotes so /bin/sh treats it as one word."""
    return "'" + str(text).replace("'", "'\\''") + "'"


def _to_text(data, charset=None):
    """Return data as a native string, decoding byte strings if necessary."""
    if isinstance(data, str):
        return data
    try:
        return data.decode(charset or "ascii", "replace")
    except LookupError:
        # unknown charset name in the message
        return data.decode("ascii", "replace")


def _reject(message):
    """Print message and exit with status 100 (used for unusable input)."""
    print(message)
    sys.exit(100)


def _find_parts(directory, name):
    """Return the sorted paths in directory that are named <name>_<digits>."""
    pattern = re.compile(r"^%s_\d+$" % re.escape(name))
    found = [os.path.join(directory, entry)
             for entry in os.listdir(directory)
             if pattern.match(entry)]
    found.sort()
    return found


def _tar_member_name(path):
    """Return the name under which tar is expected to store path.

    NOTE(review): this mirrors GNU tar's handling of unsafe names (drop the
    prefix up to and including the last ".." component, then any leading
    slashes).  Confirm against the tar in use on the sending host.
    """
    components = path.split("/")
    last_dotdot = -1
    for index, component in enumerate(components):
        if component == "..":
            last_dotdot = index
    remainder = "/".join(components[last_dotdot + 1:]).lstrip("/")
    return remainder or "."


def parse_metadata(text):
    """parse_metadata(str) -> dict.  Parse the "key:value" metadata lines.

    Lines without a colon are ignored.  Only the first colon separates key
    and value, so values may themselves contain colons.  The fields named in
    _INT_FIELDS are converted to int when they are all digits.
    """
    meta = {}
    for line in text.split("\n"):
        if ":" not in line:
            continue
        key, value = line.split(":", 1)
        key = key.strip()
        value = value.rstrip("\r\n")
        if not key:
            continue
        if key in _INT_FIELDS and value.strip().isdigit():
            value = int(value.strip())
        meta[key] = value
    return meta


def extract(msg):
    """extract(Email.Message) -> True or SMTPArchive.

    Stores the slice carried by msg in the scratch directory.

    msg must have the following properties (see SMTPPart.send()):

    * Subject: archive_name###part_here###parts_total###part_name
      E.g. big_doc###1###7###big_doc_0001
      -> slice number 1 of the 7-slice archive "big_doc"
    * A text/plain part containing metadata:
        timestamp:%i
        orig_resource:%s
        archive_name:%s
        archive_parts:%i
        md5sum:%s
        part_size:%i
        version:%s
    * An application/x-smtpcopy part as the actual slice of the archive.

    Returns an SMTPArchive if the number of slices found on disk equals
    parts_total, otherwise True.  Exits with status 100 when the message
    is not a usable smtp copy slice.
    """
    not_a_slice = "Why, I don't think this really is an smtp copy slice!"

    # Get essential info from subject line
    raw_subject = msg.get("Subject")
    if raw_subject is None:
        _reject(not_a_slice)
    fragments = [_to_text(text, charset)
                 for text, charset in decode_header(raw_subject)]
    # drop line breaks left over from header folding
    subj = "".join(fragments).replace("\r", "").replace("\n", "")
    fields = [field.strip() for field in subj.split("###", 3)]
    if len(fields) != 4:
        _reject(not_a_slice)
    archive_name, part_here, parts_total, part_name = fields
    if not (part_here.isdigit() and parts_total.isdigit()):
        _reject(not_a_slice)

    # The subject comes from the network: never let it pick a location
    # outside the scratch directory.
    if archive_name in ("", ".", "..") or os.path.basename(archive_name) != archive_name:
        _reject(not_a_slice)
    if not re.match(r"^%s_\d+$" % re.escape(archive_name), part_name):
        _reject(not_a_slice)

    dbg("Received slice of %s: #%s of %s - %s" %
        (archive_name, int(part_here) + 1, parts_total, part_name))

    # the first text/plain mime part should contain metadata
    metapart = None
    for candidate in typed_subpart_iterator(msg, "text", "plain"):
        metapart = candidate
        break
    if metapart is None:
        _reject(not_a_slice)
    meta = parse_metadata(_to_text(metapart.get_payload(decode=True) or "",
                                   metapart.get_content_charset()))

    # check to see if this really is an smtp copy slice
    version = meta.get("version")
    if version is None or "smtp copy" not in str(version):
        _reject(not_a_slice)
    timestamp = meta.get("timestamp")
    if not isinstance(timestamp, int):
        _reject(not_a_slice)

    # Ok, let's roll up our sleeves, this is the real mackay
    if version != __version__:  # compare versions
        try:
            if float(version[10:]) > float(__version__[10:]):
                w = "newer"
            else:
                w = "older"
        except ValueError:
            w = "different"
        print("Beware! This slice was packaged by my cousin, %s, which is %s than me! (I am %s)" %
              (version, w, __version__))
        print("There may be incompabilities. Keep your cross ready")

    tmpdir = os.path.join(SCRATCHDIR, archive_name, str(timestamp))
    try:
        os.makedirs(tmpdir)
    except OSError:
        # fine if it is already there (earlier slice or concurrent delivery)
        if not os.path.isdir(tmpdir):
            raise

    # look through "application" parts; there is really only one
    mimepart = None
    for candidate in typed_subpart_iterator(msg, "application", "x-smtpcopy"):
        mimepart = candidate
        break
    if mimepart is None:
        _reject("Crap! Could not find application/x-smtpcopy payload!")

    filename = os.path.join(tmpdir, part_name)
    dbg("writing part to " + filename)
    fd = open(filename, "wb")  # the slice is binary data
    try:
        fd.write(mimepart.get_payload(decode=1))
    finally:
        fd.close()
    dbg("saved part %s in %s" % (part_name, tmpdir))

    # find all slices (including those previously extracted)
    parts_extracted = _find_parts(tmpdir, archive_name)
    if len(parts_extracted) != int(parts_total):
        return True  # more parts to come

    # this is the last part, so assemble the full archive
    archive = SMTPArchive(archive_name)
    archive.in_dir = tmpdir
    archive.meta = meta
    for part in parts_extracted:
        archive.add_part_from_file(part)
    return archive


class SMTPArchive:
    """A file or directory packed with tar and cut into numbered slices.

    Instance attributes:
      name      -- archive name (base name of the resource, spaces -> "_")
      timestamp -- creation time in seconds, set in __init__
      parts     -- dict mapping slice number -> SMTPPart
      meta      -- metadata dict; holds at least 'md5sum'
    """

    part_size = 512            # in kbytes
    in_dir = ""                # where in filesystem are the slices
    orig_resource = ""         # name of the packed resource inside the tar
    destress_interval = 7      # sleep after this many slices
    destress_length = 180      # sleep for how long each interval
    email_address = "havard@aerosat.co.za"

    def __init__(self, name, part_size=None, destress_interval=None):
        """Create an empty archive called after the base name of name."""
        self.name = os.path.basename(name.rstrip("/")).replace(" ", "_")
        self.timestamp = int(time.time())
        # Per-instance containers: as class attributes they would be
        # shared (and mutated) across every archive object.
        self.parts = {}
        self.meta = {'md5sum': 0}
        if part_size is not None:
            self.part_size = part_size
        if destress_interval is not None:
            self.destress_interval = destress_interval

    def create(self, from_what):
        """create(file-or-dir) -> None.  Creates an archive set from file-or-dir.

        Packs from_what with tar and pipes it through split into a fresh
        directory below SCRATCHDIR, then registers every slice found there.
        Raises SMTPCopyError if the path is missing, the pipeline fails or
        no slices were produced.
        """
        if not os.path.exists(from_what):
            raise SMTPCopyError("Baby jesus! path does not exist!")
        self.orig_resource = _tar_member_name(from_what)
        self.meta['md5sum'] = self.md5sum(from_what)

        if not os.path.isdir(SCRATCHDIR):
            os.makedirs(SCRATCHDIR)  # mkdtemp needs an existing parent
        tmpdir = mkdtemp(dir=SCRATCHDIR)
        self.in_dir = tmpdir

        rarcmd = "tar zc %s | split --numeric-suffixes --suffix-length=4 --bytes %ik - %s" % \
            (_shell_quote(from_what), self.part_size,
             _shell_quote(os.path.join(tmpdir, self.name) + "_"))
        dbg("rarcmd: " + rarcmd)
        ret = os.system(rarcmd)
        if ret != 0:
            raise SMTPCopyError("Baby jesus! Trouble creating godahm archive")

        _parts = _find_parts(tmpdir, self.name)
        if not _parts:
            raise SMTPCopyError("Lord Mercy! No parts found! What happened?")
        dbg("found parts: %s " % len(_parts))
        for p in _parts:
            self.add_part_from_file(p)

    def reassemble(self):
        """reassemble() -> list of output lines from tar.

        Concatenates all slices in in_dir and unpacks them there.  If the
        metadata names the original resource, checks that it exists and,
        when an md5sum is recorded, that the checksum matches.
        Raises SMTPCopyError on any failure.
        """
        if not self.check_all_parts():
            raise SMTPCopyError("Holy Hologram! We don't have all parts")

        unpackcmd = "cat %s* | tar vzx -C %s" % \
            (_shell_quote(os.path.join(self.in_dir, self.name) + "_"),
             _shell_quote(self.in_dir))
        dbg("unpackcmd: " + unpackcmd)
        out = os.popen(unpackcmd)
        unpacked = [l for l in out.read().split("\n") if len(l) > 0]
        ret = out.close()
        if ret is not None:  # something fishy with the unpack command
            raise SMTPCopyError("unpack failed with code %s" % ret)

        if 'orig_resource' in self.meta:
            full_path_to_archive = os.path.join(self.in_dir, str(self.meta['orig_resource']))
            dbg("looking for archve in: " + full_path_to_archive)
            if not os.path.exists(full_path_to_archive):
                raise SMTPCopyError("Archive not extracted, but it should've been!")
            if 'md5sum' in self.meta:
                # compare as text: md5sum() yields 0 for directories while
                # parsed metadata carries the string "0"
                if str(self.meta['md5sum']) != str(self.md5sum(full_path_to_archive)):
                    raise SMTPCopyError("Archive does not pass md5 sum test")
                else:
                    dbg("md5sum matches. Thank heavens!")
        print("All good!")
        return unpacked

    def md5sum(self, file):
        """Return the hex md5 digest of file, or 0 if it is not a regular file."""
        if not os.path.isfile(file):
            return 0
        digest = _new_md5()
        handle = open(file, "rb")
        try:
            while True:
                chunk = handle.read(65536)
                if not chunk:
                    break
                digest.update(chunk)
        finally:
            handle.close()
        md5 = digest.hexdigest()
        dbg("md5sum of %s: %s" % (file, md5))
        return md5

    def send(self, email_address):
        """Sends the archive slices per SMTP, in slice order.

        Returns a list with one entry per slice: the result of
        SMTPPart.send(), or False when sending that slice failed.
        """
        ret = []
        for no, part in sorted(self.parts.items()):
            if self.destress_interval and no >= self.destress_interval \
                    and no % self.destress_interval == 0:
                # pause for a bit, to not stress the system by swamping it
                # with too many slices
                print("Sleeping %i seconds to destress system" % self.destress_length)
                time.sleep(self.destress_length)
            try:
                ret.append(part.send(email_address))
            except (smtplib.SMTPException, socket.error, EnvironmentError):
                print("Sending slice#%i failed: %s" % (no, sys.exc_info()[1]))
                ret.append(False)
        return ret

    def get_metadata(self):
        """Get relevant metadata for archive as "key:value" lines."""
        return """
timestamp:%i
orig_resource:%s
archive_name:%s
archive_parts:%i
md5sum:%s
part_size:%i
version:%s
""" % (self.timestamp, self.orig_resource, self.name, len(self.parts),
       self.meta['md5sum'], self.part_size, __version__)

    def add_part(self, part):
        """add a part to archive set, keyed by its slice number"""
        self.parts[part.partno] = part

    def add_part_from_file(self, filename):
        """add a part to archive set from filename"""
        return self.add_part(SMTPPart(filename, self))

    def check_all_parts(self):
        """checks that slices 0..n-1 are registered and exist in in_dir.

        n is meta['archive_parts'] when present, otherwise the number of
        registered parts.  Returns False when n is zero.
        """
        expected = self.meta.get('archive_parts', len(self.parts))
        if not isinstance(expected, int) or expected < 1:
            return False
        for partno in range(expected):  # this is how many _should_ be here
            if partno not in self.parts:
                return False
            partname = os.path.join(self.in_dir, self.parts[partno].filename)
            if not os.path.exists(partname):
                return False
        return True


class SMTPPart:
    """One slice of an SMTPArchive, named <archive name>_<number>."""

    filename = ""
    archive = None
    partno = None

    def __init__(self, filename, archive):
        """Derive the slice number from filename.

        Raises SMTPCopyError if filename does not end in
        <archive.name>_<digits>.
        """
        self.filename = os.path.basename(filename)
        self.archive = archive
        # escape the name: it may contain regex metacharacters such as "+"
        found = re.search(r"%s_(\d+)$" % re.escape(self.archive.name), filename)
        if found is None:
            raise SMTPCopyError("I'm having trouble believing this is really a part, my young Sir!")
        self.partno = int(found.group(1))

    def _subject(self):
        """Return the subject line that extract() parses on the other end."""
        return "%s###%i###%i###%s" % (self.archive.name, self.partno,
                                      len(self.archive.parts), self.filename)

    def xsend(self, email_address):
        """Send part through mutt (legacy path).

        Returns True if mutt exited with status 0, otherwise False.
        """
        print("Sending slice#%i" % self.partno)
        # echo "" | mutt -F /dev/null -s subject -a file address
        muttcmd = ["/usr/bin/mutt", "-F", "/dev/null", "-s", self._subject(),
                   "-a", os.path.join(self.archive.in_dir, self.filename),
                   email_address]
        dbg("Sending archive slice to " + email_address)
        dbg("Executing " + " ".join(muttcmd))
        try:
            # argument list, no shell: nothing in the names gets interpreted
            mua = subprocess.Popen(muttcmd, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT,
                                   universal_newlines=True)
        except OSError:
            print("Could not run mutt: %s" % sys.exc_info()[1])
            return False
        output = mua.communicate(self.archive.get_metadata())[0]
        dbg('mutt says: ', output)
        return mua.returncode == 0

    def send(self, email_to):
        """Send this slice to email_to via the default SMTP server.

        The message carries the metadata as text/plain and the slice as a
        base64 encoded application/x-smtpcopy attachment.  Returns True;
        errors from reading the file or from smtplib propagate.
        """
        # Create the enclosing (outer) message
        outer = MIMEMultipart()
        outer['Subject'] = self._subject()
        outer['To'] = email_to
        outer['From'] = self.archive.email_address
        outer.preamble = 'You will not see this in a MIME-aware mail reader.\n'
        # To guarantee the message ends with a newline
        outer.epilogue = ''

        outer.attach(MIMEText(self.archive.get_metadata()))

        msg = MIMEBase('application', 'x-smtpcopy')
        fp = open(os.path.join(self.archive.in_dir, self.filename), 'rb')
        try:
            msg.set_payload(fp.read())
        finally:
            fp.close()
        # Encode the payload using Base64
        Encoders.encode_base64(msg)
        # Set the filename parameter
        msg.add_header('Content-Disposition', 'attachment', filename=self.filename)
        outer.attach(msg)

        # Now send the message
        s = smtplib.SMTP()
        try:
            s.connect()
            s.sendmail(self.archive.email_address, [email_to], outer.as_string())
        finally:
            s.close()
        return True


def dbg(*messages):
    """Print each message with a [SMTPCOPY] prefix when DEBUG is set."""
    if DEBUG:
        for message in messages:
            print("[SMTPCOPY]: %s" % message)


def _usage():
    """Print the usage text and exit with status 100."""
    print(__doc__)
    sys.exit(100)


def _reassemble_from_dir(_dir):
    """Reassemble the archive whose slices live in _dir (the -r mode)."""
    firstpart = glob.glob(os.path.join(_dir, "*_0000"))
    firstpart.sort()
    if not firstpart:
        print("No slice ending in _0000 found in %s" % _dir)
        sys.exit(100)
    # strip the "_0000" suffix to get the archive name
    arc = SMTPArchive(os.path.basename(firstpart[0])[:-5])
    parts = _find_parts(_dir, arc.name)
    arc.in_dir = _dir
    arc.meta = {'archive_parts': len(parts)}
    for p in parts:
        arc.add_part_from_file(p)
    final_files = arc.reassemble()
    print("You can find these files in %s" % _dir)
    print(" ".join(final_files))


def _create_and_send(resource, email_to):
    """Pack resource into slices and mail them to email_to."""
    arc = SMTPArchive(resource.replace(" ", "_"))
    arc.create(resource)
    print("%i parts of size %ik created. Sending..." % (len(arc.parts), arc.part_size))
    ret = arc.send(email_to)
    print("%i parts successfully sent, %i failed" %
          (len([yes for yes in ret if yes]), len([no for no in ret if not no])))


def _receive(stream):
    """Store the slice read from stream; reassemble and report when complete."""
    dbg("Receiving... ")
    archive = extract(email.message_from_file(stream))
    if archive:
        print("Part was extracted successfully. Grand.")
    if not isinstance(archive, SMTPArchive):
        return
    # we have received all slices
    dbg("All parts received, reassembling...")
    final_files = archive.reassemble()

    sender = os.getenv('SENDER')
    if not sender or sender.startswith("-"):
        # nobody to report to (or something that would look like an option)
        dbg("No usable SENDER in environment, not sending a report")
        return
    subj = "Archive '%s' successfully received" % archive.name
    report = ("\nReceived archive: %s\nExtracted to: %s\n\nThanks.\n-- \n%s running on %s\n"
              % (archive.name, ", ".join(final_files), __version__, os.uname()[1]))
    # subject and address originate from the mail: quote them for the shell
    mua = os.popen("/usr/bin/mail -s %s %s" %
                   (_shell_quote(subj), _shell_quote(sender)), "w")
    try:
        mua.write(report)
    finally:
        mua.close()


def main(argv=None):
    """Command line entry point; see the module docstring for usage."""
    global DEBUG
    if argv is None:
        argv = sys.argv
    args = list(argv[1:])

    interactive = sys.stdin.isatty()
    # Without arguments only show help on a terminal: a mail delivery agent
    # pipes the message in and passes no arguments at all.
    if "-h" in args or "--help" in args or (not args and interactive):
        _usage()
    if "-v" in args:
        DEBUG = 1
        args.remove("-v")
    dbg("Good evening. This is %s" % __version__)

    if interactive:
        if "-r" in args:
            # reassembling archive from the given directory
            args.remove("-r")
            if len(args) < 1:
                _usage()
            _reassemble_from_dir(args[0])
        else:
            # creating archive from resource and mailing it
            if len(args) < 2:
                _usage()
            _create_and_send(args[0], args[1])
    else:
        # we are receiving
        _receive(sys.stdin)
    sys.exit(0)


if __name__ == "__main__":
    main()