#!/usr/local/bin/ruby -w
###########################################################
# threadsplit.rb -- splits up TAG threads for processing
#
# Version:         0.1a (minor update)
# Author:          Thomas Adam <thomas_adam16@yahoo.com>
# Acknowledgments: Heather Stern <star@starshine.org>
# Date:            Friday October 10 2003, 03:12am GMT
# Last Change:     Thursday 18th Dec 2003, 06:35am GMT
###########################################################
#
# Reads one mbox file, finds every thread-starting "Subject: [TAG] ..."
# header (subjects that do not begin with "Re"), gathers all mails that
# contain that subject text, and writes each thread to its own file:
#   <basedir>/Q/<name>  when only one mail matched (nobody answered), or
#   <basedir>/T/<name>  otherwise.
# Progress and totals are reported on stderr.

#require 'profile'

# Set some variables...
basedir  = File.expand_path("~/lgmail/bait")              # $(pwd)
sdir     = "Raw"                                          # subdir
mb       = "Mail98"                                       # name of inbox
mlprefix = Regexp.escape("[TAG]")                         # prefix for ML
pathname = File.expand_path("#{basedir}/#{sdir}/#{mb}")   # mbox location
tagmails = /^Subject: #{mlprefix}\s+([^Rr][^Ee]:?).*/     # thread starters
total    = 0                                              # output for debug

# Read the mbox once; readlines opens and closes the file itself, so no
# handle is left dangling. Both passes below work on this in-memory copy.
lines = File.readlines(pathname)

# Generate a list of known threads (one entry per distinct Subject line).
vthread = lines.grep(tagmails).uniq

# Split the mbox into mails. Each mail is an array whose first element is
# its "From " separator line (without the newline) followed by every line
# up to the next separator.
mbox = []
lines.each do |line|
  if line =~ /^(From .*)/
    mbox << [Regexp.last_match(1)]      # start of an e-mail
  elsif !mbox.empty?
    mbox.last << line                   # header/body line of current e-mail
  end
  # Lines before the first "From " separator belong to no mail and are
  # skipped (previously they crashed the script on a nil receiver).
end

# Originally, I had planned to match the whole separator line, but I am lazy.
#split(/^From \w+\@\w+([.]\w)+\s+([...] [...]\d+) (\d+:\d+:\d+) \d{4}/)

# Controlling block: take each initiating thread in turn.
vthread.each do |header|
  # Strip the leading "Subject:" (8 characters); what remains is both the
  # text searched for in every mail and the raw material for the filename.
  subject = header[8..-1]

  # The mbox is in arrival order, so the first mail matched is the one that
  # started the thread and anything after it is a reply.
  thread = mbox.select { |mail| mail.any? { |line| line.include?(subject) } }

  # Print something to the screen (debugging...)
  $stderr.puts("Processing: #{subject}")

  # Build the filename: drop the [TAG] prefix and replace whitespace and
  # shell-hostile characters that even "ls -b" hates with underscores.
  # Non-bang methods are used on purpose: gsub! returns nil when it changes
  # nothing, which made the old chained call blow up.
  name = subject.squeeze(" ")             # compact runs of spaces
  name = name.gsub(/ #{mlprefix} /, "")
  name = name.gsub(/[\s?":.,*+\/]/, "_")
  name = name.squeeze("_").sub(/_$/, "")

  $stderr.puts("Replies: #{thread.length}")
  total += thread.length

  # Intelligence time. If the length of the thread == 1 then we can assume
  # that no-one has answered it -- hence the "wanted" (Q) section. Either
  # that, or it is spam. Everything else goes to T.
  dir = thread.length == 1 ? "Q" : "T"

  # The block form closes (and therefore flushes) the file straight away.
  File.open("#{basedir}/#{dir}/#{name}", "w") { |out| out.puts(thread) }
end

$stderr.puts("\nTotal = #{total}")

# That's it, we've finished.