#!/usr/local/bin/ruby -w
###########################################################
# threadsplit.rb -- splits up TAG threads for processing #
# Version: 0.1a (minor update) #
# Author: Thomas Adam <thomas_adam16@yahoo.com> #
# Akndments: Heather Stern <star@starshine.org> #
# Date: Friday October 10 2003, 03:12am GMT #
# Last Change: Thursday 18th Dec 2003, 06:35am GMT #
###########################################################
#require 'profile'
# Settings and working state shared by the rest of the script.
# Root of the working tree ("~" is expanded to the user's home directory).
basedir = File.expand_path("~/lgmail/bait")
# Sub-directory of basedir that holds the raw mailbox.
sdir = "Raw"
# File name of the inbox inside that sub-directory.
mb = "Mail98"
# Mailing-list subject prefix, escaped so it can be embedded in a regex.
mlprefix = Regexp.quote("[TAG]")
# Absolute location of the mbox file to process.
pathname = File.expand_path(File.join(basedir, sdir, mb))
# Matches a "Subject:" line carrying the list prefix, then whitespace, then
# one character that is not R/r followed by one that is not E/e.
# NOTE(review): this object is not referenced by name in the visible script.
tagmails = /^Subject: #{mlprefix}\s+([^Rr][^Ee]:?).*/
# Subject lines that start a thread.
vthread = []
# The parsed mbox: one array of lines per message.
mbox = []
# Running count of messages written out (reported at the end, for debugging).
total = 0
# Generate the list of known threads: every distinct "Subject:" line that
# carries the list prefix and is NOT a reply.
#
# The pattern uses a negative lookahead for "Re:" (either case) rather than
# the character classes [^Rr][^Ee]: those rejected any subject whose first
# letter was R/r or whose second letter was E/e (e.g. "Help ..."), so such
# threads were never processed. The trailing \S forces \s+ to consume all
# whitespace after the prefix, so a reply with extra spaces before "Re:"
# cannot slip through by backtracking.
thread_start = /^Subject: #{mlprefix}\s+(?![Rr][Ee]:)\S/
# Block form of File.open closes the handle as soon as the scan is done.
File.open(pathname, "r") do |fh|
  vthread.concat(fh.grep(thread_start).uniq)
end
# Read the mbox in question and split it into messages.
# Every line beginning with "From " opens a new message; mbox receives one
# array per message, holding the envelope line (as captured, without its
# line terminator) followed by each subsequent line up to the next envelope.
# The block form of File.open guarantees the handle is closed afterwards.
File.open(pathname, "r") do |fh|
  fh.each_line do |line|
    if line =~ /^(From .*)/
      # Start of an e-mail: begin a new message array.
      mbox << [Regexp.last_match(1)]
    elsif !mbox.empty?
      # Header or body line of the current e-mail.
      mbox.last << line
    end
    # Lines that precede the first "From " envelope belong to no message and
    # are skipped; appending them to mbox.last (nil) used to raise.
  end
end
#Originally, I had planned to match the whole line, but I am lazy.
#split(/^From \w+\@\w+([.]\w)+\s+([...] [...]\d+) (\d+:\d+:\d+) \d{4}/)
# Controlling block: for every thread-starting subject, gather the messages
# that mention it and write them to a single file named after the subject.
vthread.each do |subject|
  # Drop the leading "Subject:" (8 characters); the remainder is the key
  # used to find the thread's messages and, once cleaned, the file name.
  nm = subject[8..-1]

  # The order of the mbox is known, so the first match is the originating
  # mail and anything after it is a reply. A message belongs to the thread
  # when any of its lines contains the key.
  filenew = mbox.select { |msg| msg.any? { |line| line.include?(nm) } }

  # Print something to the screen (debugging...)
  $stderr.puts("Processing: #{nm}")

  # Build a shell-friendly file name: collapse runs of spaces, remove the
  # list prefix, replace whitespace and awkward characters with "_",
  # collapse runs of "_" and drop a trailing "_".
  # Non-destructive methods are chained on purpose: gsub! returns nil when
  # it changes nothing, which made the former gsub!(...).gsub!(...) chain
  # raise whenever the first substitution did not apply.
  nm = nm.squeeze(" ").
          gsub(/ #{mlprefix} /, '').
          gsub(/[\s?":.,*+\/]/, '_').
          squeeze('_').
          sub(/_$/, '')

  $stderr.puts("Replies: #{filenew.length}")
  total += filenew.length

  # Intelligence time. A thread of exactly one message was never answered
  # (or is spam), so it goes to the "wanted" directory Q; everything else
  # goes to the thread directory T.
  subdir = filenew.length == 1 ? "Q" : "T"

  # Block form flushes and closes the output file before the next thread.
  File.open("#{basedir}/#{subdir}/#{nm}", "w") { |out| out.puts(filenew) }
end
$stderr.puts("\nTotal = #{total}")
# That's it, we've finished.