import os
import re
import sys
import time

# Year appended to every scraped date string before it is parsed.
ASSUMEYEAR = 2009

# Markup fragments deleted from each input line before the row is matched.
# NOTE(review): '<strrong>' looks like a typo but presumably mirrors a
# misspelt tag in the input -- confirm before "correcting" it.
IGNORES = [
  '<em>', '</em>',
  '<strong>', '</strong>', '<strrong>',
  '<center>', '</center>',
]

# Any true value switches on the trace prints guarded by "if DEBUG".
DEBUG = 0

def parserickdate(str):
  """Parse a date string, trying each supported format in turn.

  The locale's date representation ('%x') is tried first, then the
  'Sat Jul 4 2009' layout ('%a %b %d %Y').

  str -- the date text.  The name shadows the builtin; it is kept so that
         existing keyword callers continue to work.

  Returns the time.struct_time produced by the first format that matches,
  or 0 when no format matches.
  """
  for fmt in ('%x', '%a %b %d %Y'):
    try:
      return time.strptime(str, fmt)
    except (ValueError, TypeError):
      # strptime reports a mismatch with ValueError (TypeError for a
      # non-string argument).  Anything else -- KeyboardInterrupt included --
      # is a genuine error and must propagate instead of being swallowed.
      if DEBUG: print("couldnt parse %s with %s" % (str, fmt))
  return 0

def cleanuptimestr(str):
  """Normalise a loosely written clock time to 24-hour text.

  Recognised spellings are a one- or two-digit hour, optionally followed by
  ':MM', optionally followed by one of a, A, p, P, am, AM, pm, PM:

    '2p'      -> '14:00'      '7:30pm'  -> '19:30'
    '10am'    -> '10:00'      '12p'     -> '12:00'
    '12a'     -> '00:00'      '7'       -> '07:00'

  A suffix is only interpreted when the hour is written as 0-9 or 00-19;
  the resulting hour is not range-checked here (e.g. '13p' gives '25:00').
  Text that matches none of the patterns is returned unchanged apart from
  surrounding whitespace being removed and a single leading digit being
  zero-padded.

  str -- the time text.  The name shadows the builtin; it is kept so that
         existing keyword callers continue to work.

  Returns the rewritten string; a false value (None, '') is returned as is.
  """
  if DEBUG: print("cleanuptimestr(%s)" % (str))
  if not str: return str
  # Cells such as '2p - 6p' leave blanks around each half after the split
  # on '-'; none of the anchored patterns below would match with them.
  r = str.strip()

  suffix = r'([apAP]|am|AM|pm|PM)'

  # 'Hs' -> '0H:00s'
  m = re.match(r'^([0-9])' + suffix + r'$', r)
  if m is not None:
    r = '0%s:00%s' % m.groups()

  # 'HHs' -> 'HH:00s'
  m = re.match(r'^([0-1][0-9])' + suffix + r'$', r)
  if m is not None:
    r = '%s:00%s' % m.groups()

  # A lone digit is a whole hour; nothing further to convert.
  m = re.match(r'^([0-9])$', r)
  if m is not None:
    return '0%s:00' % m.group(1)

  # Zero-pad a single-digit hour that is followed by anything else ('7:30p').
  m = re.match(r'^([0-9][^0-9].*)$', r)
  if m is not None:
    r = '0%s' % m.group(1)

  # AM: drop the suffix.  12:MM AM is just after midnight, so the hour
  # becomes 00 (leaving it as 12 would silently turn it into noon).
  m = re.match(r'^([0-1][0-9]):([0-5][0-9])([aA]|am|AM)$', r)
  if m is not None:
    hour = m.group(1)
    if hour == '12':
      hour = '00'
    r = '%s:%s' % (hour, m.group(2))

  # PM: 12:MM PM stays 12:MM, every other hour moves forward by twelve.
  m = re.match(r'^12:([0-5][0-9])([pP]|pm|PM)$', r)
  if m is not None:
    r = '12:%s' % m.group(1)
  else:
    m = re.match(r'^([0-1][0-9]):([0-5][0-9])([pP]|pm|PM)$', r)
    if m is not None:
      r = '%02d:%s' % (int(m.group(1)) + 12, m.group(2))

  if DEBUG: print("cleanuptimestr(%s) return %r" % (str, r))
  return r

def parsedt(str):
  """Parse 'YYYYMMDD <time>' text into a time.struct_time.

  The time part may be 24-hour 'HH:MM', 12-hour 'HH:MM' followed by AM/PM,
  a bare 24-hour 'HH', or a 12-hour hour followed by AM/PM.  Formats are
  tried in that order and the first match wins.

  str -- the text to parse.  The name shadows the builtin; it is kept so
         that existing keyword callers continue to work.

  Returns the parsed time.struct_time, or 0 when no format matches.
  """
  # '%p' is the AM/PM directive strptime understands; '%P' is rejected as a
  # bad directive, which made the two 12-hour formats impossible to match.
  for fmt in ('%Y%m%d %H:%M', '%Y%m%d %I:%M%p', '%Y%m%d %H', '%Y%m%d %I%p'):
    try:
      return time.strptime(str, fmt)
    except (ValueError, TypeError):
      # Only a failed parse moves on to the next format; other exceptions
      # (KeyboardInterrupt, programming errors) are left to propagate.
      if DEBUG: print("couldnt parse %s with %s" % (str, fmt))
  return 0

# Script body: read the HTML table in file 't' (one <tr> per line) and write
# an iCalendar document with one VEVENT per row to stdout.

def _warn(msg):
  # Diagnostics go to stderr so they can never end up inside the calendar
  # that is being written to stdout.
  sys.stderr.write(msg + "\n")

print("BEGIN:VCALENDAR")
# Each property must be a single "NAME:value" content line; a line break
# before the colon is only legal when the continuation starts with a blank.
print("VERSION:2.0")
print("PRODID:-//Mozilla.org/NONSGML Mozilla Calendar V1.0//EN")

with open('t') as infile:
  for line in infile:
    line = line.strip()
    if not line:
      if DEBUG: print("ignoring empty line")
      continue
    for ignore in IGNORES:
      line = line.replace(ignore, '')
    if DEBUG: print("read: %s" % line)

    # Isolate the single table row on this line.
    m = re.match('^(.*)<tr[^>]*>(.*)</tr>(.*)$', line)
    if m is None:
      if DEBUG: print("ignoring line[%s]" % line)
      continue
    if m.group(1): _warn("ignoring prefix[%s]" % m.group(1))
    if m.group(3): _warn("ignoring suffix[%s]" % m.group(3))
    row = m.group(2)
    if DEBUG: print("processing row[%s]" % row)

    # Strip the outermost <td ...> ... </td> and split the cells in between.
    m = re.match('^([^<]*)<td[^>]*>(.*)</td>(.*)$', row)
    if m is None:
      _warn("bad row: no items [%s]" % row)
      continue
    if m.group(1): _warn("ignoring stuff before row[%s]" % m.group(1))
    if m.group(3): _warn("ignoring stuff after row[%s]" % m.group(3))
    items = re.split('</td><td[^>]*>', m.group(2))
    if DEBUG: print("items: %r" % items)

    # Example of the four cells of one row:
    #   items[0]: Sat. Jul. 4
    #   items[1]: 2p-6p
    #   items[2]: Felton
    #   items[3]: <a href="#feltonlug">FeltonLUG</a>  meeting at Felton Firehouse, 131 Kirby St.
    if len(items) < 4:
      # items[1], items[2] and items[3] are all read below.
      _warn("bad row: expected 4 items, got %r" % items)
      continue

    datestr = '%s %d' % (items[0].replace('.', ''), ASSUMEYEAR)
    eventdate = parserickdate(datestr)
    if not eventdate:
      _warn("bad date %s" % datestr)
      continue
    if DEBUG: print("date: %s" % time.strftime('%Y-%m-%d', eventdate))
    yyyymmdd = time.strftime('%Y%m%d', eventdate)

    timestrs = items[1].split('-')
    if len(timestrs) != 2:
      _warn("expected time range, got [%s]" % timestrs)
      if len(timestrs) < 2:
        # No end time at all: there is nothing to build DTEND from.
        continue
    starthhmm = cleanuptimestr(timestrs[0])
    endhhmm = cleanuptimestr(timestrs[1])
    eventstartstr = "%s %s" % (yyyymmdd, starthhmm)
    eventendstr = "%s %s" % (yyyymmdd, endhhmm)
    eventstart = parsedt(eventstartstr)
    eventend = parsedt(eventendstr)
    if not eventstart:
      # Report the text that failed to parse, not the (always 0) result.
      _warn("bad start: %s in %r" % (eventstartstr, items))
      continue
    if not eventend:
      _warn("bad end: %s in %r" % (eventendstr, items))
      continue

    print("BEGIN:VEVENT")
    print(time.strftime("DTSTART:%Y%m%dT%H%M00", eventstart))
    print(time.strftime("DTEND:%Y%m%dT%H%M00", eventend))
    print("SUMMARY:%s: %s" % (items[3], items[2]))
    print("END:VEVENT")

print("END:VCALENDAR")


