"""
flightCal -- make hCalendar from text flight info

:Author: `Dan Connolly`_
:Version: $Revision: 1.15 $ of $Date: 2006/04/21 07:29:00 $
:Copyright: `W3C Open Source License`_ Share and enjoy.

.. _Dan Connolly: http://www.w3.org/People/Connolly/
.. _W3C Open Source License: http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231

Note: we use str.rsplit() which is new in python2.4

Usage
-----

The itinerary is read from standard input, the generated document is
written to standard output, and the first argument is the document
title. Run a la::

  python2.4 flightCal.py title < flightinfo.txt > flightinfo.html

where flightinfo.txt is a text flight itinerary in a format produced,
I think, by Sabre_.

Add ``--online`` (anywhere on the command line) to look up airport
details through ``aptdata.WebCache``. ``--testFlights`` pretty-prints
the parsed flights instead of generating a document.

.. _Sabre: http://en.wikipedia.org/wiki/Sabre_%28computer_system%29

Testing and Colophon
--------------------

The examples in the docstrings below are executable doctest_ unit tests.
Check them a la::

  python flightCal.py --test

.. _doctest: http://www.python.org/doc/lib/module-doctest.html

This module is documented in rst_ format for use with epydoc_.

.. _epydoc: http://epydoc.sourceforge.net/
.. _rst: http://docutils.sourceforge.net/docs/user/rst/quickstart.html
"""

import re
from xml.sax.saxutils import escape as xmldata

import aptdata # from http://dev.w3.org/cvsweb/2001/palmagent/
import tzwhere

__version__ = "$Id: flightCal.py,v 1.15 2006/04/21 07:29:00 connolly Exp $"
__docformat__ = 'restructuredtext en'


def main(argv):
    """Convert the itinerary on stdin to a document on stdout.

    :param argv: command line; ``argv[1]`` is the document title and an
                 optional ``--online`` flag is consumed by `webarg`.
    """
    import sys

    web = webarg(argv)
    convert(sys.stdin, sys.stdout, web, argv[1])


# NOTE(review): the three sample itineraries below had lost their line
# breaks in the copy of this file under review. They have been put back
# where splitEvents()/flights() need them (one itinerary line per text
# line); the original column spacing could not be recovered.
TestData = """
25 FEB 06 - SATURDAY
AIR AMERICAN AIRLINES FLT:1557 ECONOMY
LV KANSAS CITY INTL 512P EQP: MD-80
DEPART: TERMINAL BUILDING C 01HR 33MIN
AR CHICAGO OHARE 645P NON-STOP
ARRIVE: TERMINAL 3 REF: JKLWXS
CONNOLLY/DANIEL SEAT-26F AA-XAB123
AIR AMERICAN AIRLINES FLT:98 ECONOMY MULTI MEALS
LV CHICAGO OHARE 1025P EQP: BOEING 777
DEPART: TERMINAL 3 07HR 35MIN
26 FEB 06 - SUNDAY
AR LONDON HEATHROW 1200N NON-STOP
ARRIVE: TERMINAL 3 REF: JKLWXS
CONNOLLY/DANIEL SEAT-34J AA-XAB123
AIR AMERICAN AIRLINES FLT:6580 ECONOMY MEALS
OPERATED BY BRITISH AIRWAYS
LV LONDON HEATHROW 415P EQP: BOEING 757
DEPART: TERMINAL 1 02HR 00MIN
AR NICE 715P NON-STOP
ARRIVE: AEROGARE 1 REF: JKLWXS
"""

_Test2 = \
"""
17 JUL 06 - MONDAY
AIR UNITED AIRLINES FLT:1106 ECONOMY
LV KANSAS CITY INTL 1050A EQP: BOEING 737 300
DEPART: TERMINAL BUILDING A 01HR 29MIN
AR CHICAGO OHARE 1219P NON-STOP
ARRIVE: TERMINAL 1 REF: K2TM2Q
AIR UNITED AIRLINES FLT:534 ECONOMY
LV CHICAGO OHARE 1255P EQP: BOEING 737 300
DEPART: TERMINAL 1 02HR 21MIN
AR BOSTON 416P NON-STOP
ARRIVE: TERMINAL C REF: K2TM2Q
"""

_Test3 = """
03 OCT 06 - TUESDAY
AIR AMERICAN AIRLINES FLT:1876 ECONOMY
LV KANSAS CITY INTL 625A EQP: MD-80
DEPART: TERMINAL BUILDING C 01HR 30MIN
AR DALLAS FT WORTH 755A NON-STOP
REF: FPLMBA
CONNOLLY/DANIEL SEAT-26D AA-XDW5282
AIR AMERICAN AIRLINES FLT:1379 ECONOMY FOOD FOR PURCHASE
LV DALLAS FT WORTH 1010A EQP: MD-80
DEPART: TERMINAL D 04HR 20MIN
AR VANCOUVER BC 1230P NON-STOP
ARRIVE: MAIN TERMINAL REF: FPLMBA
CONNOLLY/DANIEL SEAT-9D AA-XDW5282
06 OCT 06 - FRIDAY
AIR AMERICAN AIRLINES FLT:282 ECONOMY FOOD FOR PURCHASE
LV VANCOUVER BC 845A EQP: MD-80
DEPART: MAIN TERMINAL 04HR 10MIN
AR DALLAS FT WORTH 255P NON-STOP
ARRIVE: TERMINAL D REF: FPLMBA
CONNOLLY/DANIEL SEAT-9D AA-XDW5282
AIR AMERICAN AIRLINES FLT:1966 ECONOMY
LV DALLAS FT WORTH 350P EQP: MD-80
01HR 30MIN
AR KANSAS CITY INTL 520P NON-STOP
ARRIVE: TERMINAL BUILDING C REF: FPLMBA
CONNOLLY/DANIEL SEAT-30D AA-XDW5282
"""


# Sabre seems to have its own way of naming airports.
# I can't find a list of them online.
# So in a file called Airports, we list them, a la:
# BOS BOSTON
# CDG PARIS DE GAULLE
# ...
def _loadAirports(path="Airports"):
    """Read ``[iata, name]`` pairs from the airport list file.

    Each useful line is an IATA code, whitespace, then the Sabre name.
    Lines that do not split into exactly two fields (blank lines, for
    instance) are skipped so that `iata_nick` can always unpack a pair.
    The file is closed once it has been read.
    """
    fp = open(path)
    try:
        table = []
        for s in fp:
            fields = s.strip().split(None, 1)
            if len(fields) == 2:
                table.append(fields)
        return table
    finally:
        fp.close()


Airports = _loadAirports()


class Template(object):
    """Render a title and a sequence of flight dicts as text chunks."""

    def __init__(self, title, events):
        """
        :param title: document title (escaped before use).
        :param events: iterable of flight dicts as produced by `flights`.
        """
        self.title = title
        self.events = events

    def generate(self):
        """Yield the document piece by piece.

        Each event dict is modified in place: ``url_x`` and a default
        ``SEAT`` are added and nested values are flattened with
        `expandKeys` so the templates can refer to them.
        """
        yield DocTop % {'title': xmldata(self.title)}

        for e in self.events:
            yield "\n"
            yield "\n"
            e['url_x'] = xmldata(e['url'])

            # a bit of a kludge...
            if 'SEAT' not in e:
                e['SEAT'] = '?'

            expandKeys(e)
            yield FlightCell % e

            yield "\n"
            yield markupPlace(e['location']['lv'])
            # 'dtstart_date-time' only exists when offsettz() could
            # resolve a UTC offset; otherwise fall back to floating time.
            if 'dtstart_date-time' in e:
                yield DepartCell % e
            else:
                yield DepartCellFloat % e

            yield "\n"
            yield markupPlace(e['location']['ar'])
            if 'dtend_date-time' in e:
                yield ArriveCell % e
            else:
                yield ArriveCellFloat % e

            yield "\n"
            yield "\n"

        # NOTE(review): DocBottom is formatted with the Template instance
        # itself rather than with a dict -- confirm this is intended.
        yield DocBottom % self


def expandKeys(d, d2=None, pfx=''):
    """Flatten nested dicts into ``d`` using ``outer_inner`` keys.

    The nested values are left in place; the flattened keys are added
    next to them. ``d2`` and ``pfx`` are only used by the recursion.
    Returns ``d``.

    >>> expandKeys({'dtstart': {'float': '1999-01-01'}})
    {'dtstart': {'float': '1999-01-01'}, 'dtstart_float': '1999-01-01'}
    """
    if d2 is None:
        d2 = d

    # Iterate over a snapshot: at the top level d2 is d, and d grows
    # while we walk it.
    for k, v in list(d2.items()):
        if pfx:
            k = pfx + '_' + k
        if isinstance(v, dict):
            expandKeys(d, v, k)
        elif pfx:
            d[k] = v
    return d


def markupPlace(where):
    """Return the text for a place dict as built by `mkPlace`.

    Places with a ``geo`` entry are flattened (in place) and rendered
    with `GeoTerminalDiv` or `GeoDiv`; others show just their name.
    """
    if 'geo' in where:
        expandKeys(where)
        if 'adr_extended-address' in where:
            return GeoTerminalDiv % where
        else:
            return GeoDiv % where
    else:
        return "%s\n" % \
               (where['fn']['text'])


# NOTE(review): the markup inside the templates below (and in the bare
# "\n" separators used by Template.generate and markupPlace) looks like
# it was stripped from the copy of this file under review: FlightCell is
# blank although generate() formats it with the flight dict, the
# per-flight fields show up inside DocBottom instead, and each Cell /
# CellFloat pair is identical. The visible text is kept as found;
# restore the real templates from version control before relying on the
# generated output.
DocTop = u"""
%(title)s\

%(title)s

"""

FlightCell = u"""
"""

DepartCell = u"""
%(dtstart_time)s %(dtstart_day)s %(dtstart_date)s
"""

DepartCellFloat = u"""
%(dtstart_time)s %(dtstart_day)s %(dtstart_date)s
"""

ArriveCell = u"""
%(dtend_time)s %(dtend_day)s
"""

ArriveCellFloat = u"""
%(dtend_time)s %(dtend_day)s
"""

GeoTerminalDiv = u"""
%(org_organization-name)s (%(nickname_text)s)
%(adr_extended-address)s
"""

GeoDiv = u"""
%(org_organization-name)s (%(nickname_text)s)
"""

DocBottom = u"""
FlightDepartArrive
%(carrier)s FLT:%(FLT)d
%(location_text)s
%(EQP)s
SEAT: %(SEAT)s
generated using flightCal.py
See Makefile and other nearby files for details.
"""


def progress(*args):
    """Write the arguments, space separated, as one line on stderr."""
    import sys

    for a in args:
        sys.stderr.write('%s ' % a)
    sys.stderr.write("\n")


def flights(lines, web=None):
    r"""Generate one dict per flight from the lines of an itinerary.

    Events from `splitEvents` arrive as a departure (LV) followed by
    the matching arrival (AR); the flight dict is started on the
    departure and yielded once the arrival has been merged in.

    :param lines: iterable of itinerary text lines.
    :param web: optional lookup function passed on to `mkPlace` and
                `offsettz`.

    >>> flts = list(flights(_Test3.split("\n")))
    >>> len(flts)
    4
    >>> flts[0].keys()
    ['url', 'EQP', 'FLT', 'SEAT', 'note', 'carrier', 'location', 'dur', 'dtstart', 'REF', 'dtend']

    >>> flts[1]['location']['text']
    'DALLAS FT WORTH to VANCOUVER BC'

    >>> flts[1]['dtstart']['float']
    '2006-10-03T10:10:00'
    """
    # The event lines get their own name so they do not shadow the
    # ``lines`` parameter.
    for date, day, fltln, evlines in splitEvents(lines):
        isodate = stdDate(date)

        if evlines[0].startswith("LV"):
            flt = flightLine(fltln)
            lv = lvLine(evlines[0])

            # The duration is the last ten characters of the line after
            # LV, e.g. "01HR 33MIN"; a terminal may precede it.
            depline = evlines[1]
            flt['dur'] = depline[-10:]
            if depline.startswith("DEPART"):
                lv['DEPART'] = depline[depline.index(':')+1:-11].strip()

            flt['dtstart'] = {
                'float': "%sT%s" % (isodate, stdTime(lv['time'])),
                'time': lv['time'],
                'date': date,
                'day': day
                }
            flt['EQP'] = lv['EQP']
        else:
            ar = arLine(evlines[0])

            # "ARRIVE: <terminal>   REF: <locator>" -- either part may
            # be missing.
            if 'REF:' in evlines[1]:
                evlines[1], flt['REF'] = evlines[1].split("REF: ")
            if evlines[1].startswith("ARRIVE:"):
                ar['ARRIVE'] = evlines[1].strip().split("ARRIVE: ")[1]

            if 'SEAT-' in evlines[-1]:
                txt = evlines[-1]
                flt['SEAT'] = txt.split('SEAT-')[1].split(' ', 1)[0]
                flt['note'] = txt

            flt['dtend'] = {
                'float': "%sT%s" % (isodate, stdTime(ar['time'])),
                'time': ar['time'],
                'date': date,
                'day': day
                }
            flt['url'] = fltlink(flt['carrier'], flt['FLT'])

            aptlv, aptar = lv['location'], ar['location']
            tlv, tar = lv.get('DEPART', None), ar.get('ARRIVE', None)
            flt['location'] = {
                'text': "%s to %s" % (aptlv, aptar),
                'lv': mkPlace(aptlv, tlv, web),
                'ar': mkPlace(aptar, tar, web),
                }

            offsettz(web, flt['dtstart'], flt['location']['lv'])
            offsettz(web, flt['dtend'], flt['location']['ar'])

            yield flt


def offsettz(web, when, where):
    """Use latitude/longitude to find the UTC offset for a local time.

    If ``where`` has a ``geo`` entry the result is also stored in
    ``when['date-time']``; otherwise nothing is changed and None is
    returned.

    >>> offsettz(aptdata.batteries_included_get(),
    ...          {'float': '2006-10-06T15:50:00'},
    ...          {'geo': {'longitude': -94, 'latitude': 39}})
    '2006-10-06T15:50:00-05:00'
    """
    try:
        lat = where['geo']['latitude']
        lon = where['geo']['longitude']
    except KeyError:
        return
    else:
        places = list(tzwhere.timezones(web)) # TODO: factor out this lookup
        tz = tzwhere.closest(lat, lon, places)[2]
        out = tzwhere.stdtime(tz, when['float'])
        when['date-time'] = out
        return out


def mkPlace(aptname, terminal=None, web=None):
    r"""Build a card-like dict for an airport, optionally with a terminal.

    >>> mkPlace("CHICAGO OHARE", None) == \
    ...  {'org': {'organization-name': 'CHICAGO OHARE'}, \
    ...   'nickname': {'text': 'ORD'}, 'fn': {'text': 'CHICAGO OHARE'}}
    True

    Sometimes there are extra spaces in the airport name; runs of
    whitespace are collapsed before the lookup.

    >>> mkPlace("VANCOUVER   BC", None) == \
    ...  {'org': {'organization-name': 'VANCOUVER BC'}, \
    ...   'nickname': {'text': 'YVR'}, 'fn': {'text': 'VANCOUVER BC'}}
    True

    Here's how the web arg works.
    (It's not a unit test because it accesses the web.)

      mkPlace("CHICAGO OHARE", "Terminal 3", aptdata.batteries_included_get())
    """
    aptname = ' '.join(aptname.split())
    airport = {'org': {'organization-name': aptname},
               'fn': {'text': aptname}}

    try:
        iata_nick(airport)
    except KeyError:
        progress("unrecognized airport name:", aptname)
    else:
        if web:
            aptdata.airportCard(web, airport['nickname']['text'], airport)

    if terminal:
        where = {'adr': {}} # new, more specialized location
        where.update(airport)
        # would be nice to fill in country-name from wikipedia too
        where['adr']['extended-address'] = terminal
        return where
    else:
        return airport


def iata_nick(card):
    """Set nickname to the iata code if the org field of
    a card is a known airport name.

    Returns the code; raises KeyError (carrying the name) when the
    name is not listed in `Airports`.
    """
    org = card['org']['organization-name']
    for iata, n in Airports:
        if org == n:
            card['nickname'] = {'text': iata}
            return iata
    raise KeyError(org)


def stdDate(ddmmyy):
    """Convert a ``DD MON YY`` itinerary date to ISO ``YYYY-MM-DD``.

    Two-digit years are taken to be in the 2000s.

    >>> stdDate("04 MAR 06")
    '2006-03-04'
    """
    day, month, year = ddmmyy.split()
    year = 2000 + int(year)
    month = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
             'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC'].index(month) + 1
    day = int(day)
    return "%04d-%02d-%02d" % (year, month, day)


def stdTime(hhmm):
    """Convert an itinerary time such as ``423P`` to ``HH:MM:SS``.

    The last character is the half-day marker; the result is a local
    time without a zone suffix.

    >>> stdTime("423P")
    '16:23:00'
    >>> stdTime("1219P")
    '12:19:00'
    >>> stdTime("1200N")
    '12:00:00'

    The hour after midnight is hour zero:

    >>> stdTime("1215A")
    '00:15:00'
    """
    minute = int(hhmm[-3:-1])
    hr = int(hhmm[:-3])
    if hhmm.endswith("P") and hr < 12:
        hr += 12
    elif hhmm.endswith("A") and hr == 12:
        hr = 0
    sec = 0
    tz = '' # local time
    return "%02d:%02d:%02d%s" % (hr, minute, sec, tz)


def fltlink(carrier, num):
    """Build a flight tracking link for a carrier name and flight number.

    Raises KeyError for a carrier that is not in the table below.

    >>> fltlink('AMERICAN AIRLINES', 1557)
    'https://bwi.flightview.com/fvSabreVT/fvCPL.exe?vthost=1W&acid=1557&qtype=htm&AL=AA&Find1=Track+Flight'
    """
    # iata airline codes
    # @@todo: look these up from wikipedia. hmm... how to correlate names?
    airlines = {'AMERICAN AIRLINES': 'AA',
                # http://en.wikipedia.org/wiki/Midwest_Airlines
                'MIDWEST AIRLINES': 'YX',
                'UNITED AIRLINES': 'UA',
                # http://en.wikipedia.org/wiki/Delta_Air_Lines
                'DELTA AIR LINES INC': 'DL',
                # http://en.wikipedia.org/wiki/US_Airways
                'US AIRWAYS': 'US'
                }
    airlineCode = airlines[carrier]

    action = 'https://bwi.flightview.com/fvSabreVT/fvCPL.exe'
    params = {'AL': airlineCode,
              'acid': str(num),
              'qtype': 'htm',
              'Find1': 'Track+Flight',
              'vthost': '1W'
              }
    q = '&'.join(['%s=%s' % (n, v) for n, v in params.items()])
    return "%s?%s" % (action, q)


# A date heading such as "10 DEC 06 - SUNDAY".
_DATE_LINE = re.compile(r"(\d\d \w\w\w \d\d) - ([A-Z]+)")


def splitEvents(lines):
    r"""
    split lines of SABRE message

    each event starts with a date like:
    10 DEC 06 - SUNDAY

    Return an iterator of tuples: date, day, fltline, arlvlines.
    Every flight gives a departure (LV) tuple followed by an arrival
    (AR) tuple, each carrying the date that was current when its own
    LV/AR line was read, so an overnight flight departs and arrives on
    different dates.

    Disregard '> ' quoting.

    >>> events = list(splitEvents(TestData.split("\n")))
    >>> len(events)
    6

    >>> events[0][:3]
    ('25 FEB 06', 'SATURDAY', 'AIR AMERICAN AIRLINES FLT:1557 ECONOMY')

    The second flight leaves on Saturday and arrives on Sunday:

    >>> events[2][:2], events[3][:2]
    (('25 FEB 06', 'SATURDAY'), ('26 FEB 06', 'SUNDAY'))

    >>> events = list(splitEvents(_Test2.split("\n")))
    >>> events[0][:3]
    ('17 JUL 06', 'MONDAY', 'AIR UNITED AIRLINES FLT:1106 ECONOMY')
    >>> events[2][:3]
    ('17 JUL 06', 'MONDAY', 'AIR UNITED AIRLINES FLT:534 ECONOMY')

    >>> events = list(splitEvents(_Test3.split("\n")))
    >>> events[0][:3]
    ('03 OCT 06', 'TUESDAY', 'AIR AMERICAN AIRLINES FLT:1876 ECONOMY')
    >>> len(events)
    8
    """
    lvlines = arlines = None
    state = ''

    for ln in lines:
        while ln.startswith(">"):
            ln = ln[1:].strip()
        ln = ln.strip()

        m = _DATE_LINE.search(ln)
        if m:
            date, day = m.group(1), m.group(2)
            state = ''
            continue
        elif ln.startswith("AIR"):
            # A new flight begins: flush the previous one first.
            if arlines:
                if lvlines:
                    yield lvdate, lvday, fltline, lvlines
                yield ardate, arday, fltline, arlines
            fltline = ln
            state = ''
        elif ln.startswith("LV "):
            lvlines = [ln]
            lvdate = date
            lvday = day
            state = 'LV'
        elif ln.startswith("AR "):
            arlines = [ln]
            ardate = date
            arday = day
            state = 'AR'
        elif state == 'LV':
            lvlines.append(ln)
        elif state == 'AR':
            # NOTE(review): arrival continuation lines are added to the
            # departure lines as well; kept as found -- confirm intent.
            lvlines.append(ln)
            arlines.append(ln)

    # Flush the last flight with the dates recorded for its own LV/AR
    # lines (not whichever date heading happened to be read last).
    if arlines:
        if lvlines:
            yield lvdate, lvday, fltline, lvlines
        yield ardate, arday, fltline, arlines


def lineWith(lines, key):
    """Return the next non-blank line, which must start with ``key``.

    :param lines: an iterator with a ``next()`` method.
    :raises ValueError: if the next non-blank line starts otherwise.
    """
    while 1:
        ln = lines.next() # raises StopIteration
        ln = ln.strip()
        if ln.startswith(key):
            return ln
        if ln:
            raise ValueError(ln)


def flightLine(ln):
    r"""Split an ``AIR ...`` line into carrier, flight number and note.

    >>> flightLine("AIR AMERICAN AIRLINES FLT:1557 ECONOMY") == \
    ...   {'carrier': 'AMERICAN AIRLINES', 'FLT': 1557, 'note': 'ECONOMY'}
    True

    >>> flightLine("AIR AMERICAN AIRLINES FLT:98 ECONOMY MULTI MEALS") == \
    ...   {'carrier': 'AMERICAN AIRLINES', 'FLT': 98, 'note': 'ECONOMY MULTI MEALS'}
    True

    >>> flightLine("AIR DELTA AIR LINES INC FLT:674 ECONOMY") == \
    ...   {'note': 'ECONOMY', 'carrier': 'DELTA AIR LINES INC', 'FLT': 674}
    True
    """
    # Skip the leading "AIR " so a carrier containing "AIR" is fine.
    rest = ln[4:]
    i = rest.index("FLT:")
    carrier = rest[:i].strip()
    flt = rest[i:].split()[0]          # e.g. "FLT:1557"
    notes = rest[i+len(flt):].strip()
    flt = int(flt.split(':')[1])
    return {'carrier': carrier, 'FLT': flt, 'note': notes}


def lvLine(ln):
    r"""Split an ``LV ...`` line into airport, time and equipment.

    >>> lvLine("LV KANSAS CITY INTL 512P EQP: MD-80") ==\
    ...   {'location': 'KANSAS CITY INTL', 'time': '512P', 'EQP': 'MD-80'}
    True
    """
    i = ln.index("EQP:")
    eqp = ln[i+5:]
    apt, when = ln[3:i].strip().rsplit(None, 1)
    return {'location': apt, 'time': when, 'EQP': eqp}


def arLine(ln):
    r"""Split an ``AR ...`` line into airport, time and trailing note.

    >>> arLine("AR CHICAGO OHARE 645P NON-STOP") == \
    ...   {'location': 'CHICAGO OHARE', 'time': '645P', 'note': 'NON-STOP'}
    True
    """
    apt, when, cls = ln[3:].rsplit(None, 2)
    return {'location': apt, 'time': when, 'note': cls}


def _test():
    """Run the doctests in this module."""
    import doctest
    doctest.testmod()


def convert(infp, outfp, web, title):
    """Parse the itinerary in ``infp`` and write the document to ``outfp``.

    Output is written UTF-8 encoded.
    """
    events = flights(infp, web)
    for s in Template(title, events).generate():
        outfp.write(s.encode('utf-8'))


def webarg(argv):
    """Return the web lookup function if ``--online`` is in ``argv``.

    The flag is removed from ``argv`` (in place) so that the remaining
    positional arguments keep their expected indexes. Returns None when
    the flag is absent.
    """
    web = None
    # Look at the list we were given (and are about to edit), not at
    # sys.argv, which is not even imported at module level.
    if '--online' in argv:
        web = aptdata.WebCache("wikipedia-cache").get
        del argv[argv.index('--online')]
    return web


if __name__ == '__main__':
    import sys
    if '--test' in sys.argv:
        _test()
    elif '--testFlights' in sys.argv:
        import pprint
        for e in flights(sys.stdin, webarg(sys.argv)):
            pprint.pprint(e)
    else:
        main(sys.argv)