# -*- coding: utf-8 -*-
# Using wikipedia data, find UTC offset at an airport at a given time.
#
# $Revision: 2991 $ of $Date: 2007-04-12 00:03:27 -0400 (Thu, 12 Apr 2007) $
#
# Written so that the syntax is valid on both Python 2.4+ and Python 3:
# single-argument print(...), call-style raise, no `with` statement.

import re
import math
import struct
import xml.dom.minidom
from datetime import datetime

from dateutil.tz import tzfile  # http://labix.org/python-dateutil

import aptdata

DATAPG = 'http://en.wikipedia.org/wiki/List_of_tz_zones_by_country'

minus = u'\u2212'  # whose bright idea was it to use this funky - sign?


def main(argv):
    """Command-line entry point.

    With '--module' anywhere in argv, regenerate and print the timezone
    module (see mkmodule).  Otherwise argv[1] is a timestamp and argv[2:]
    are airport codes; each code is looked up and reported (see lookup).
    """
    if '--module' in argv:
        mkmodule()
    else:
        lookup(argv[1], argv[2:])


def lookup(when, codes):
    """Print, for each airport code, its card, nearest timezone and time.

    @param when: timestamp string in the form stdtime() accepts
    @param codes: iterable of airport codes passed to aptdata.airportCard
    """
    import pprint

    web = aptdata.WebCache("wikipedia-cache").get
    # Materialise the generator once; it is scanned again for every code.
    places = list(timezones(web, exclude=["/Indiana"]))
    for code in codes:
        card = aptdata.airportCard(web, code)
        pprint.pprint(card)
        geo = card['geo']
        place = closest(geo['latitude'], geo['longitude'], places)
        print(place)
        print(stdtime(place[2], when))


def closest(lat, lon, places):
    """Return the place nearest to (lat, lon), or None if places is empty.

    @param places: records shaped like those yielded by timezones(),
                   i.e. (country, (lat, lon), tz)
    """
    def separation(place):
        where = place[1]
        return distance(lat, lon, where[0], where[1])

    # Same first-minimum-wins search as nearest_tz; share the helper.
    return optimize(places, separation)


def distance(lat_1, long_1, lat_2, long_2):
    """Great-circle separation of two points given in degrees.

    Uses the haversine formula and returns the central angle in radians
    (the distance on a unit sphere), which is all that is needed for
    comparing candidates against each other.
    """
    # thanks http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/393241
    # Submitter: Kevin Ryan (other recipes)
    # Last Updated: 2006/04/25
    lat_1, long_1, lat_2, long_2 = [
        v * math.pi / 180.0 for v in (lat_1, long_1, lat_2, long_2)]
    dlong = long_2 - long_1
    dlat = lat_2 - lat_1
    a = (math.sin(dlat / 2)) ** 2 \
        + math.cos(lat_1) * math.cos(lat_2) * (math.sin(dlong / 2)) ** 2
    # min() keeps rounding error from pushing the asin argument above 1.
    return 2 * math.asin(min(1, math.sqrt(a)))


def timezones(web, exclude=()):
    """iterate over timezones in wikipedia

    Yields (country, (lat, lon), tz) for each data row of every table
    whose third column heading is "TZ".

    @param web: a function from URIs to info, content pairs
    @param exclude: exclude timezones with these strings in the name,
                    e.g. convexity exceptions like Indiana
    """
    _info, content = web(DATAPG)
    doc = xml.dom.minidom.parseString(content)

    for table in doc.getElementsByTagName("table"):
        rows = table.getElementsByTagName("tr")
        if len(rows) == 0:
            continue  # empty table: nothing to inspect
        headings = rows[0].getElementsByTagName("th")
        # Tables with fewer than three headings cannot be the TZ table;
        # skip them rather than fail on the [2] index.
        if len(headings) < 3 or getText(headings[2].childNodes) != "TZ":
            continue  # irrelevant table

        for row in rows[1:]:
            values = [getText(td.childNodes)
                      for td in row.getElementsByTagName("td")]
            if len(values) < 3:
                continue
            country, coords, tz = values[:3]
            for s in exclude:
                if s in tz:
                    break
            else:
                # no exclusion string matched this zone name
                yield country, latlong(coords), tz


def latlong(coords):
    """decode ISO 6709. ugh.

    >>> latlong("-1247+04514")
    (-12.783333333333333, 45.233333333333334)

    >>> latlong("-690022+0393524")
    (-69.00611111111111, 39.590000000000003)
    """
    m = re.search(r'([^\d])(\d+)([^\d])(\d+)', coords)
    if not m:
        raise ValueError(coords)
    lat_sign, lat_digits, lon_sign, lon_digits = m.groups()
    return coord(lat_sign, lat_digits), coord(lon_sign, lon_digits)


def coord(sign, digits):
    """Convert one signed ISO 6709 digit run via aptdata.dms.

    Accepted layouts are DDMM, DDDMM, DDMMSS and DDDMMSS.

    @param sign: '+' for positive; any other character counts as negative
    @param digits: string of 4 to 7 decimal digits
    @raise RuntimeError: for any other number of digits
    """
    n = len(digits)
    if n not in (4, 5, 6, 7):
        # Call-style raise: the old three-expression form passed the digits
        # as a traceback, which fails with TypeError before this is raised.
        raise RuntimeError("not implemented: %r" % (digits,))

    # Even lengths carry two degree digits, odd lengths carry three.
    deg_len = 2 + n % 2
    d = int(digits[:deg_len])
    m = int(digits[deg_len:deg_len + 2])
    if n >= 6:
        s = int(digits[deg_len + 2:])
    else:
        s = 0

    # 'N'/'S' is used only as a +/- marker, for longitudes as well.
    if sign == '+':
        kludge = 'N'
    else:
        kludge = 'S'
    return aptdata.dms(kludge, d, m, s)


def getText(nodelist):
    """Concatenate the data of the text nodes directly in nodelist.

    from /usr/share/doc/python2.4/html/lib/minidom-example.txt
    """
    return "".join([node.data for node in nodelist
                    if node.nodeType == node.TEXT_NODE])


def stdtime(tz, when, zoneinfo="/usr/share/zoneinfo"):
    """Use /usr/share/zoneinfo to interpret a time in a timezone.

    >>> stdtime("America/Chicago", "2007-04-02T21:53:27")
    '2007-04-02T21:53:27-05:00'

    @param tz: zone name, used as a path relative to zoneinfo
    @param when: 19-character timestamp laid out as YYYY-MM-DDTHH:MM:SS;
                 the separator characters themselves are not checked
    @param zoneinfo: directory holding the compiled zone files; override
                     it where the database lives somewhere else
    """
    # Encoding leaves an ASCII Python 2 str unchanged and yields the bytes
    # that struct requires on Python 3.
    fields = struct.unpack('4sx2sx2sx2sx2sx2s', when.encode('ascii'))
    year, month, day, hour, minute, sec = [int(f) for f in fields]
    zone = tzfile("%s/%s" % (zoneinfo, tz))
    return datetime(year, month, day, hour, minute, sec,
                    tzinfo=zone).isoformat()


############

RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
GEO = "http://www.w3.org/2003/01/geo/wgs84_pos#"
OWL = "http://www.w3.org/2002/07/owl#"
SUMO = "http://www.ontologyportal.org/translations/SUMO.owl.txt#"
RDFS = "http://www.w3.org/2000/01/rdf-schema#"


def tzjson(path=",tz.rdf"):
    """return tuple of timezone primary locations as (tz, lat, lon)

    @param path: RDF/XML file to read; the City elements are taken from
                 the first owl:oneOf of the first owl:Class in the document
    """
    src = open(path)
    try:
        # try/finally rather than `with`, so the handle is closed even on
        # interpreters that predate the with statement.
        doc = xml.dom.minidom.parseString(src.read())
    finally:
        src.close()

    def first(node, ns, name):
        return node.getElementsByTagNameNS(ns, name)[0]

    one_of = first(first(first(doc, RDF, "RDF"), OWL, "Class"), OWL, "oneOf")

    records = []
    for city in one_of.getElementsByTagNameNS(SUMO, "City"):
        lat = float(getText(first(city, GEO, "lat").childNodes))
        lon = float(getText(first(city, GEO, "long").childNodes))
        center = first(city, SUMO, "administrativeCenter")
        tz = getText(first(center, RDFS, "label").childNodes)
        records.append((str(tz), lat, lon))
    return tuple(records)


def nearest_tz(lat, lon, zones):
    """Return the (tz, lat, lon) record in zones nearest to (lat, lon).

    Returns None when zones is empty.
    """
    def d(tzrec):
        if 'Indiana' in tzrec[0]:
            return 10000  # KLUDGE to get Chicago from KC
        return distance(lat, lon, tzrec[1], tzrec[2])

    return optimize(zones, d)


def optimize(seq, metric):
    """Return the item of seq with the smallest metric(item).

    The first item wins ties; an empty seq gives None.
    """
    best = None
    m = None
    for candidate in seq:
        x = metric(candidate)
        if best is None or x < m:
            m = x
            best = candidate
    return best


def grddl_mini(outfn, src, txform, xsltproc="xsltproc"):
    """Run an XSLT transformation through the xsltproc command.

    @param outfn: file the result is written to
    @param src: source document handed to xsltproc
    @param txform: stylesheet to apply
    @param xsltproc: command to invoke
    @return: the status value returned by os.system (0 on success)
    """
    import os
    import sys

    sys.stderr.write("running %s on %s\n" % (txform, src))
    status = os.system("%s --output '%s' '%s' '%s'"
                       % (xsltproc, outfn, txform, src))
    if status != 0:
        # Report the failure but carry on, as the script always has;
        # callers that care can inspect the returned status.
        sys.stderr.write("warning: %s returned status %s\n"
                         % (xsltproc, status))
    return status


def mkmodule(rdfdoc="tz.rdf", txform="tzcountrydata.xsl"):
    """Print Python source defining TimeZoneLocations.

    Transforms DATAPG into rdfdoc with txform, reads the zones back with
    tzjson and writes the module text to standard output.
    """
    import pprint

    grddl_mini(rdfdoc, DATAPG, txform)
    zones = tzjson(rdfdoc)
    print('""" based on %s' % DATAPG)
    print('"""')
    print("TimeZoneLocations = \\")
    pprint.pprint(zones)


def _testModule(lat, lon):
    """Print the zone nearest (lat, lon) using the tz_by_country module."""
    import tz_by_country

    zones = tz_by_country.TimeZoneLocations
    print(nearest_tz(lat, lon, zones))


def _testXML():
    """Print the zone nearest sys.argv[2], sys.argv[3] using tzjson()."""
    # Imported here so the function also works when the module is
    # imported instead of being run as a script.
    import sys

    zones = tzjson()
    print(nearest_tz(float(sys.argv[2]), float(sys.argv[3]), zones))


def _test():
    """Run the doctests in this module."""
    import doctest

    doctest.testmod()


if __name__ == '__main__':
    import sys

    if '--test' in sys.argv:
        _test()
    elif '--testXML' in sys.argv:
        _testXML()
    elif '--testPy' in sys.argv:
        _testModule(float(sys.argv[2]), float(sys.argv[3]))
    else:
        main(sys.argv)