#!/usr/bin/python
"""
A custom SPARQL client library written in Python.

This particular implementation is designed to work in conjunction with a
custom SPARQL endpoint server, which supports logging, OpenID authentication,
and SSL security. Due to the way client authentication is handled, this
library almost certainly will not work with other SPARQL endpoint servers.

The module was written for Python 2; the import block below also maps the
handful of standard-library names it needs onto their Python 3 locations.

@authors: Mike Stunes
"""

# Authenticating SPARQL client
# sparql_client.py
# Mike Stunes
# This rev. began 10 Aug 2008
# $Id: sparql_client.py 24931 2008-08-27 17:43:46Z stunes $

# TODOs
#

### Supporting libraries
import sys
import webbrowser

if sys.version_info[0] >= 3:
    import urllib.parse as _url_quoting
    import urllib.request as _url_request
    from urllib.parse import urlparse
    from http.server import HTTPServer, BaseHTTPRequestHandler
    # Response bodies are bytes on Python 3, so buffer them as bytes.
    from io import BytesIO as _ResultBuffer
else:
    import urllib
    import urllib2
    # NOTE: the from-import below rebinds the name ``urlparse`` to the
    # function; both lines are kept because the original file had both.
    import urlparse
    from urlparse import urlparse
    import StringIO
    import BaseHTTPServer
    from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
    import httplib
    from httplib import HTTPConnection, HTTPSConnection
    _url_quoting = urllib
    _url_request = urllib2
    _ResultBuffer = StringIO.StringIO

### Globals
debug = True

# Address on which authenticate() waits for the browser's completion signal.
LISTENER_ADDRESS = ('localhost', 9876)


### Helpers
def _parse_query(query):
    """
    Split a URI query string into a dictionary of decoded key/value pairs.

    Only the first '=' of each part separates key from value, so values that
    themselves contain '=' are kept intact. Parts without '=' map to an empty
    string, and empty parts are skipped. If a key repeats, the last one wins.

    @param query: Query component of a URI (without the leading '?')
    @type query: str
    @rtype: dict
    """
    params = {}
    for part in query.split('&'):
        if not part:
            continue
        pieces = part.split('=', 1)
        value = ''
        if len(pieces) == 2:
            value = pieces[1]
        key = _url_quoting.unquote_plus(pieces[0])
        params[key] = _url_quoting.unquote_plus(value)
    return params


### Classes
# Special HTTP server class, that acts as the client's listener
# This is for the client's browser to send it a signal that authentication
# is done
class client_listener(HTTPServer):
    """
    Class that acts as the client's listener, so it can receive a signal that
    authentication was successful.
    """
    def __init__(self, successCallback, failCallback, *args, **kwargs):
        """
        Any remaining arguments are passed straight to HTTPServer.

        @param successCallback: Function to call when authentication is
            successful
        @type successCallback: function
        @param failCallback: Function to call when authentication is not
            successful
        @type failCallback: function
        """
        HTTPServer.__init__(self, *args, **kwargs)
        self.successCallback = successCallback
        self.failCallback = failCallback


# Corresponding request handler class
class client_handler(BaseHTTPRequestHandler):
    """
    Request handler for client_listener class.

    A GET request whose query string contains auth=True is treated as a
    successful authentication; any other GET request is treated as a failure.
    """
    successPage = """
Your authentication was successful. You may now close your browser and execute queries with your client.
"""
    failPage = """Your authentication failed to complete successfully, either because your OpenID provider has failed to authenticate you, or because you are not authorized to access this server. You may try it again if you like, by executing authenticate() again.
"""

    def do_GET(self):
        """
        Parse the request URI and dispatch on its "auth" parameter.
        """
        # All we really need is the "auth" param passed in
        self.parsed_uri = urlparse(self.path)
        self.params = _parse_query(self.parsed_uri[4])
        if self.params.get('auth') == "True":
            self.doAuthSuccess()
        else:
            self.doAuthFail()

    def _send_page(self, status, page):
        """
        Write a complete HTML response with the given status code and body.
        """
        body = page
        if sys.version_info[0] >= 3 and isinstance(page, str):
            # Sockets carry bytes on Python 3
            body = page.encode('utf-8')
        self.send_response(status)
        self.send_header('Content-Type', 'text/html')
        # The length must describe the body actually written below
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def doAuthSuccess(self):
        """
        Send the success page (200), then notify the server's success
        callback.
        """
        self._send_page(200, self.successPage)
        self.server.successCallback()

    def doAuthFail(self):
        """
        Send the failure page (401), then notify the server's failure
        callback.
        """
        self._send_page(401, self.failPage)
        self.server.failCallback()


# Custom urllib handler, to pick up a redirect without following it
class fetch_handler(_url_request.HTTPRedirectHandler):
    """
    A custom urllib2 handler, to handle receiving redirects without following
    them.
    """
    def http_error_303(self, req, fp, code, msg, headers):
        """
        Return the 303 response itself instead of following the redirect.

        The status code is stored on the returned object as ``status`` so
        callers can tell that a redirect was received.
        """
        result = fp
        result.status = code
        return result


### Instances of custom utilities
# Custom URL opener
custom_opener = _url_request.build_opener(fetch_handler())


### Actual module classes
# Wrapper class
class SparqlWrapper(object):
    """
    Class that encapsulates an interface to an authenticating SPARQL endpoint.

    @ivar uri: base URI of the SPARQL endpoint
    @ivar openid_url: OpenID identifier with which to authenticate
    @ivar query_string: Query string
    @ivar sessionID: Session ID received from the server
    @ivar extratags: Any extra tags to be added to the request URI
    @ivar authenticated: State of authentication with the server
    """
    def __init__(self, uri):
        """
        @param uri: base URI of the SPARQL endpoint (do not include the
            '/sparql')
        @type uri: str
        """
        self.uri = uri
        self.openid_url = ''
        self.query_string = ''
        self.extratags = []
        self.authenticated = False
        self.sessionID = ''

    def authenticate(self, openid_url):
        """
        This function initiates the client->server authentication process.

        It requests <uri>/getSession, expects a 303 redirect whose Location
        carries a sessionID parameter, opens that location in a web browser,
        and then blocks until one request arrives on LISTENER_ADDRESS.

        Errors raised by the URL opener itself are not caught here.

        @param openid_url: OpenID identifier with which to authenticate
        @type openid_url: str
        @return: the value of self.authenticated once the listener has
            handled a request
        @rtype: bool
        @raise AuthenticationError: if the server does not answer with a
            usable redirect
        """
        self.openid_url = openid_url

        # Fetch document at base_url/getSession?openid_url...
        # this _should_ be a redirect, that we snatch up and hand over to a
        # browser
        request_uri = '%s/getSession?openid_url=%s' % (
            self.uri, _url_quoting.quote(openid_url))
        if debug:
            sys.stderr.write(
                "About to fetch document at URI %s\n" % (request_uri))
        response = custom_opener.open(request_uri)
        try:
            if debug:
                sys.stderr.write("Resource fetched\n")
            # Only responses that passed through fetch_handler are certain to
            # carry "status", hence the getattr
            if getattr(response, 'status', None) != 303:
                raise AuthenticationError(
                    "Expected a 303 redirect from %s" % (request_uri))
            redirect_uri = response.info().get('Location')
        finally:
            response.close()

        if not redirect_uri:
            raise AuthenticationError(
                "Redirect from %s has no Location header" % (request_uri))

        # There should only be one parameter in the redirect URL, but we'll
        # play it safe
        params = _parse_query(urlparse(redirect_uri)[4])
        if 'sessionID' not in params:
            raise AuthenticationError(
                "Redirect to %s carries no sessionID" % (redirect_uri))
        self.sessionID = params['sessionID']
        # A new session is unconfirmed until the listener hears back
        self.authenticated = False

        # Bind the listener before the browser starts, so that a fast
        # redirect back to us cannot arrive while nothing is listening
        listener = client_listener(self.finish_auth_success,
                                   self.finish_auth_fail,
                                   LISTENER_ADDRESS, client_handler)
        try:
            if debug:
                sys.stderr.write(
                    "Opening browser pointed at %s\n" % (redirect_uri))
            # New tab if possible (only available in Python >= 2.5)
            open_in_browser = getattr(webbrowser, 'open_new_tab',
                                      webbrowser.open_new)
            open_in_browser(redirect_uri)
            listener.handle_request()
        finally:
            # Release the port so authenticate() can be called again
            listener.server_close()
        return self.authenticated

    def finish_auth_success(self):
        """
        Helper function used by authenticate() to set internal variables when
        authentication finishes.
        """
        self.authenticated = True

    def finish_auth_fail(self):
        """
        Helper function used by authenticate() to set internal variables when
        authentication fails.

        @raise AuthenticationError: always, after clearing the flag
        """
        self.authenticated = False
        raise AuthenticationError

    def setQueryString(self, query_string):
        """
        Sets the query string.

        @param query_string: Query string to use.
        @type query_string: str
        """
        self.query_string = query_string

    def setReturnFormat(self, return_format):
        """
        Sets the return format of the query results. Currently not
        implemented in either this library or the server.

        @param return_format: Return format for results.
        @type return_format: TBD
        @raise NotImplementedError: always
        """
        # TODO: something
        raise NotImplementedError

    def addExtraURITag(self, key, value):
        """
        Adds an extra key/value pair to the query URI. Currently not used for
        anything.

        @param key: Key to be added to the URI
        @type key: string
        @param value: Value of the key to be added to the URI
        @type value: string
        """
        self.extratags.append((key, value))

    def query(self):
        """
        Runs a query and returns the results.

        @rtype: Instance of SparqlResults
        @raise SyntaxException: if no query string has been set
        @raise AuthenticationError: if authenticate() has not succeeded
        """
        if self.query_string == '':
            raise SyntaxException("No query string specified.")
        if not self.authenticated:
            raise AuthenticationError("Not yet authenticated to the server.")

        # Create the URI to request via HTTP
        params = [
            ('query', self.query_string),
            ('sessionID', self.sessionID),
        ]
        URI = self.uri + '/sparql?' + _url_quoting.urlencode(params)
        if debug:
            sys.stderr.write("Querying URI %s\n" % (URI))

        # Fetch everything into memory right away so that the connection can
        # be closed before the results are handed back
        resultStream = _url_request.urlopen(URI)
        try:
            url = resultStream.geturl()
            info = resultStream.info()
            resultString = resultStream.read()
        finally:
            resultStream.close()

        # TODO: do AIR reasoning here?
        return SparqlResults(_ResultBuffer(resultString), url, info)


# Results class
class SparqlResults(object):
    """
    Class encapsulating the returned results from running a SPARQL query.
    This can serve as a generator, by mapping __iter__() and next() to the
    respective methods of self.results, which is a file-like object.

    @ivar results: results from running the query
    @ivar url: URL of requested object
    @ivar info: Additional metadata from the HTTP request that generated
        results
    """
    def __init__(self, results, url, info):
        """
        @param results: results from running the query
        @type results: file-like object
        @param url: URL of requested object
        @type url: string
        @param info: Additional metadata from HTTP request that generated
            results
        @type info: dictionary-like object
        """
        self.results = results
        self.url = url
        self.info = info

    def getURL(self):
        """
        Returns stored URL of results.
        """
        return self.url

    def getInfo(self):
        """
        Returns stored HTTP request metadata.
        """
        return self.info

    def __iter__(self):
        return iter(self.results)

    def next(self):
        """
        Returns the next item of self.results.
        """
        # Iterators expose __next__() on Python 3 and next() on Python 2
        advance = getattr(self.results, '__next__', None)
        if advance is None:
            advance = self.results.next
        return advance()

    # Python 3 spelling of the iterator protocol
    __next__ = next


### Custom exceptions
class SyntaxException(ValueError):
    """
    Exception for query syntax errors.
    """
    pass


class AuthenticationError(Exception):
    """
    Exception for authentication problems.
    """
    pass