notation3
index
/Users/yosi/CVSROOT/WWW/2000/10/swap/notation3.py

$Id: notation3.py,v 1.195 2007/06/26 02:36:15 syosi Exp $
 
 
This module implements a Notation3 parser, and the final
part of a notation3 serializer.
 
See also:
 
Notation 3
http://www.w3.org/DesignIssues/Notation3
 
Closed World Machine - an RDF Processor
http://www.w3.org/2000/10/swap/cwm
 
To do: See also "@@" in comments
 
- Clean up interfaces
______________________________________________
 
Module originally by Dan Connolly, including notation3
parser and RDF generator. TimBL added RDF stream model
and N3 generation, replaced stream model with use
of common store/formula API.  Yosi Scharf developed
the module, including tests and test harness.

 
Modules
       
RDFSink
codecs
diag
re
string
sys
triple_maker
types
uripath
urllib

 
Classes
       
RDFSink.RDFSink
ToN3
tmToN3
exceptions.SyntaxError(exceptions.StandardError)
BadSyntax
SinkParser

 
class BadSyntax(exceptions.SyntaxError)
    
Method resolution order:
BadSyntax
exceptions.SyntaxError
exceptions.StandardError
exceptions.Exception

Methods defined here:
__init__(self, uri, lines, str, i, why)
__str__(self)

Data and other attributes inherited from exceptions.SyntaxError:
filename = None
lineno = None
msg = ''
offset = None
print_file_and_line = None
text = None

Methods inherited from exceptions.Exception:
__getitem__(...)

 
class SinkParser
     Methods defined here:
UEscape(self, str, i, startline)
__init__(self, store, openFormula=None, thisDoc='', baseURI=None, genPrefix='', metaURI=None, flags='', why=None)
note: namespace names should *not* end in #;
the # will get added during qname processing
anonymousNode(self, ln)
Remember or generate a term for one of these _: anonymous nodes
bareWord(self, str, i, res)
abc -> :abc
bind(self, qn, uri)
blankNode(self, uri=None)
checkDot(self, str, i)
commaSeparatedList(self, str, j, res, what)
return value: -1 bad syntax; >1 new position in str
res has things found appended
directive(self, str, i)
directiveOrStatement(self, str, h)
endDoc(self)
Signal end of document and stop parsing. returns formula
feed(self, octets)
Feed an octet stream to the parser
 
if BadSyntax is raised, the string
passed in the exception object is the
remainder after any statements have been parsed.
So if there is more data to feed to the
parser, it should be straightforward to recover.
formula(self)
here(self, i)
String generated from position in file
 
This is for repeatability when referring people to bnodes in a document.
This has diagnostic uses less formally, as it should point one to which 
bnode the arbitrary identifier actually is. It gives the
line and character number of the '[' character or path character
which introduced the blank node. The first blank node is boringly _L1C1.
It used to be used only for tracking, but for tests in general
it makes the canonical ordering of bnodes repeatable.
item(self, str, i, res)
loadBuf(self, buf)
Parses a buffer and returns its top level formula
loadStream(self, stream)
makeStatement(self, quadruple)
node(self, str, i, res, subjectAlready=None)
Parse the <node> production.
Space is now skipped once at the beginning
instead of in multiple calls to skipSpace().
nodeOrLiteral(self, str, i, res)
object(self, str, i, res)
objectList(self, str, i, res)
path(self, str, i, res)
Parse the path production.
prop(self, str, i, res)
property_list(self, str, i, subj)
Parse property list
Leaves the terminating punctuation in the buffer
qname(self, str, i, res)
xyz:def -> ('xyz', 'def')
If not in keywords and keywordsSet: def -> ('', 'def')
:def -> ('', 'def')
setKeywords(self, k)
Takes a list of strings
skipSpace(self, str, i)
Skip white space, newlines and comments.
return -1 if EOF, else position of first non-ws character
startDoc(self)
statement(self, str, i)
strconst(self, str, i, delim)
parse an N3 string constant delimited by delim.
return index, val
subject(self, str, i, res)
tok(self, tok, str, i)
Check for keyword.  Space must have been stripped on entry and
we must not be at end of file.
uEscape(self, str, i, startline)
uri_ref2(self, str, i, res)
Generate uri from n3 representation.
 
Note that the RDF convention of directly concatenating
NS and local name is now used though I prefer inserting a '#'
to make the namespaces look more like what XML folks expect.
variable(self, str, i, res)
?abc -> variable(:abc)
verb(self, str, i, res)
has _prop_
is _prop_ of
a
=
_prop_
>- prop ->
<- prop -<
_operator_

 
class ToN3(RDFSink.RDFSink)
    Serializer output sink for N3
 
keeps track of most recent subject and predicate and reuses them.
Adapted from Dan's ToRDFParser(Parser);
 
  Methods defined here:
__init__(self, write, base=None, genPrefix=None, noLists=0, quiet=0, flags='')
bind(self, prefixString, uri)
Just accepting a convention here
dummyClone(self)
return a version of myself which will only count occurrences
endAnonymous(self, subject, verb)
endAnonymousNode(self, subj=None)
endDoc(self, rootFormulaPair=None)
endFormulaObject(self, pred, subj)
endFormulaSubject(self, subj)
endListObject(self, subject, verb)
endListSubject(self, subj=None)
makeComment(self, str)
makeStatement(self, triple, why=None, aIsPossible=1)
representationOf(self, context, pair)
Representation of a thing in the output stream
 
Regenerates genids if required.
Uses prefix dictionary to use qname syntax if possible.
setDefaultNamespace(self, uri)
startAnonymous(self, triple)
startAnonymousNode(self, subj)
startDoc(self)
startFormulaObject(self, triple)
startFormulaSubject(self, context)
startListObject(self, triple)
startListSubject(self, subj)
writeEncoded(self, str)
Write a possibly unicode string out to the output

Data and other attributes defined here:
flagDocumentation = 'Flags for N3 output are as follows:-\n \na ...a existentially qualified explicitly named node.\n'

Methods inherited from RDFSink.RDFSink:
checkNewId(self, uri)
The store can override this to raise an exception if the
id is not in fact new. This is useful because it is helpful
to generate IDs in diagnostically useful ways, but this lays them
open to possibly clashing in pathological cases.
countNamespace(self, namesp)
On output, count how many times each namespace is used
genId(self)
intern(self, something)
namespaceCounts(self)
newBlankNode(self, context, uri=None, why=None)
newExistential(self, context, uri=None, why=None)
newFormula(self, uri=None)
newList(self, l, context)
newLiteral(self, str, dt=None, lang=None)
newSymbol(self, uri)
newUniversal(self, context, uri=None, why=None)
newXMLLiteral(self, doc)
reopen(self)
Un-End a document
 
If you have added stuff to a document, thought you were done, and
then want to add more, call this to get back into the state in which makeStatement
is again acceptable. Remember to end the document again when done.
setGenPrefix(self, genPrefix)

 
class tmToN3(RDFSink.RDFSink)
     Methods defined here:
IsOf(self)
__init__(self, write, base=None, genPrefix=None, noLists=0, quiet=0, flags='')
addAnonymous(self, Id)
If an anonymous shows up more than once, this is the
function to call
addLiteral(self, lit, dt=None, lang=None)
addNode(self, node)
addQuestionMarkedSymbol(self, sym)
addSymbol(self, sym)
backwardPath(self)
beginAnonymous(self)
beginFormula(self)
beginList(self)
bind(self, prefixString, uri)
Just accepting a convention here
checkIsOf(self)
declareExistential(self, sym)
declareUniversal(self, sym)
end(self)
endAnonymous(self)
endFormula(self)
endList(self)
endStatement(self)
forewardPath(self)
setDefaultNamespace(self, uri)
start(self)
symbolString(self, value)
writeEncoded(self, str)
Write a possibly unicode string out to the output

Methods inherited from RDFSink.RDFSink:
checkNewId(self, uri)
The store can override this to raise an exception if the
id is not in fact new. This is useful because it is helpful
to generate IDs in diagnostically useful ways, but this lays them
open to possibly clashing in pathological cases.
countNamespace(self, namesp)
On output, count how many times each namespace is used
endDoc(self, rootFormulaPair)
End a document
 
Call this once only at the end of parsing so that the receiver can wrap
things up, optimize, intern, index and so on.  The pair given is the (type, value)
identifier of the root formula of the thing parsed.
genId(self)
intern(self, something)
makeComment(self, str)
This passes on a comment line which of course has no semantics.
 
This is only useful in direct piping of parsers to output, to preserve
comments in the original file.
makeStatement(self, tuple, why=None)
add a statement to a stream/store.
 
raises URISyntaxError on bad URIs
tuple is a quad (context, predicate, subject, object) of things generated by calls to newLiteral etc
why is reason for the statement.
namespaceCounts(self)
newBlankNode(self, context, uri=None, why=None)
newExistential(self, context, uri=None, why=None)
newFormula(self, uri=None)
newList(self, l, context)
newLiteral(self, str, dt=None, lang=None)
newSymbol(self, uri)
newUniversal(self, context, uri=None, why=None)
newXMLLiteral(self, doc)
reopen(self)
Un-End a document
 
If you have added stuff to a document, thought you were done, and
then want to add more, call this to get back into the state in which makeStatement
is again acceptable. Remember to end the document again when done.
setGenPrefix(self, genPrefix)
startDoc(self)

 
Functions
       
backslashUify(ustr)
Use URL encoding to return an ASCII string corresponding
to the given unicode
dummy()
dummyWrite(x)
hexify(ustr)
Use URL encoding to return an ASCII string
corresponding to the given UTF8 string
 
>>> hexify("http://example/a b")
'http://example/a%20b'
nothing()
stringToN3(str, singleLine=0, flags='')
#"
stripCR(str)
toBool(s)

 
Data
        ADDED_HASH = '#'
ALL4 = (0, 1, 2, 3)
ANONYMOUS = 3
BOOLEAN_DATATYPE = 'http://www.w3.org/2001/XMLSchema#boolean'
CONTEXT = 0
DAML_sameAs = (0, 'http://www.w3.org/2002/07/owl#sameAs')
DAML_sameAs_URI = 'http://www.w3.org/2002/07/owl#sameAs'
DECIMAL_DATATYPE = 'http://www.w3.org/2001/XMLSchema#decimal'
Escapes = {'"': '"', r'\': r'\', 'a': '\x07', 'b': '\x08', 'f': '\x0c', 'n': '\n', 'r': '\r', 't': '\t', 'v': '\x0b'}
FLOAT_DATATYPE = 'http://www.w3.org/2001/XMLSchema#double'
INTEGER_DATATYPE = 'http://www.w3.org/2001/XMLSchema#integer'
LIST = 10000
LITERAL = 2
LITERAL_DT = 21
LITERAL_LANG = 22
LOG_implies_URI = 'http://www.w3.org/2000/10/swap/log#implies'
List_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
Logic_NS = 'http://www.w3.org/2000/10/swap/log#'
N3CommentCharacter = '#'
N3_Empty = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#Empty')
N3_List = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#List')
N3_first = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#first')
N3_forAll_URI = 'http://www.w3.org/2000/10/swap/log#forAll'
N3_forSome_URI = 'http://www.w3.org/2000/10/swap/log#forSome'
N3_li = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#li')
N3_nil = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil')
N3_rest = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#rest')
OBJ = 3
PARTS = (1, 2, 3)
PRED = 1
QUESTION = 10001
RDF_NS_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
RDF_spec = 'http://www.w3.org/TR/REC-rdf-syntax/'
RDF_type = (0, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type')
RDF_type_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
SUBJ = 2
SYMBOL = 0
XMLLITERAL = 25
becauseSubexpression = <why.BecauseSubexpression instance>
digitstring = <_sre.SRE_Pattern object>
eof = <_sre.SRE_Pattern object>
eol = <_sre.SRE_Pattern object>
forbidden1 = <_sre.SRE_Pattern object>
forbidden2 = <_sre.SRE_Pattern object>
interesting = <_sre.SRE_Pattern object>
langcode = <_sre.SRE_Pattern object>
number_syntax = <_sre.SRE_Pattern object>
option_noregen = 0
parsesTo_URI = 'http://www.w3.org/2000/10/swap/log#parsesTo'
signed_integer = <_sre.SRE_Pattern object>
wide_build = False
ws = <_sre.SRE_Pattern object>