source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/contrib/pysimplesoap/c14n.py

main
Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago

Historial Limpio

  • Property mode set to 100755
File size: 16.1 KB
Line 
1#! /usr/bin/env python
2'''XML Canonicalization
3
4Patches Applied to xml.dom.ext.c14n:
5    http://sourceforge.net/projects/pyxml/
6
7    [ 1444526 ] c14n.py: http://www.w3.org/TR/xml-exc-c14n/ fix
8        -- includes [ 829905 ] c14n.py fix for bug #825115,
9           Date Submitted: 2003-10-24 23:43
10        -- include dependent namespace declarations declared in ancestor nodes
11           (checking attributes and tags),
12        -- handle InclusiveNamespaces PrefixList parameter
13
14This module generates canonical XML of a document or element.
15    http://www.w3.org/TR/2001/REC-xml-c14n-20010315
16and includes a prototype of exclusive canonicalization
17    http://www.w3.org/Signature/Drafts/xml-exc-c14n
18
19Requires PyXML 0.7.0 or later.
20
21Known issues if using Ft.Lib.pDomlette:
22    1. Unicode
23    2. does not white space normalize attributes of type NMTOKEN and ID?
24    3. seems to include "\n" after importing external entities?
25
26Note, this version processes a DOM tree, and consequently it processes
27namespace nodes as attributes, not from a node's namespace axis. This
28permits simple document and element canonicalization without
29XPath. When XPath is used, the XPath result node list is passed and used to
30determine if the node is in the XPath result list, but little else.
31
32Authors:
33    "Joseph M. Reagle Jr." <reagle@w3.org>
34    "Rich Salz" <rsalz@zolera.com>
35
36$Date: 2006-03-30 23:47:16 +0000 (Thu, 30 Mar 2006) $ by $Author: boverhof $
37'''
38
39_copyright = '''Copyright 2001, Zolera Systems Inc.  All Rights Reserved.
40Copyright 2001, MIT. All Rights Reserved.
41
42Distributed under the terms of:
43  Python 2.0 License or later.
44  http://www.python.org/2.0.1/license.html
45or
46  W3C Software License
47  http://www.w3.org/Consortium/Legal/copyright-software-19980720
48'''
49
import string
from functools import cmp_to_key
from xml.dom import Node
try:
    from xml.ns import XMLNS
except:
    class XMLNS:
        BASE = "http://www.w3.org/2000/xmlns/"
        XML = "http://www.w3.org/XML/1998/namespace"
try:
    import cStringIO
    StringIO = cStringIO
except ImportError:
    try:
        import StringIO
    except ImportError:
        # Neither Python 2 module is available: fall back to io, which
        # also exposes a StringIO class under the same attribute name.
        import io as StringIO
63
def _attrs(E):
    '''_attrs(E) -> list
    Return the attribute nodes of E as a list; an empty list when E has
    no (or an empty) attribute map.'''
    attributes = E.attributes
    if not attributes:
        return []
    # values() may hand back a view rather than a list; callers concatenate
    # the result with other lists, so always return a real list (an empty
    # result is a fresh empty list either way).
    return list(attributes.values()) or []


def _children(E):
    '''_children(E) -> sequence
    Return the child nodes of E, or an empty list when there are none.'''
    return E.childNodes or []


def _IN_XML_NS(n):
    '''_IN_XML_NS(n) -> boolean
    Return true if the name of attribute n starts with "xmlns".'''
    return n.name.startswith("xmlns")


def _inclusive(n):
    '''_inclusive(n) -> boolean
    Return true if n.unsuppressedPrefixes is None, i.e. inclusive
    (non-exclusive) canonicalization is in effect.'''
    return n.unsuppressedPrefixes is None


# Does a comment/PI have lesser/greater document order than the
# document element?
_LesserElement, _Element, _GreaterElement = range(3)
73
def _sorter(n1, n2):
    '''_sorter(n1,n2) -> int
    Sorting predicate for non-NS attributes.

    Orders two attribute nodes by namespaceURI first and by localName
    second.  Returns -1, 0 or 1.'''

    def _rank(value):
        # None sorts ahead of every string (including the empty one).
        # Ranking it explicitly keeps that ordering on interpreters that
        # refuse to compare None with a string.
        if value is None:
            return (0, '')
        return (1, value)

    k1 = (_rank(n1.namespaceURI), _rank(n1.localName))
    k2 = (_rank(n2.namespaceURI), _rank(n2.localName))
    # Three-way comparison without the cmp() builtin.
    return (k1 > k2) - (k1 < k2)
81
82
def _sorter_ns(n1, n2):
    '''_sorter_ns((n,v),(n,v)) -> int
    "(an empty namespace URI is lexicographically least)."

    Sorting predicate for (name, value) namespace declaration pairs: the
    default declaration, named "xmlns", comes first; every other pair is
    ordered by its name.'''

    if n1[0] == 'xmlns':
        return -1
    if n2[0] == 'xmlns':
        return 1
    # Three-way comparison of the names without the cmp() builtin.
    return (n1[0] > n2[0]) - (n1[0] < n2[0])
90
def _utilized(n, node, other_attrs, unsuppressedPrefixes):
    '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
    Return true (1) if the namespace declared by n is utilized within
    the node, false (0) otherwise.

    n -- name of a namespace declaration ("xmlns" or "xmlns:prefix").
    node -- the element being rendered.
    other_attrs -- the non-namespace attribute nodes selected for output.
    unsuppressedPrefixes -- prefixes to be treated inclusively, or None.
    '''
    # Reduce the declaration name to the bare prefix ("" for the default).
    if n.startswith('xmlns:'):
        n = n[6:]
    elif n.startswith('xmlns'):
        n = n[5:]

    if n == "" and node.prefix in ["#default", None]:
        return 1
    if n == node.prefix:
        return 1
    # Guard against None: a membership test on None raises TypeError.
    if unsuppressedPrefixes is not None and n in unsuppressedPrefixes:
        return 1

    for attr in other_attrs:
        if n == attr.prefix:
            return 1

    # For exclusive need to look at attributes
    if unsuppressedPrefixes is not None:
        for attr in _attrs(node):
            if n == attr.prefix:
                return 1

    return 0
109
110
def _inclusiveNamespacePrefixes(node, context, unsuppressedPrefixes):
    '''http://www.w3.org/TR/xml-exc-c14n/
    InclusiveNamespaces PrefixList parameter, which lists namespace prefixes that
    are handled in the manner described by the Canonical XML Recommendation

    Splits the declarations in context into two groups and returns them as
    (kept, leftover): kept is the list of attribute nodes that are either
    named by unsuppressedPrefixes or needed by the node's own name or by
    one of its prefixed attributes; leftover maps the name of every other
    "xmlns:prefix" declaration to its value.'''
    # Declaration names required by the element itself ...
    if node.prefix:
        needed = ['xmlns:%s' % node.prefix]
    else:
        needed = ['xmlns']
    # ... and by each of its prefixed, non-namespace attributes.
    needed.extend('xmlns:%s' % attribute.prefix
                  for attribute in _attrs(node)
                  if not attribute.nodeName.startswith('xmlns')
                  and attribute.prefix)

    kept = []
    leftover = {}
    for declaration in context:
        name = declaration.nodeName
        listed = (name in unsuppressedPrefixes
                  or (name.startswith('xmlns:')
                      and name[6:] in unsuppressedPrefixes)
                  or (name.startswith('xmlns')
                      and name[5:] in unsuppressedPrefixes))
        if listed or name in needed:
            kept.append(declaration)
        elif name.startswith('xmlns:'):
            leftover[name] = declaration.value

    return kept, leftover
140
def _in_subset(subset, node):
    '''_in_subset(subset, node) -> boolean
    Return true if node is selected by subset.'''
    # rich's tweak: only None selects every node.  The earlier test,
    # "not subset or node in subset", also let an empty subset select
    # everything.
    if subset is None:
        return True
    return node in subset
143
144
class _implementation:
    '''Implementation class for C14N. This accompanies a node during its
    processing and includes the parameters and processing state.

    Creating an instance runs the canonicalization: every piece of output
    is handed to the write callable given to the constructor.'''

    # Handler for each node type; populated as the methods below are defined.
    handlers = {}

    def __init__(self, node, write, **kw):
        '''Create and run the implementation.

        node -- DOM document or element node to canonicalize; a document
                type node is accepted and produces no output.
        write -- callable invoked with each piece of output text.
        Keyword parameters:
            subset -- collection of nodes to render; None (the default)
                      renders every node.
            comments -- render comments if true (default is 0).
            unsuppressedPrefixes -- when not None, do exclusive C14N with
                      these prefixes handled inclusively.
            nsdict -- dictionary used as the initial namespace context.

        Raises TypeError for any other node type.'''
        self.write = write
        self.subset = kw.get('subset')
        self.comments = kw.get('comments', 0)
        self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
        nsdict = kw.get('nsdict', {'xml': XMLNS.XML, 'xmlns': XMLNS.BASE})

        # Processing state:
        # (ns_parent, ns_rendered, xml_attrs, ns_unused_inherited)
        self.state = (nsdict, {'xml': ''}, {}, {})  #0422

        if node.nodeType == Node.DOCUMENT_NODE:
            self._do_document(node)
        elif node.nodeType == Node.ELEMENT_NODE:
            self.documentOrder = _Element        # At document element
            if not _inclusive(self):
                inherited, unused = _inclusiveNamespacePrefixes(
                    node, self._inherit_context(node),
                    self.unsuppressedPrefixes)
                self._do_element(node, inherited, unused=unused)
            else:
                inherited = self._inherit_context(node)
                self._do_element(node, inherited)
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            pass
        else:
            raise TypeError(str(node))

    def _inherit_context(self, node):
        '''_inherit_context(self, node) -> list
        Scan the ancestors of node for the namespace context.  Used only
        for single element node canonicalization, not for subset
        canonicalization.

        Returns the attribute nodes, nearest ancestor first, whose name
        starts with "xmlns" (the _IN_XML_NS test) and whose localName has
        not already been collected from a nearer ancestor.'''

        # Start from the node's own declarations.  A real list is needed
        # because local names are appended to it below.
        xmlattrs = [a for a in _attrs(node) if _IN_XML_NS(a)]
        # NOTE(review): this list starts out holding attribute nodes but is
        # matched against localName strings below, so the node's own
        # declarations never mask an ancestor's here -- kept as is; confirm
        # whether that is intended.

        # Walk up and collect every declaration we inherit.
        inherited, parent = [], node.parentNode
        while parent and parent.nodeType == Node.ELEMENT_NODE:
            for a in _attrs(parent):
                if not _IN_XML_NS(a):
                    continue
                n = a.localName
                if n not in xmlattrs:
                    xmlattrs.append(n)
                    inherited.append(a)
            parent = parent.parentNode
        return inherited

    def _do_document(self, node):
        '''_do_document(self, node) -> None
        Process a document node. documentOrder holds whether the document
        element has been encountered such that PIs/comments can be written
        as specified.

        Raises TypeError for a child that is not an element, processing
        instruction, comment or document type node.'''

        self.documentOrder = _LesserElement
        for child in node.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                self.documentOrder = _Element        # At document element
                self._do_element(child)
                self.documentOrder = _GreaterElement # After document element
            elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
                self._do_pi(child)
            elif child.nodeType == Node.COMMENT_NODE:
                self._do_comment(child)
            elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
                pass
            else:
                raise TypeError(str(child))
    handlers[Node.DOCUMENT_NODE] = _do_document

    def _do_text(self, node):
        '''_do_text(self, node) -> None
        Process a text or CDATA node.  Render various special characters
        as their C14N entity representations.  Nothing is written for a
        node outside the subset or with empty data.'''
        if not _in_subset(self.subset, node):
            return
        # "&" must be replaced first so that the entities introduced by
        # the later replacements are not escaped a second time.
        s = node.data.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace(">", "&gt;")
        s = s.replace("\015", "&#xD;")
        if s:
            self.write(s)
    handlers[Node.TEXT_NODE] = _do_text
    handlers[Node.CDATA_SECTION_NODE] = _do_text

    def _do_pi(self, node):
        '''_do_pi(self, node) -> None
        Process a PI node. Render a leading or trailing #xA if the
        document order of the PI is greater or lesser (respectively)
        than the document element.
        '''
        if not _in_subset(self.subset, node):
            return
        W = self.write
        if self.documentOrder == _GreaterElement:
            W('\n')
        W('<?')
        W(node.nodeName)
        s = node.data
        if s:
            W(' ')
            W(s)
        W('?>')
        if self.documentOrder == _LesserElement:
            W('\n')
    handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi

    def _do_comment(self, node):
        '''_do_comment(self, node) -> None
        Process a comment node. Render a leading or trailing #xA if the
        document order of the comment is greater or lesser (respectively)
        than the document element.  Nothing is written unless comments
        were requested.
        '''
        if not _in_subset(self.subset, node):
            return
        if self.comments:
            W = self.write
            if self.documentOrder == _GreaterElement:
                W('\n')
            W('<!--')
            W(node.data)
            W('-->')
            if self.documentOrder == _LesserElement:
                W('\n')
    handlers[Node.COMMENT_NODE] = _do_comment

    def _do_attr(self, n, value):
        '''_do_attr(self, n, value) -> None
        Process an attribute: write a space, the name n and the quoted,
        escaped value.'''

        W = self.write
        W(' ')
        W(n)
        W('="')
        # "&" first, so the references added below are left untouched.
        s = value.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace('"', '&quot;')
        # Character references must end with ";" to be well-formed.
        s = s.replace('\011', '&#x9;')
        s = s.replace('\012', '&#xA;')
        s = s.replace('\015', '&#xD;')
        W(s)
        W('"')

    def _do_element(self, node, initial_other_attrs=None, unused=None):
        '''_do_element(self, node, initial_other_attrs=None, unused=None) -> None
        Process an element (and its children).

        initial_other_attrs -- attribute nodes processed ahead of the
                element's own attributes; None means there are none.
        unused -- dictionary of namespace declarations that have not been
                rendered (used for exclusive c14n); None means a copy of
                the one in the current state is used.  A dictionary that
                is passed in is modified in place.

        Raises RuntimeError, in exclusive mode, when the declaration for
        the element's prefix is found neither among the rendered, the
        local nor the unused inherited declarations.'''

        # Get state (from the stack) make local copies.
        #   ns_parent -- NS declarations in parent
        #   ns_rendered -- NS nodes rendered by ancestors
        #        ns_local -- NS declarations relevant to this element
        #   xml_attrs -- Attributes in XML namespace from parent
        #       xml_attrs_local -- Local attributes in XML namespace.
        #   ns_unused_inherited -- not rendered namespaces, used for exclusive
        ns_parent, ns_rendered, xml_attrs = \
                self.state[0], self.state[1].copy(), self.state[2].copy() #0422

        ns_unused_inherited = unused
        if unused is None:
            ns_unused_inherited = self.state[3].copy()

        ns_local = ns_parent.copy()
        inclusive = _inclusive(self)
        xml_attrs_local = {}

        # Divide attributes into NS, XML, and others.
        other_attrs = []
        in_subset = _in_subset(self.subset, node)
        for a in list(initial_other_attrs or []) + list(_attrs(node)):
            if a.namespaceURI == XMLNS.BASE:
                n = a.nodeName
                if n == "xmlns:":
                    n = "xmlns"        # DOM bug workaround
                ns_local[n] = a.nodeValue
            elif a.namespaceURI == XMLNS.XML:
                #020925 Test to see if attribute node in subset
                if inclusive or (in_subset and _in_subset(self.subset, a)):
                    xml_attrs_local[a.nodeName] = a #0426
            else:
                #020925 Test to see if attribute node in subset
                if _in_subset(self.subset, a):
                    other_attrs.append(a)

#                # TODO: exclusive, might need to define xmlns:prefix here
#                if not inclusive and a.prefix is not None and not ns_rendered.has_key('xmlns:%s' %a.prefix):
#                    ns_local['xmlns:%s' %a.prefix] = ??

        # Add local xml:foo attributes to ancestor's xml:foo attributes.
        # One update after the loop gives the same result as updating on
        # every iteration.
        xml_attrs.update(xml_attrs_local)

        # Render the node
        W, name = self.write, None
        if in_subset:
            name = node.nodeName
            if not inclusive:
                if node.prefix is not None:
                    prefix = 'xmlns:%s' % node.prefix
                else:
                    prefix = 'xmlns'

                if prefix not in ns_rendered and prefix not in ns_local:
                    if prefix not in ns_unused_inherited:
                        raise RuntimeError(
                            'For exclusive c14n, unable to map prefix "%s" in %s' % (
                            prefix, node))

                    # Promote the declaration from "unused" to "local".
                    ns_local[prefix] = ns_unused_inherited[prefix]
                    del ns_unused_inherited[prefix]

            W('<')
            W(name)

            # Create list of NS attributes to render.
            ns_to_render = []
            for n, v in ns_local.items():

                # If default namespace is XMLNS.BASE or empty,
                # and if an ancestor was the same
                if n == "xmlns" and v in [XMLNS.BASE, ''] \
                and ns_rendered.get('xmlns') in [XMLNS.BASE, '', None]:
                    continue

                # "omit namespace node with local name xml, which defines
                # the xml prefix, if its string value is
                # http://www.w3.org/XML/1998/namespace."
                if n in ["xmlns:xml", "xml"] \
                and v in ['http://www.w3.org/XML/1998/namespace']:
                    continue

                # If not previously rendered
                # and it's inclusive  or utilized
                if not (n in ns_rendered and ns_rendered[n] == v):
                    if inclusive or _utilized(n, node, other_attrs,
                                              self.unsuppressedPrefixes):
                        ns_to_render.append((n, v))
                    else:
                        # Only reached in exclusive mode: remember the
                        # declaration in case a descendant needs it.
                        ns_unused_inherited[n] = v

            # Sort and render the ns, marking what was rendered.
            ns_to_render.sort(key=cmp_to_key(_sorter_ns))
            for n, v in ns_to_render:
                self._do_attr(n, v)
                ns_rendered[n] = v    #0417

            # If exclusive or the parent is in the subset, add the local xml attributes
            # Else, add all local and ancestor xml attributes
            # Sort and render the attributes.
            if not inclusive or _in_subset(self.subset, node.parentNode):  #0426
                other_attrs.extend(xml_attrs_local.values())
            else:
                other_attrs.extend(xml_attrs.values())
            other_attrs.sort(key=cmp_to_key(_sorter))
            for a in other_attrs:
                self._do_attr(a.nodeName, a.value)
            W('>')

        # Push state, recurse, pop state.
        state, self.state = self.state, (ns_local, ns_rendered, xml_attrs, ns_unused_inherited)
        for c in _children(node):
            _implementation.handlers[c.nodeType](self, c)
        self.state = state

        if name:
            W('</%s>' % name)
    handlers[Node.ELEMENT_NODE] = _do_element
411
412
def Canonicalize(node, output=None, **kw):
    '''Canonicalize(node, output=None, **kw) -> UTF-8

    Canonicalize a DOM document/element node and all descendents.
    Return the text; if output is specified then output.write will
    be called to output the text and None will be returned
    Keyword parameters:
        nsdict: a dictionary of prefix:uri namespace entries
                assumed to exist in the surrounding context
        comments: keep comments if non-zero (default is 0)
        subset: Canonical XML subsetting resulting from XPath
                (default is None, meaning every node is included)
        unsuppressedPrefixes: do exclusive C14N, and this specifies the
                prefixes that should be inherited.
    '''
    # Note: output is tested for truth, so a false-valued object is
    # treated like None and the text is returned instead.
    if output:
        # Argument unpacking replaces the apply() builtin.
        _implementation(node, output.write, **kw)
        return None
    s = StringIO.StringIO()
    _implementation(node, s.write, **kw)
    return s.getvalue()
Note: See TracBrowser for help on using the repository browser.