Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: OpenRLabs-Git/deploy/rlabs-docker/web2py-rlabs/gluon/contrib/pysimplesoap/c14n.py

main

Last change on this file was 42bd667, checked in by David Fuertes <dfuertes@…>, 4 years ago
Historial Limpio
Property mode set to `100755`
File size: 16.1 KB

Line
1	#! /usr/bin/env python
2	'''XML Canonicalization
3
4	Patches Applied to xml.dom.ext.c14n:
5	http://sourceforge.net/projects/pyxml/
6
7	[ 1444526 ] c14n.py: http://www.w3.org/TR/xml-exc-c14n/ fix
8	-- includes [ 829905 ] c14n.py fix for bug #825115,
9	Date Submitted: 2003-10-24 23:43
10	-- include dependent namespace declarations declared in ancestor nodes
11	(checking attributes and tags),
12	-- handle InclusiveNamespaces PrefixList parameter
13
14	This module generates canonical XML of a document or element.
15	http://www.w3.org/TR/2001/REC-xml-c14n-20010315
16	and includes a prototype of exclusive canonicalization
17	http://www.w3.org/Signature/Drafts/xml-exc-c14n
18
19	Requires PyXML 0.7.0 or later.
20
21	Known issues if using Ft.Lib.pDomlette:
22	1. Unicode
23	2. does not white space normalize attributes of type NMTOKEN and ID?
24	3. seems to include "\n" after importing external entities?
25
26	Note, this version processes a DOM tree, and consequently it processes
27	namespace nodes as attributes, not from a node's namespace axis. This
28	permits simple document and element canonicalization without
29	XPath. When XPath is used, the XPath result node list is passed and used to
30	determine if the node is in the XPath result list, but little else.
31
32	Authors:
33	"Joseph M. Reagle Jr." <reagle@w3.org>
34	"Rich Salz" <rsalz@zolera.com>
35
36	$Date: 2006-03-30 23:47:16 +0000 (Thu, 30 Mar 2006) $ by $Author: boverhof $
37	'''
38
39	_copyright = '''Copyright 2001, Zolera Systems Inc. All Rights Reserved.
40	Copyright 2001, MIT. All Rights Reserved.
41
42	Distributed under the terms of:
43	Python 2.0 License or later.
44	http://www.python.org/2.0.1/license.html
45	or
46	W3C Software License
47	http://www.w3.org/Consortium/Legal/copyright-software-19980720
48	'''
49
50	import string
51	from xml.dom import Node
52	try:
53	from xml.ns import XMLNS
54	except:
55	class XMLNS:
56	BASE = "http://www.w3.org/2000/xmlns/"
57	XML = "http://www.w3.org/XML/1998/namespace"
58	try:
59	import cStringIO
60	StringIO = cStringIO
61	except ImportError:
62	import StringIO
63
def _attrs(E):
    '''_attrs(E) -> sequence
    Return the attribute nodes of E, or an empty list when E.attributes
    is missing or empty.'''
    return (E.attributes and E.attributes.values()) or []


def _children(E):
    '''_children(E) -> sequence
    Return the child nodes of E, or an empty list when E.childNodes is
    missing or empty.'''
    return E.childNodes or []


def _IN_XML_NS(n):
    '''_IN_XML_NS(n) -> boolean
    Return true if the name of attribute node n starts with "xmlns".'''
    return n.name.startswith("xmlns")


def _inclusive(n):
    '''_inclusive(n) -> boolean
    Return true if n.unsuppressedPrefixes is None, i.e. no prefix list was
    supplied and inclusive (rather than exclusive) C14N is requested.'''
    return n.unsuppressedPrefixes is None


# Position in document order relative to the document element: before it,
# at/inside it, or after it.  Decides where comments/PIs get their "\n".
_LesserElement, _Element, _GreaterElement = range(3)
73
def _sorter(n1, n2):
    '''_sorter(n1,n2) -> int
    Sorting predicate for non-NS attributes.

    Compares two attribute nodes by namespaceURI first and by localName
    second, returning -1, 0 or 1.  A value of None sorts before any
    string.'''

    def _key(attr):
        # Pair every value with a "has a value" flag so that None is ordered
        # explicitly instead of relying on comparing None with a string,
        # which Python 3 rejects.
        uri, local = attr.namespaceURI, attr.localName
        return ((uri is not None, uri), (local is not None, local))

    k1, k2 = _key(n1), _key(n2)
    # Portable spelling of the old cmp() builtin.
    return (k1 > k2) - (k1 < k2)
81
82
def _sorter_ns(n1, n2):
    '''_sorter_ns((n,v),(n,v)) -> int
    "(an empty namespace URI is lexicographically least)."

    Sorting predicate for (name, value) namespace pairs: the default
    declaration "xmlns" sorts first, every other pair sorts by name.'''

    first, second = n1[0], n2[0]
    if first == 'xmlns':
        return -1
    if second == 'xmlns':
        return 1
    # Portable spelling of the old cmp() builtin.
    return (first > second) - (first < second)
90
def _utilized(n, node, other_attrs, unsuppressedPrefixes):
    '''_utilized(n, node, other_attrs, unsuppressedPrefixes) -> boolean
    Return true if that namespace is utilized within the node.

    n is a namespace declaration name ("xmlns" or "xmlns:prefix").  The
    declaration counts as utilized when its prefix is the prefix of node,
    is listed in unsuppressedPrefixes, or is the prefix of one of
    other_attrs (or, for exclusive C14N, of one of the node's attributes).
    unsuppressedPrefixes may be None, meaning no prefix list was given.'''
    if n.startswith('xmlns:'):
        n = n[6:]
    elif n.startswith('xmlns'):
        n = n[5:]

    # Guard the membership test: "n in None" would raise TypeError.
    exclusive = unsuppressedPrefixes is not None

    if (n == "" and node.prefix in ["#default", None]) or \
            n == node.prefix or (exclusive and n in unsuppressedPrefixes):
        return 1
    for attr in other_attrs:
        if n == attr.prefix:
            return 1
    # For exclusive need to look at attributes
    if exclusive:
        for attr in _attrs(node):
            if n == attr.prefix:
                return 1

    return 0
109
110
def _inclusiveNamespacePrefixes(node, context, unsuppressedPrefixes):
    '''http://www.w3.org/TR/xml-exc-c14n/
    InclusiveNamespaces PrefixList parameter, which lists namespace prefixes that
    are handled in the manner described by the Canonical XML Recommendation

    Splits the namespace declarations in context into the ones to keep and
    the prefixed ones that are not needed yet.  Returns a tuple
    (kept_declarations, {declaration_name: value}).'''
    # Declaration names the element relies on: its own prefix (or the
    # default namespace) plus the prefix of every non-declaration attribute.
    needed = ['xmlns:%s' % node.prefix if node.prefix else 'xmlns']
    attr_map = node.attributes
    for own in (attr_map and attr_map.values()) or []:
        is_ns_decl = own.nodeName.startswith('xmlns')
        if own.prefix and not is_ns_decl:
            needed.append('xmlns:%s' % own.prefix)

    kept, deferred = [], {}
    for decl in context:
        name = decl.nodeName
        prefixed = name.startswith('xmlns:')
        if (name in unsuppressedPrefixes
                or (prefixed and name[6:] in unsuppressedPrefixes)
                or (name.startswith('xmlns') and name[5:] in unsuppressedPrefixes)
                or name in needed):
            kept.append(decl)
        elif prefixed:
            # Not needed here; remember it in case a descendant uses it.
            deferred[name] = decl.value

    return kept, deferred
140
# rich's tweak: only a missing subset (None) means "everything"; an empty
# subset selects nothing (the earlier form used "not subset" here).
def _in_subset(subset, node):
    '''_in_subset(subset, node) -> boolean
    Return true if no subset was given (None) or node is in subset.'''
    if subset is None:
        return True
    return node in subset
143
144
class _implementation:
    '''Implementation class for C14N. This accompanies a node during its
    processing and includes the parameters and processing state.

    Instantiating the class runs the canonicalization: the constructor
    walks the given node and emits the canonical text piece by piece
    through the write callable.'''

    # Handler for each node type; populated while the class body executes.
    handlers = {}

    def __init__(self, node, write, **kw):
        '''Create and run the implementation.

        node -- DOM document or element node to canonicalize (a document
            type node is accepted and produces no output)
        write -- callable invoked with each piece of output text
        Keyword parameters:
            subset -- nodes to include, or None (default) for all nodes
            comments -- emit comments if true (default is 0)
            unsuppressedPrefixes -- if not None, do exclusive C14N with
                this prefix list
            nsdict -- namespace declarations assumed in the context

        Raises TypeError for any other node type.'''
        self.write = write
        self.subset = kw.get('subset')
        self.comments = kw.get('comments', 0)
        self.unsuppressedPrefixes = kw.get('unsuppressedPrefixes')
        nsdict = kw.get('nsdict', {'xml': XMLNS.XML, 'xmlns': XMLNS.BASE})

        # Processing state: (ns declarations of parent, ns rendered by
        # ancestors, inherited xml:* attributes, unused inherited ns).
        self.state = (nsdict, {'xml': ''}, {}, {})  #0422

        if node.nodeType == Node.DOCUMENT_NODE:
            self._do_document(node)
        elif node.nodeType == Node.ELEMENT_NODE:
            self.documentOrder = _Element        # At document element
            if not _inclusive(self):
                inherited, unused = _inclusiveNamespacePrefixes(
                    node, self._inherit_context(node),
                    self.unsuppressedPrefixes)
                self._do_element(node, inherited, unused=unused)
            else:
                inherited = self._inherit_context(node)
                self._do_element(node, inherited)
        elif node.nodeType == Node.DOCUMENT_TYPE_NODE:
            pass
        else:
            raise TypeError(str(node))

    def _inherit_context(self, node):
        '''_inherit_context(self, node) -> list
        Scan ancestors of attribute and namespace context. Used only
        for single element node canonicalization, not for subset
        canonicalization.

        Returns the ancestors' attributes whose name starts with "xmlns",
        nearest ancestor first, keeping one attribute per local name.'''

        # Seed with the node's own namespace declaration attributes.
        # NOTE(review): these are attribute nodes while the membership test
        # below compares local-name strings, so they never suppress an
        # ancestor declaration -- kept as in the original; confirm intent.
        xmlattrs = [a for a in _attrs(node) if _IN_XML_NS(a)]

        # Walk up and collect every declaration not already seen.
        inherited, parent = [], node.parentNode
        while parent and parent.nodeType == Node.ELEMENT_NODE:
            for a in _attrs(parent):
                if not _IN_XML_NS(a):
                    continue
                n = a.localName
                if n not in xmlattrs:
                    xmlattrs.append(n)
                    inherited.append(a)
            parent = parent.parentNode
        return inherited

    def _do_document(self, node):
        '''_do_document(self, node) -> None
        Process a document node. documentOrder holds whether the document
        element has been encountered such that PIs/comments can be written
        as specified.

        Raises TypeError for a child that is not an element, PI, comment
        or document type node.'''

        self.documentOrder = _LesserElement
        for child in node.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                self.documentOrder = _Element        # At document element
                self._do_element(child)
                self.documentOrder = _GreaterElement  # After document element
            elif child.nodeType == Node.PROCESSING_INSTRUCTION_NODE:
                self._do_pi(child)
            elif child.nodeType == Node.COMMENT_NODE:
                self._do_comment(child)
            elif child.nodeType == Node.DOCUMENT_TYPE_NODE:
                pass
            else:
                raise TypeError(str(child))
    handlers[Node.DOCUMENT_NODE] = _do_document

    def _do_text(self, node):
        '''_do_text(self, node) -> None
        Process a text or CDATA node. Render various special characters
        as their C14N entity representations.'''
        if not _in_subset(self.subset, node):
            return
        # "&" must be escaped first so the entities added below are not
        # escaped a second time.
        s = node.data.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace(">", "&gt;")
        s = s.replace("\015", "&#xD;")
        if s:
            self.write(s)
    handlers[Node.TEXT_NODE] = _do_text
    handlers[Node.CDATA_SECTION_NODE] = _do_text

    def _do_pi(self, node):
        '''_do_pi(self, node) -> None
        Process a PI node. Render a leading or trailing #xA if the
        document order of the PI is greater or lesser (respectively)
        than the document element.
        '''
        if not _in_subset(self.subset, node):
            return
        W = self.write
        if self.documentOrder == _GreaterElement:
            W('\n')
        W('<?')
        W(node.nodeName)
        s = node.data
        if s:
            W(' ')
            W(s)
        W('?>')
        if self.documentOrder == _LesserElement:
            W('\n')
    handlers[Node.PROCESSING_INSTRUCTION_NODE] = _do_pi

    def _do_comment(self, node):
        '''_do_comment(self, node) -> None
        Process a comment node. Render a leading or trailing #xA if the
        document order of the comment is greater or lesser (respectively)
        than the document element.  Nothing is written unless comments
        were requested.
        '''
        if not _in_subset(self.subset, node):
            return
        if self.comments:
            W = self.write
            if self.documentOrder == _GreaterElement:
                W('\n')
            W('<!--')
            W(node.data)
            W('-->')
            if self.documentOrder == _LesserElement:
                W('\n')
    handlers[Node.COMMENT_NODE] = _do_comment

    def _do_attr(self, n, value):
        '''_do_attr(self, n, value) -> None
        Process an attribute: write  n="value"  preceded by a space, with
        the special characters of value rendered as entity or character
        references.'''

        W = self.write
        W(' ')
        W(n)
        W('="')
        # "&" must be escaped first so the references added below are not
        # escaped a second time.
        s = value.replace("&", "&amp;")
        s = s.replace("<", "&lt;")
        s = s.replace('"', '&quot;')
        # Character references need their terminating ";".
        s = s.replace('\011', '&#x9;')
        s = s.replace('\012', '&#xA;')
        s = s.replace('\015', '&#xD;')
        W(s)
        W('"')

    def _do_element(self, node, initial_other_attrs=None, unused=None):
        '''_do_element(self, node, initial_other_attrs=None, unused=None) -> None
        Process an element (and its children).

        initial_other_attrs -- attribute nodes inherited from the context,
            processed before the element's own attributes
        unused -- for exclusive C14N, dict of inherited namespace
            declarations not rendered so far; it is modified in place.
            When None, a copy of the one in the current state is used.

        Raises RuntimeError in exclusive mode when the element's prefix
        cannot be mapped to any known namespace declaration.'''
        # Local import: keeps the cmp-style sorters usable on Python 3.
        from functools import cmp_to_key

        # Get state (from the stack) make local copies.
        #   ns_parent -- NS declarations in parent
        #   ns_rendered -- NS nodes rendered by ancestors
        #   ns_local -- NS declarations relevant to this element
        #   xml_attrs -- Attributes in XML namespace from parent
        #   xml_attrs_local -- Local attributes in XML namespace.
        #   ns_unused_inherited -- not rendered namespaces, used for exclusive
        ns_parent = self.state[0]
        ns_rendered = self.state[1].copy()
        xml_attrs = self.state[2].copy()  #0422

        if unused is None:
            ns_unused_inherited = self.state[3].copy()
        else:
            ns_unused_inherited = unused

        ns_local = ns_parent.copy()
        inclusive = _inclusive(self)
        xml_attrs_local = {}

        # Divide attributes into NS, XML, and others.
        other_attrs = []
        in_subset = _in_subset(self.subset, node)
        for a in list(initial_other_attrs or []) + list(_attrs(node)):
            if a.namespaceURI == XMLNS.BASE:
                n = a.nodeName
                if n == "xmlns:":
                    n = "xmlns"        # DOM bug workaround
                ns_local[n] = a.nodeValue
            elif a.namespaceURI == XMLNS.XML:
                #020925 Test to see if attribute node in subset
                if inclusive or (in_subset and _in_subset(self.subset, a)):
                    xml_attrs_local[a.nodeName] = a  #0426
            elif _in_subset(self.subset, a):
                #020925 Test to see if attribute node in subset
                other_attrs.append(a)

        # Add local xml:foo attributes to ancestor's xml:foo attributes.
        xml_attrs.update(xml_attrs_local)

        # Render the node
        W, name = self.write, None
        if in_subset:
            name = node.nodeName
            if not inclusive:
                if node.prefix is not None:
                    prefix = 'xmlns:%s' % node.prefix
                else:
                    prefix = 'xmlns'

                if prefix not in ns_rendered and prefix not in ns_local:
                    if prefix not in ns_unused_inherited:
                        raise RuntimeError(
                            'For exclusive c14n, unable to map prefix "%s" in %s' % (
                                prefix, node))

                    # First use of a declaration inherited but not yet
                    # rendered: make it local so it is rendered here.
                    ns_local[prefix] = ns_unused_inherited[prefix]
                    del ns_unused_inherited[prefix]

            W('<')
            W(name)

            # Create list of NS attributes to render.
            ns_to_render = []
            for n, v in ns_local.items():

                # If default namespace is XMLNS.BASE or empty,
                # and if an ancestor was the same
                if n == "xmlns" and v in [XMLNS.BASE, ''] \
                        and ns_rendered.get('xmlns') in [XMLNS.BASE, '', None]:
                    continue

                # "omit namespace node with local name xml, which defines
                # the xml prefix, if its string value is
                # http://www.w3.org/XML/1998/namespace."
                if n in ["xmlns:xml", "xml"] \
                        and v in ['http://www.w3.org/XML/1998/namespace']:
                    continue

                # If not previously rendered
                # and it's inclusive or utilized
                if not (n in ns_rendered and ns_rendered[n] == v):
                    if inclusive or _utilized(n, node, other_attrs,
                                              self.unsuppressedPrefixes):
                        ns_to_render.append((n, v))
                    elif not inclusive:
                        ns_unused_inherited[n] = v

            # Sort and render the ns, marking what was rendered.
            ns_to_render.sort(key=cmp_to_key(_sorter_ns))
            for n, v in ns_to_render:
                self._do_attr(n, v)
                ns_rendered[n] = v    #0417

            # If exclusive or the parent is in the subset, add the local xml attributes
            # Else, add all local and ancestor xml attributes
            # Sort and render the attributes.
            if not inclusive or _in_subset(self.subset, node.parentNode):  #0426
                other_attrs.extend(xml_attrs_local.values())
            else:
                other_attrs.extend(xml_attrs.values())
            other_attrs.sort(key=cmp_to_key(_sorter))
            for a in other_attrs:
                self._do_attr(a.nodeName, a.value)
            W('>')

        # Push state, recurse, pop state.
        state, self.state = \
            self.state, (ns_local, ns_rendered, xml_attrs, ns_unused_inherited)
        for c in _children(node):
            _implementation.handlers[c.nodeType](self, c)
        self.state = state

        if name:
            W('</%s>' % name)
    handlers[Node.ELEMENT_NODE] = _do_element
411
412
def Canonicalize(node, output=None, **kw):
    '''Canonicalize(node, output=None, **kw) -> UTF-8

    Canonicalize a DOM document/element node and all descendents.
    Return the text; if output is specified then output.write will
    be called to output the text and None will be returned
    Keyword parameters:
        nsdict: a dictionary of prefix:uri namespace entries
                assumed to exist in the surrounding context
        comments: keep comments if non-zero (default is 0)
        subset: Canonical XML subsetting resulting from XPath
                (default is None, meaning every node is included)
        unsuppressedPrefixes: do exclusive C14N, and this specifies the
                prefixes that should be inherited.
    '''
    if output:
        # Direct call instead of the deprecated apply() builtin.
        _implementation(node, output.write, **kw)
    else:
        s = StringIO.StringIO()
        _implementation(node, s.write, **kw)
        return s.getvalue()

Note: See TracBrowser for help on using the repository browser.

Download in other formats: