1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 """
18 Simplexml module provides xmpppy library with all needed tools to handle XML
19 nodes and XML streams. I'm personally using it in many other separate
20 projects. It is designed to be as standalone as possible
21 """
22
23 import xml.parsers.expat
24 import logging
25 log = logging.getLogger('gajim.c.x.simplexml')
26
28 """
29 Return provided string with symbols & < > " replaced by their respective XML
30 entities
31 """
32
33 return txt.replace("&", "&").replace("<", "<").replace(">", ">").replace('"', """).replace(u'\x0C', "").replace(u'\x1B', "")
34
35 ENCODING='utf-8'
36
38 """
39 Converts object "what" to unicode string using its own __str__ method if
40 accessible or unicode method otherwise
41 """
42 if isinstance(what, unicode):
43 return what
44 try:
45 r = what.__str__()
46 except AttributeError:
47 r = str(what)
48 if not isinstance(r, unicode):
49 return unicode(r, ENCODING)
50 return r
51
53 """
54 Node class describes syntax of separate XML Node. It has a constructor that
55 permits node creation from set of "namespace name", attributes and payload
56 of text strings and other nodes. It does not natively support building node
57 from text string and uses NodeBuilder class for that purpose. After
58 creation node can be mangled in many ways so it can be completely changed.
59 Also node can be serialised into string in one of two modes: default (where
60 the textual representation of node describes it exactly) and "fancy" - with
61 whitespace added to make indentation and thus make result more readable by
62 human.
63
64 Node class has attribute FORCE_NODE_RECREATION that defaults to False
65 thus enabling fast node replication from some other node. The drawback
66 of the fast way is that new node shares some info with the "original" node
67 that is changing the one node may influence the other. Though it is rarely
68 needed (in xmpppy it is never needed at all since I'm usually never using
69 original node after replication (and using replication only to move upwards
70 on the classes tree).
71 """
72
73 FORCE_NODE_RECREATION = 0
74
def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None,
        node_built=False, node=None):
    """
    Build a node from a tag name, attributes and payload, optionally starting
    from an existing "node"

    "tag" is the node name. Unless "node_built" is set it may be given as
    "namespace name" (namespace first, separated by a space). When
    "node_built" is set, "tag" is treated as "prefix:name" and the prefix is
    resolved with lookup_nsp(). "attrs" is a dictionary of attributes; its
    'xmlns' and 'xmlns:*' entries are additionally recorded as namespace
    declarations. "payload" is a string or a list: Node items are added as
    children, everything else is stored as CDATA. "parent", if given,
    becomes the parent node and "nsp" pre-fills the namespace prefix cache.

    "node" may be another Node instance, which is replicated (attributes,
    CDATA list, children list and namespace declarations are shallow
    copies), or anything else, which is handed to NodeBuilder to be parsed
    into this instance. The remaining arguments are then applied on top.
    """
    if not node:
        # Nothing to start from: begin with an empty node named 'tag'
        self.name = 'tag'
        self.namespace = ''
        self.attrs = {}
        self.data = []
        self.kids = []
        self.parent = None
        self.nsd = {}
    else:
        # Serialising the source node first forces it through the parser
        # branch below, so nothing is shared with the original
        if self.FORCE_NODE_RECREATION and isinstance(node, Node):
            node = str(node)
        if isinstance(node, Node):
            src_name, src_namespace, src_parent = (
                node.name, node.namespace, node.parent)
            self.name = src_name
            self.namespace = src_namespace
            self.attrs = {}
            self.data = []
            self.kids = []
            self.parent = src_parent
            self.nsd = {}
            # Containers are copied, their items are shared with "node"
            self.attrs.update(node.attrs)
            self.data.extend(node.data)
            self.kids.extend(node.kids)
            self.nsd.update(node.nsd)
        else:
            # The builder fills in this very instance while parsing
            node = NodeBuilder(node, self)
            node_built = True

    if parent:
        self.parent = parent

    if nsp:
        self.nsp_cache = dict(nsp.items())
    else:
        self.nsp_cache = {}

    for attr_name, attr_value in attrs.items():
        if attr_name == 'xmlns':
            self.nsd[u''] = attr_value
        elif attr_name.startswith('xmlns:'):
            self.nsd[attr_name[len('xmlns:'):]] = attr_value
        # Namespace declarations are kept as ordinary attributes too
        self.attrs[attr_name] = attr_value

    if tag and node_built:
        # "prefix:name" or just "name" (empty prefix)
        prefix, self.name = ([''] + tag.split(':'))[-2:]
        self.namespace = self.lookup_nsp(prefix)
    elif tag and ' ' in tag:
        self.namespace, self.name = tag.split()
    elif tag:
        self.name = tag

    if isinstance(payload, basestring):
        payload = [payload]
    for item in payload:
        if not isinstance(item, Node):
            self.data.append(ustr(item))
        else:
            self.addChild(node=item)
133
135 ns = self.nsd.get(pfx, None)
136 if ns is None:
137 ns = self.nsp_cache.get(pfx, None)
138 if ns is None:
139 if self.parent:
140 ns = self.parent.lookup_nsp(pfx)
141 self.nsp_cache[pfx] = ns
142 else:
143 return 'http://www.gajim.org/xmlns/undeclared'
144 return ns
145
147 """
148 Method used to dump node into textual representation. If "fancy" argument
149 is set to True produces indented output for readability
150 """
151 s = (fancy-1) * 2 * ' ' + "<" + self.name
152 if self.namespace:
153 if not self.parent or self.parent.namespace!=self.namespace:
154 if 'xmlns' not in self.attrs:
155 s = s + ' xmlns="%s"'%self.namespace
156 for key in self.attrs.keys():
157 val = ustr(self.attrs[key])
158 s = s + ' %s="%s"' % ( key, XMLescape(val) )
159 s = s + ">"
160 cnt = 0
161 if self.kids:
162 if fancy: s = s + "\n"
163 for a in self.kids:
164 if not fancy and (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt])
165 elif (len(self.data)-1)>=cnt: s=s+XMLescape(self.data[cnt].strip())
166 if isinstance(a, str) or isinstance(a, unicode):
167 s = s + a.__str__()
168 else:
169 s = s + a.__str__(fancy and fancy+1)
170 cnt=cnt+1
171 if not fancy and (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt])
172 elif (len(self.data)-1) >= cnt: s = s + XMLescape(self.data[cnt].strip())
173 if not self.kids and s.endswith('>'):
174 s=s[:-1]+' />'
175 if fancy: s = s + "\n"
176 else:
177 if fancy and not self.data: s = s + (fancy-1) * 2 * ' '
178 s = s + "</" + self.name + ">"
179 if fancy: s = s + "\n"
180 return s
181
def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
    """
    Append a child to this node and return it

    When "node" is given it is re-parented to this node and used as the
    child; otherwise a new Node is created from "name", "attrs" and
    "payload". If "namespace" is given it is applied to the child with
    setNamespace(). Raises AttributeError if "attrs" contains 'xmlns' -
    pass "namespace" instead.
    """
    if 'xmlns' in attrs:
        raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
    if not node:
        child = Node(tag=name, parent=self, attrs=attrs, payload=payload)
    else:
        child = node
        child.parent = self
    if namespace:
        child.setNamespace(namespace)
    self.kids.append(child)
    return child
197
199 """
200 Add some CDATA to node
201 """
202 self.data.append(ustr(data))
203
205 """
206 Remove all CDATA from the node
207 """
208 self.data = []
209
211 """
212 Delete an attribute "key"
213 """
214 del self.attrs[key]
215
217 """
218 Delete the "node" from the node's childs list, if "node" is an instance.
219 Else delete the first node that have specified name and (optionally)
220 attributes
221 """
222 if not isinstance(node, Node):
223 node = self.getTag(node, attrs)
224 self.kids.remove(node)
225 return node
226
228 """
229 Return all node's attributes as dictionary
230 """
231 return self.attrs
232
234 """
235 Return value of specified attribute
236 """
237 return self.attrs.get(key)
238
240 """
241 Return all node's child nodes as list
242 """
243 return self.kids
244
246 """
247 Return all node CDATA as string (concatenated)
248 """
249 return ''.join(self.data)
250
252 """
253 Return the name of node
254 """
255 return self.name
256
258 """
259 Return the namespace of node
260 """
261 return self.namespace
262
264 """
265 Returns the parent of node (if present)
266 """
267 return self.parent
268
270 """
271 Return the payload of node i.e. list of child nodes and CDATA entries.
272 F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned
273 list: ['text1', <nodea instance>, <nodeb instance>, ' text2']
274 """
275 ret = []
276 for i in range(len(self.kids)+len(self.data)+1):
277 try:
278 if self.data[i]:
279 ret.append(self.data[i])
280 except IndexError:
281 pass
282 try:
283 ret.append(self.kids[i])
284 except IndexError:
285 pass
286 return ret
287
def getTag(self, name, attrs={}, namespace=None):
    """
    Look up a single child node through getTags(), passing "name", "attrs"
    and "namespace" as the filter and asking for one result only. Return
    whatever getTags() returns for that request
    """
    first_match = self.getTags(name, attrs, namespace, one=1)
    return first_match
294
296 """
297 Return attribute value of the child with specified name (or None if no
298 such attribute)
299 """
300 try:
301 return self.getTag(tag).attrs[attr]
302 except:
303 return None
304
306 """
307 Return concatenated CDATA of the child with specified name
308 """
309 try:
310 return self.getTag(tag).getData()
311 except Exception:
312 return None
313
333
348
350 """
351 Set attribute "key" with the value "val"
352 """
353 self.attrs[key] = val
354
356 """
357 Set node's CDATA to provided string. Resets all previous CDATA!
358 """
359 self.data = [ustr(data)]
360
362 """
363 Change the node name
364 """
365 self.name = val
366
372
374 """
375 Set node's parent to "node". WARNING: do not checks if the parent already
376 present and not removes the node from the list of childs of previous
377 parent
378 """
379 self.parent = node
380
382 """
383 Set node payload according to the list specified. WARNING: completely
384 replaces all node's previous content. If you wish just to add child or
385 CDATA - use addData or addChild methods
386 """
387 if isinstance(payload, basestring):
388 payload = [payload]
389 if add:
390 self.kids += payload
391 else:
392 self.kids = payload
393
def setTag(self, name, attrs={}, namespace=None):
    """
    Get-or-create variant of getTag: ask getTags() for one child matching
    "name", "attrs" and "namespace" and return it; if the lookup result is
    empty, create the child with addChild() and return the new node
    """
    existing = self.getTags(name, attrs, namespace=namespace, one=1)
    if not existing:
        return self.addChild(name, attrs, namespace=namespace)
    return existing
404
406 """
407 Create new node (if not already present) with name "tag" and set it's
408 attribute "attr" to value "val"
409 """
410 try:
411 self.getTag(tag).attrs[attr] = val
412 except Exception:
413 self.addChild(tag, attrs={attr: val})
414
416 """
417 Creates new node (if not already present) with name "tag" and
418 (optionally) attributes "attrs" and sets it's CDATA to string "val"
419 """
420 try:
421 self.getTag(tag, attrs).setData(ustr(val))
422 except Exception:
423 self.addChild(tag, attrs, payload = [ustr(val)])
424
426 """
427 Check if node have attribute "key"
428 """
429 return key in self.attrs
430
432 """
433 Return node's attribute "item" value
434 """
435 return self.getAttr(item)
436
438 """
439 Set node's attribute "item" value
440 """
441 return self.setAttr(item, val)
442
444 """
445 Delete node's attribute "item"
446 """
447 return self.delAttr(item)
448
450 """
451 Check if node has attribute "item"
452 """
453 return self.has_attr(item)
454
456 """
457 Reduce memory usage caused by T/NT classes - use memory only when needed
458 """
459 if attr == 'T':
460 self.T = T(self)
461 return self.T
462 if attr == 'NT':
463 self.NT = NT(self)
464 return self.NT
465 raise AttributeError
466
468 """
469 Auxiliary class used to quick access to node's child nodes
470 """
471
473 self.__dict__['node'] = node
474
476 return self.node.setTag(attr)
477
483
486
488 """
489 Auxiliary class used to quick create node's child nodes
490 """
491
494
496 if isinstance(val, Node):
497 self.node.addChild(attr, node=val)
498 else:
499 return self.node.addChild(attr, payload=[val])
500
502 """
503 Builds a Node class minidom from data parsed to it. This class used for two
504 purposes:
505 1. Creation an XML Node from a textual representation. F.e. reading a
506 config file. See an XML2Node method.
507 2. Handling an incoming XML stream. This is done by mangling the
508 __dispatch_depth parameter and redefining the dispatch method.
509
510 You do not need to use this class directly if you are not designing your own
511 XML handler
512 """
513
def __init__(self, data=None, initial_node=None):
    """
    Set up the expat parser and the builder state, then parse "data" if given

    Both parameters are optional. "initial_node", when provided, is stored
    as the node to be built (an existing node gets re-initialised by the
    parser callbacks instead of a new one being created). "data", when
    provided, is passed to the parser with the "final" flag set as the last
    step of initialisation.
    """
    log.debug("Preparing to handle incoming XML stream.")

    # Wire the expat callbacks to this instance
    parser = xml.parsers.expat.ParserCreate()
    parser.StartElementHandler = self.starttag
    parser.EndElementHandler = self.endtag
    parser.StartNamespaceDeclHandler = self.handle_namespace_start
    parser.CharacterDataHandler = self.handle_cdata
    parser.buffer_text = True
    self._parser = parser
    # Expose the parser's Parse method directly on the builder
    self.Parse = parser.Parse

    # Depth tracking; nodes completed at _dispatch_depth get dispatched
    self.__depth = self.__last_depth = self.__max_depth = 0
    self._dispatch_depth = 1

    # Attributes and namespaces of the document's root element
    self._document_attrs = None
    self._document_nsp = None

    # Tree under construction and the parsing cursor inside it
    self._mini_dom = initial_node
    self._ptr = None
    self.last_is_data = 1
    self.data_buffer = None
    self.streamError = ''

    if data:
        parser.Parse(data, 1)
545
547 if self.data_buffer:
548 self._ptr.data.append(''.join(self.data_buffer))
549 del self.data_buffer[:]
550 self.data_buffer = None
551
553 """
554 Method used to allow class instance to be garbage-collected
555 """
556 self.check_data_buffer()
557 self._parser.StartElementHandler = None
558 self._parser.EndElementHandler = None
559 self._parser.CharacterDataHandler = None
560 self._parser.StartNamespaceDeclHandler = None
561
563 """
564 XML Parser callback. Used internally
565 """
566 self.check_data_buffer()
567 self._inc_depth()
568 log.info("STARTTAG.. DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`))
569 if self.__depth == self._dispatch_depth:
570 if not self._mini_dom :
571 self._mini_dom = Node(tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True)
572 else:
573 Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True)
574 self._ptr = self._mini_dom
575 elif self.__depth > self._dispatch_depth:
576 self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True))
577 self._ptr = self._ptr.kids[-1]
578 if self.__depth == 1:
579 self._document_attrs = {}
580 self._document_nsp = {}
581 nsp, name = (['']+tag.split(':'))[-2:]
582 for attr, val in attrs.items():
583 if attr == 'xmlns':
584 self._document_nsp[u''] = val
585 elif attr.startswith('xmlns:'):
586 self._document_nsp[attr[6:]] = val
587 else:
588 self._document_attrs[attr] = val
589 ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root')
590 try:
591 self.stream_header_received(ns, name, attrs)
592 except ValueError, e:
593 self._document_attrs = None
594 raise ValueError(str(e))
595 if not self.last_is_data and self._ptr.parent:
596 self._ptr.parent.data.append('')
597 self.last_is_data = 0
598
600 """
601 XML Parser callback. Used internally
602 """
603 log.info("DEPTH -> %i , tag -> %s" % (self.__depth, tag))
604 self.check_data_buffer()
605 if self.__depth == self._dispatch_depth:
606 if self._mini_dom.getName() == 'error':
607 children = self._mini_dom.getChildren()
608 if children:
609 self.streamError = children[0].getName()
610 else:
611 self.streamError = self._mini_dom.getData()
612 self.dispatch(self._mini_dom)
613 elif self.__depth > self._dispatch_depth:
614 self._ptr = self._ptr.parent
615 else:
616 log.info("Got higher than dispatch level. Stream terminated?")
617 self._dec_depth()
618 self.last_is_data = 0
619 if self.__depth == 0: self.stream_footer_received()
620
622 if self.last_is_data:
623 if self.data_buffer:
624 self.data_buffer.append(data)
625 elif self._ptr:
626 self.data_buffer = [data]
627 self.last_is_data = 1
628
630 """
631 XML Parser callback. Used internally
632 """
633 self.check_data_buffer()
634
636 """
637 Return just built Node
638 """
639 self.check_data_buffer()
640 return self._mini_dom
641
643 """
644 Get called when the NodeBuilder reaches some level of depth on it's way
645 up with the built node as argument. Can be redefined to convert incoming
646 XML stanzas to program events
647 """
648 pass
649
651 """
652 Method called when stream just opened
653 """
654 self.check_data_buffer()
655
661
663 """
664 Return True if at least one end tag was seen (at level)
665 """
666 return self.__depth <= level and self.__max_depth > level
667
669 self.__last_depth = self.__depth
670 self.__depth += 1
671 self.__max_depth = max(self.__depth, self.__max_depth)
672
674 self.__last_depth = self.__depth
675 self.__depth -= 1
676
678 """
679 Convert supplied textual string into XML node. Handy f.e. for reading
680 configuration file. Raises xml.parser.expat.parsererror if provided string
681 is not well-formed XML
682 """
683 return NodeBuilder(xml).getDom()
684
686 """
687 Convert supplied textual string into XML node. Survives if xml data is
688 cut off halfway through. I.e. "<html>some text <br>some more text". Will raise
689 xml.parser.expat.parsererror on misplaced tags though. F.e. "<b>some text
690 <br>some more text</b>" will not work
691 """
692 return NodeBuilder(xml).getDom()
693