Package common :: Package xmpp :: Module simplexml
[hide private]
[frames] | no frames]

Source Code for Module common.xmpp.simplexml

  1  ##   simplexml.py based on Mattew Allum's xmlstream.py 
  2  ## 
  3  ##   Copyright (C) 2003-2005 Alexey "Snake" Nezhdanov 
  4  ## 
  5  ##   This program is free software; you can redistribute it and/or modify 
  6  ##   it under the terms of the GNU General Public License as published by 
  7  ##   the Free Software Foundation; either version 2, or (at your option) 
  8  ##   any later version. 
  9  ## 
 10  ##   This program is distributed in the hope that it will be useful, 
 11  ##   but WITHOUT ANY WARRANTY; without even the implied warranty of 
 12  ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 13  ##   GNU General Public License for more details. 
 14   
 15  # $Id: simplexml.py,v 1.27 2005/04/30 07:20:27 snakeru Exp $ 
 16   
 17  """ 
 18  Simplexml module provides xmpppy library with all needed tools to handle XML 
 19  nodes and XML streams. I'm personally using it in many other separate 
 20  projects. It is designed to be as standalone as possible 
 21  """ 
 22   
 23  import xml.parsers.expat 
 24  import logging 
 25  log = logging.getLogger('gajim.c.x.simplexml') 
 26   
def XMLescape(txt):
    """
    Return provided string with symbols & < > " replaced by their respective XML
    entities

    Characters that XML 1.0 does not allow at all - every control character
    below U+0020 except TAB, LF and CR, which includes FORM FEED and ESC - are
    removed, because no entity reference can make them legal in a document.
    """
    # Local import: the module header does not import re
    import re
    # "&" has to be handled first, otherwise the ampersands of the entities
    # produced by the following replacements would be escaped a second time
    escaped = txt.replace("&", "&amp;")
    escaped = escaped.replace("<", "&lt;").replace(">", "&gt;")
    escaped = escaped.replace('"', "&quot;")
    return re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f]', '', escaped)
34 35 ENCODING='utf-8' 36
def ustr(what):
    """
    Return "what" as a unicode string

    Unicode input is handed back untouched. Anything else is rendered through
    its own __str__ method (falling back to str() when it has none) and, when
    that yields something other than unicode, decoded using ENCODING.
    """
    if isinstance(what, unicode):
        return what
    try:
        text = what.__str__()
    except AttributeError:
        text = str(what)
    if isinstance(text, unicode):
        return text
    return unicode(text, ENCODING)
51
class Node(object):
    """
    Node class describes syntax of separate XML Node. It has a constructor that
    permits node creation from set of "namespace name", attributes and payload
    of text strings and other nodes. It does not natively support building node
    from text string and uses NodeBuilder class for that purpose. After
    creation node can be mangled in many ways so it can be completely changed.
    Also node can be serialised into string in one of two modes: default (where
    the textual representation of node describes it exactly) and "fancy" - with
    whitespace added to make indentation and thus make result more readable by
    human.

    Node class has attribute FORCE_NODE_RECREATION that defaults to False
    thus enabling fast node replication from some other node. The drawback
    of the fast way is that the new node shares some info with the "original"
    node (the child nodes are the same objects), so changing one node may
    influence the other.
    """

    FORCE_NODE_RECREATION = 0

    def __init__(self, tag=None, attrs={}, payload=[], parent=None, nsp=None,
            node_built=False, node=None):
        """
        Takes "tag" argument as the name of node (prepended by namespace, if
        needed and separated from it by a space), attrs dictionary as the set of
        arguments, payload list as the set of textual strings and child nodes
        that this node carries within itself and "parent" argument that is
        another node that this one will be the child of. Also the __init__ can be
        provided with "node" argument that is either a text string containing
        exactly one node or another Node instance to begin with. If both "node"
        and other arguments are provided then the node is initially created as
        replica of "node" provided and then modified to be compliant with other
        arguments.

        "nsp" is a prefix -> namespace mapping copied into the lookup cache.
        "node_built" tells that "tag" is a raw "prefix:name" string as delivered
        by the parser, so the namespace has to be resolved via lookup_nsp().
        """
        # The mutable defaults of attrs/payload are only read in this method.
        if node:
            if self.FORCE_NODE_RECREATION and isinstance(node, Node):
                # Serialise and reparse instead of sharing state
                node = str(node)
            if not isinstance(node, Node):
                # NodeBuilder re-runs __init__ on self with the parsed values
                node = NodeBuilder(node, self)
                node_built = True
            else:
                # Shallow replica: containers are new, child nodes are shared
                self.name = node.name
                self.namespace = node.namespace
                self.parent = node.parent
                self.attrs = dict(node.attrs)
                self.data = list(node.data)
                self.kids = list(node.kids)
                self.nsd = dict(node.nsd)
        else:
            self.name = 'tag'
            self.namespace = ''
            self.attrs = {}
            self.data = []
            self.kids = []
            self.parent = None
            self.nsd = {}
        if parent:
            self.parent = parent
        self.nsp_cache = {}
        if nsp:
            self.nsp_cache.update(nsp)
        for attr, val in attrs.items():
            # Namespace declarations are remembered in nsd and still kept as
            # ordinary attributes
            if attr == 'xmlns':
                self.nsd[u''] = val
            elif attr.startswith('xmlns:'):
                self.nsd[attr[6:]] = val
            self.attrs[attr] = val
        if tag:
            if node_built:
                # "prefix:name" or plain "name" (empty prefix)
                pfx, self.name = ([''] + tag.split(':'))[-2:]
                self.namespace = self.lookup_nsp(pfx)
            elif ' ' in tag:
                self.namespace, self.name = tag.split()
            else:
                self.name = tag
        if isinstance(payload, basestring):
            payload = [payload]
        for i in payload:
            if isinstance(i, Node):
                self.addChild(node=i)
            else:
                self.data.append(ustr(i))

    def lookup_nsp(self, pfx=''):
        """
        Resolve namespace prefix "pfx" to a namespace

        Looks at the declarations of this node, then at its cache, then asks
        the parent (caching the answer). A node without parent returns a fixed
        "undeclared" placeholder namespace.
        """
        ns = self.nsd.get(pfx, None)
        if ns is None:
            ns = self.nsp_cache.get(pfx, None)
        if ns is None:
            if self.parent:
                ns = self.parent.lookup_nsp(pfx)
                self.nsp_cache[pfx] = ns
            else:
                return 'http://www.gajim.org/xmlns/undeclared'
        return ns

    def __str__(self, fancy=0):
        """
        Method used to dump node into textual representation. If "fancy" argument
        is set to True produces indented output for readability
        """
        s = (fancy - 1) * 2 * ' ' + "<" + self.name
        if self.namespace:
            # Emit xmlns only where the namespace differs from the parent's
            if not self.parent or self.parent.namespace != self.namespace:
                if 'xmlns' not in self.attrs:
                    s = s + ' xmlns="%s"' % self.namespace
        for key in self.attrs.keys():
            val = ustr(self.attrs[key])
            s = s + ' %s="%s"' % (key, XMLescape(val))
        s = s + ">"
        cnt = 0
        if self.kids:
            if fancy:
                s = s + "\n"
            for a in self.kids:
                # data[cnt] is the text that precedes kids[cnt]
                if not fancy and (len(self.data) - 1) >= cnt:
                    s = s + XMLescape(self.data[cnt])
                elif (len(self.data) - 1) >= cnt:
                    s = s + XMLescape(self.data[cnt].strip())
                if isinstance(a, str) or isinstance(a, unicode):
                    s = s + a.__str__()
                else:
                    s = s + a.__str__(fancy and fancy + 1)
                cnt = cnt + 1
        # Text chunk that follows the last child (or the only chunk)
        if not fancy and (len(self.data) - 1) >= cnt:
            s = s + XMLescape(self.data[cnt])
        elif (len(self.data) - 1) >= cnt:
            s = s + XMLescape(self.data[cnt].strip())
        if not self.kids and s.endswith('>'):
            # Nothing was appended after the start tag: collapse to <name />
            s = s[:-1] + ' />'
            if fancy:
                s = s + "\n"
        else:
            if fancy and not self.data:
                s = s + (fancy - 1) * 2 * ' '
            s = s + "</" + self.name + ">"
            if fancy:
                s = s + "\n"
        return s

    def addChild(self, name=None, attrs={}, payload=[], namespace=None, node=None):
        """
        If "node" argument is provided, adds it as child node. Else creates new
        node from the other arguments' values and adds it as well

        Raises AttributeError if attrs carries an 'xmlns' key; the namespace has
        to be given through the "namespace" argument. Returns the child.
        """
        if 'xmlns' in attrs:
            raise AttributeError("Use namespace=x instead of attrs={'xmlns':x}")
        if node:
            newnode = node
            node.parent = self
        else:
            newnode = Node(tag=name, parent=self, attrs=attrs, payload=payload)
        if namespace:
            newnode.setNamespace(namespace)
        self.kids.append(newnode)
        return newnode

    def addData(self, data):
        """
        Add some CDATA to node
        """
        self.data.append(ustr(data))

    def clearData(self):
        """
        Remove all CDATA from the node
        """
        self.data = []

    def delAttr(self, key):
        """
        Delete an attribute "key"
        """
        del self.attrs[key]

    def delChild(self, node, attrs={}):
        """
        Delete the "node" from the node's children list, if "node" is an
        instance. Else delete the first node that has specified name and
        (optionally) attributes. Returns the removed node
        """
        if not isinstance(node, Node):
            node = self.getTag(node, attrs)
        self.kids.remove(node)
        return node

    def getAttrs(self):
        """
        Return all node's attributes as dictionary
        """
        return self.attrs

    def getAttr(self, key):
        """
        Return value of specified attribute (None if it is not set)
        """
        return self.attrs.get(key)

    def getChildren(self):
        """
        Return all node's child nodes as list
        """
        return self.kids

    def getData(self):
        """
        Return all node CDATA as string (concatenated)
        """
        return ''.join(self.data)

    def getName(self):
        """
        Return the name of node
        """
        return self.name

    def getNamespace(self):
        """
        Return the namespace of node
        """
        return self.namespace

    def getParent(self):
        """
        Returns the parent of node (if present)
        """
        return self.parent

    def getPayload(self):
        """
        Return the payload of node i.e. list of child nodes and CDATA entries.
        F.e. for "<node>text1<nodea/><nodeb/> text2</node>" will be returned
        list: ['text1', <nodea instance>, <nodeb instance>, ' text2']
        """
        ret = []
        # Interleave data[i] (empty chunks are skipped) with kids[i]
        for i in range(max(len(self.data), len(self.kids))):
            if i < len(self.data) and self.data[i]:
                ret.append(self.data[i])
            if i < len(self.kids):
                ret.append(self.kids[i])
        return ret

    def getTag(self, name, attrs={}, namespace=None):
        """
        Filter all child nodes using specified arguments as filter. Return the
        first found or None if not found
        """
        return self.getTags(name, attrs, namespace, one=1)

    def getTagAttr(self, tag, attr):
        """
        Return attribute value of the child with specified name (or None if no
        such child or no such attribute)
        """
        try:
            return self.getTag(tag).attrs[attr]
        except Exception:
            # Missing child (None has no attrs) or missing attribute. Unlike
            # a bare except this lets KeyboardInterrupt/SystemExit through.
            return None

    def getTagData(self, tag):
        """
        Return concatenated CDATA of the child with specified name (None if
        there is no such child)
        """
        try:
            return self.getTag(tag).getData()
        except Exception:
            return None

    def getTags(self, name, attrs={}, namespace=None, one=0):
        """
        Filter all child nodes using specified arguments as filter. Returns the
        list of nodes found. If "one" is set, returns the first match instead
        (None when nothing matches)
        """
        nodes = []
        for node in self.kids:
            if namespace and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or node.attrs[key] != attrs[key]:
                        break
                else:
                    # Every requested attribute matched
                    nodes.append(node)
            if one and nodes:
                return nodes[0]
        if not one:
            return nodes

    def iterTags(self, name, attrs={}, namespace=None):
        """
        Iterate over all children using specified arguments as filter
        """
        for node in self.kids:
            if namespace is not None and namespace != node.getNamespace():
                continue
            if node.getName() == name:
                for key in attrs.keys():
                    if key not in node.attrs or \
                            node.attrs[key] != attrs[key]:
                        break
                else:
                    yield node

    def setAttr(self, key, val):
        """
        Set attribute "key" with the value "val"
        """
        self.attrs[key] = val

    def setData(self, data):
        """
        Set node's CDATA to provided string. Resets all previous CDATA!
        """
        self.data = [ustr(data)]

    def setName(self, val):
        """
        Change the node name
        """
        self.name = val

    def setNamespace(self, namespace):
        """
        Changes the node namespace
        """
        self.namespace = namespace

    def setParent(self, node):
        """
        Set node's parent to "node". WARNING: does not check if the parent is
        already present and does not remove the node from the list of children
        of the previous parent
        """
        self.parent = node

    def setPayload(self, payload, add=0):
        """
        Set node's children according to the list specified. WARNING: unless
        "add" is set, completely replaces the previous list of children. If you
        wish just to add child or CDATA - use addData or addChild methods

        A single string is treated as a one-element list. The entries are
        stored as given: parents are not updated and the CDATA list is left
        alone.
        """
        if isinstance(payload, basestring):
            payload = [payload]
        if add:
            self.kids += payload
        else:
            # Copy, so that later addChild() calls do not silently grow the
            # list object owned by the caller
            self.kids = list(payload)

    def setTag(self, name, attrs={}, namespace=None):
        """
        Same as getTag but if the node with specified namespace/attributes not
        found, creates such node and returns it
        """
        node = self.getTags(name, attrs, namespace=namespace, one=1)
        if node:
            return node
        return self.addChild(name, attrs, namespace=namespace)

    def setTagAttr(self, tag, attr, val):
        """
        Create new node (if not already present) with name "tag" and set its
        attribute "attr" to value "val"
        """
        try:
            self.getTag(tag).attrs[attr] = val
        except Exception:
            self.addChild(tag, attrs={attr: val})

    def setTagData(self, tag, val, attrs={}):
        """
        Creates new node (if not already present) with name "tag" and
        (optionally) attributes "attrs" and sets its CDATA to string "val"
        """
        try:
            self.getTag(tag, attrs).setData(ustr(val))
        except Exception:
            self.addChild(tag, attrs, payload=[ustr(val)])

    def has_attr(self, key):
        """
        Check if node has attribute "key"
        """
        return key in self.attrs

    def __getitem__(self, item):
        """
        Return node's attribute "item" value
        """
        return self.getAttr(item)

    def __setitem__(self, item, val):
        """
        Set node's attribute "item" value
        """
        return self.setAttr(item, val)

    def __delitem__(self, item):
        """
        Delete node's attribute "item"
        """
        return self.delAttr(item)

    def __contains__(self, item):
        """
        Check if node has attribute "item"
        """
        return self.has_attr(item)

    def __getattr__(self, attr):
        """
        Reduce memory usage caused by T/NT classes - use memory only when needed

        Only reached for attributes that are not set yet: the T/NT helpers are
        created on first access and stored on the instance. Any other name
        raises AttributeError carrying that name.
        """
        if attr == 'T':
            self.T = T(self)
            return self.T
        if attr == 'NT':
            self.NT = NT(self)
            return self.NT
        raise AttributeError(attr)
466
class T:
    """
    Auxiliary class giving attribute-style access to a node's child nodes:
    reading an attribute fetches (or creates) the child of that name, assigning
    sets it and deleting removes it
    """

    def __init__(self, node):
        # Written straight into __dict__ so that __setattr__ below is bypassed
        self.__dict__['node'] = node

    def __getattr__(self, attr):
        owner = self.node
        return owner.setTag(attr)

    def __setattr__(self, attr, val):
        if not isinstance(val, Node):
            return self.node.setTagData(attr, val)
        # Re-initialise the (possibly just created) child as a replica of val
        target = self.node.setTag(attr)
        Node.__init__(target, node=val)

    def __delattr__(self, attr):
        owner = self.node
        return owner.delChild(attr)
486
class NT(T):
    """
    Auxiliary class used to quickly create a node's child nodes: every
    attribute read or assignment appends a new child of that name
    """

    def __getattr__(self, attr):
        owner = self.node
        return owner.addChild(attr)

    def __setattr__(self, attr, val):
        if not isinstance(val, Node):
            return self.node.addChild(attr, payload=[val])
        # A ready-made Node is attached as it is
        self.node.addChild(attr, node=val)
500
501 -class NodeBuilder:
502 """ 503 Builds a Node class minidom from data parsed to it. This class used for two 504 purposes: 505 1. Creation an XML Node from a textual representation. F.e. reading a 506 config file. See an XML2Node method. 507 2. Handling an incoming XML stream. This is done by mangling the 508 __dispatch_depth parameter and redefining the dispatch method. 509 510 You do not need to use this class directly if you do not designing your own 511 XML handler 512 """ 513
514 - def __init__(self, data=None, initial_node=None):
515 """ 516 Take two optional parameters: "data" and "initial_node" 517 518 By default class initialised with empty Node class instance. Though, if 519 "initial_node" is provided it used as "starting point". You can think 520 about it as of "node upgrade". "data" (if provided) feeded to parser 521 immidiatedly after instance init. 522 """ 523 log.debug("Preparing to handle incoming XML stream.") 524 self._parser = xml.parsers.expat.ParserCreate() 525 self._parser.StartElementHandler = self.starttag 526 self._parser.EndElementHandler = self.endtag 527 self._parser.StartNamespaceDeclHandler = self.handle_namespace_start 528 self._parser.CharacterDataHandler = self.handle_cdata 529 self._parser.buffer_text = True 530 self.Parse = self._parser.Parse 531 532 self.__depth = 0 533 self.__last_depth = 0 534 self.__max_depth = 0 535 self._dispatch_depth = 1 536 self._document_attrs = None 537 self._document_nsp = None 538 self._mini_dom=initial_node 539 self.last_is_data = 1 540 self._ptr=None 541 self.data_buffer = None 542 self.streamError = '' 543 if data: 544 self._parser.Parse(data, 1)
545
546 - def check_data_buffer(self):
547 if self.data_buffer: 548 self._ptr.data.append(''.join(self.data_buffer)) 549 del self.data_buffer[:] 550 self.data_buffer = None
551
552 - def destroy(self):
553 """ 554 Method used to allow class instance to be garbage-collected 555 """ 556 self.check_data_buffer() 557 self._parser.StartElementHandler = None 558 self._parser.EndElementHandler = None 559 self._parser.CharacterDataHandler = None 560 self._parser.StartNamespaceDeclHandler = None
561
562 - def starttag(self, tag, attrs):
563 """ 564 XML Parser callback. Used internally 565 """ 566 self.check_data_buffer() 567 self._inc_depth() 568 log.info("STARTTAG.. DEPTH -> %i , tag -> %s, attrs -> %s" % (self.__depth, tag, `attrs`)) 569 if self.__depth == self._dispatch_depth: 570 if not self._mini_dom : 571 self._mini_dom = Node(tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True) 572 else: 573 Node.__init__(self._mini_dom, tag=tag, attrs=attrs, nsp = self._document_nsp, node_built=True) 574 self._ptr = self._mini_dom 575 elif self.__depth > self._dispatch_depth: 576 self._ptr.kids.append(Node(tag=tag, parent=self._ptr, attrs=attrs, node_built=True)) 577 self._ptr = self._ptr.kids[-1] 578 if self.__depth == 1: 579 self._document_attrs = {} 580 self._document_nsp = {} 581 nsp, name = (['']+tag.split(':'))[-2:] 582 for attr, val in attrs.items(): 583 if attr == 'xmlns': 584 self._document_nsp[u''] = val 585 elif attr.startswith('xmlns:'): 586 self._document_nsp[attr[6:]] = val 587 else: 588 self._document_attrs[attr] = val 589 ns = self._document_nsp.get(nsp, 'http://www.gajim.org/xmlns/undeclared-root') 590 try: 591 self.stream_header_received(ns, name, attrs) 592 except ValueError, e: 593 self._document_attrs = None 594 raise ValueError(str(e)) 595 if not self.last_is_data and self._ptr.parent: 596 self._ptr.parent.data.append('') 597 self.last_is_data = 0
598
599 - def endtag(self, tag ):
600 """ 601 XML Parser callback. Used internally 602 """ 603 log.info("DEPTH -> %i , tag -> %s" % (self.__depth, tag)) 604 self.check_data_buffer() 605 if self.__depth == self._dispatch_depth: 606 if self._mini_dom.getName() == 'error': 607 children = self._mini_dom.getChildren() 608 if children: 609 self.streamError = children[0].getName() 610 else: 611 self.streamError = self._mini_dom.getData() 612 self.dispatch(self._mini_dom) 613 elif self.__depth > self._dispatch_depth: 614 self._ptr = self._ptr.parent 615 else: 616 log.info("Got higher than dispatch level. Stream terminated?") 617 self._dec_depth() 618 self.last_is_data = 0 619 if self.__depth == 0: self.stream_footer_received()
620
621 - def handle_cdata(self, data):
622 if self.last_is_data: 623 if self.data_buffer: 624 self.data_buffer.append(data) 625 elif self._ptr: 626 self.data_buffer = [data] 627 self.last_is_data = 1
628
629 - def handle_namespace_start(self, prefix, uri):
630 """ 631 XML Parser callback. Used internally 632 """ 633 self.check_data_buffer()
634
635 - def getDom(self):
636 """ 637 Return just built Node 638 """ 639 self.check_data_buffer() 640 return self._mini_dom
641
642 - def dispatch(self, stanza):
643 """ 644 Get called when the NodeBuilder reaches some level of depth on it's way 645 up with the built node as argument. Can be redefined to convert incoming 646 XML stanzas to program events 647 """ 648 pass
649
650 - def stream_header_received(self, ns, tag, attrs):
651 """ 652 Method called when stream just opened 653 """ 654 self.check_data_buffer()
655 661
662 - def has_received_endtag(self, level=0):
663 """ 664 Return True if at least one end tag was seen (at level) 665 """ 666 return self.__depth <= level and self.__max_depth > level
667
668 - def _inc_depth(self):
669 self.__last_depth = self.__depth 670 self.__depth += 1 671 self.__max_depth = max(self.__depth, self.__max_depth)
672
673 - def _dec_depth(self):
674 self.__last_depth = self.__depth 675 self.__depth -= 1
676
def XML2Node(xml):
    """
    Convert supplied textual string into XML node. Handy f.e. for reading
    configuration file. The string is parsed as one complete document, so
    xml.parsers.expat.ExpatError is raised if it is not well-formed XML
    """
    builder = NodeBuilder(xml)
    return builder.getDom()
684
def BadXML2Node(xml):
    """
    Convert supplied textual string into XML node. Meant to survive xml data
    that is cut half way round, i.e. "<html>some text <br>some more text",
    while still failing on misplaced tags, f.e. "<b>some text <br>some more
    text</b>"

    NOTE(review): the body is identical to XML2Node - the data is handed to
    NodeBuilder, which parses it as a complete document - so confirm whether
    truncated input is really tolerated before relying on it
    """
    builder = NodeBuilder(xml)
    return builder.getDom()
693