1   
  2   
  3  """ 
  4  SAX-based adapter to copy trees from/to the Python standard library. 
  5   
  6  Use the `ElementTreeContentHandler` class to build an ElementTree from 
  7  SAX events. 
  8   
  9  Use the `ElementTreeProducer` class or the `saxify()` function to fire 
 10  the SAX events of an ElementTree against a SAX ContentHandler. 
 11   
See https://lxml.de/sax.html
 13  """ 
 14   
 15  from __future__ import absolute_import 
 16   
 17  from xml.sax.handler import ContentHandler 
 18  from lxml import etree 
 19  from lxml.etree import ElementTree, SubElement 
 20  from lxml.etree import Comment, ProcessingInstruction 
 21   
 22   
 24      """General SAX error. 
 25      """ 
  26   
 27   
 29      if tag[0] == '{': 
 30          return tuple(tag[1:].split('}', 1)) 
 31      else: 
 32          return None, tag 
  33   
 34   
 35 -class ElementTreeContentHandler(ContentHandler): 
  36      """Build an lxml ElementTree from SAX events. 
 37      """ 
 38 -    def __init__(self, makeelement=None): 
  39          ContentHandler.__init__(self) 
 40          self._root = None 
 41          self._root_siblings = [] 
 42          self._element_stack = [] 
 43          self._default_ns = None 
 44          self._ns_mapping = { None : [None] } 
 45          self._new_mappings = {} 
 46          if makeelement is None: 
 47              makeelement = etree.Element 
 48          self._makeelement = makeelement 
  49   
 50 -    def _get_etree(self): 
  51          "Contains the generated ElementTree after parsing is finished." 
 52          return ElementTree(self._root) 
  53   
 54      etree = property(_get_etree, doc=_get_etree.__doc__) 
 55   
 56 -    def setDocumentLocator(self, locator): 
  58   
 59 -    def startDocument(self): 
  61   
 62 -    def endDocument(self): 
  64   
 65 -    def startPrefixMapping(self, prefix, uri): 
  66          self._new_mappings[prefix] = uri 
 67          try: 
 68              self._ns_mapping[prefix].append(uri) 
 69          except KeyError: 
 70              self._ns_mapping[prefix] = [uri] 
 71          if prefix is None: 
 72              self._default_ns = uri 
  73   
 74 -    def endPrefixMapping(self, prefix): 
  75          ns_uri_list = self._ns_mapping[prefix] 
 76          ns_uri_list.pop() 
 77          if prefix is None: 
 78              self._default_ns = ns_uri_list[-1] 
  79   
 80 -    def _buildTag(self, ns_name_tuple): 
  81          ns_uri, local_name = ns_name_tuple 
 82          if ns_uri: 
 83              el_tag = "{%s}%s" % ns_name_tuple 
 84          elif self._default_ns: 
 85              el_tag = "{%s}%s" % (self._default_ns, local_name) 
 86          else: 
 87              el_tag = local_name 
 88          return el_tag 
  89   
 90 -    def startElementNS(self, ns_name, qname, attributes=None): 
  91          el_name = self._buildTag(ns_name) 
 92          if attributes: 
 93              attrs = {} 
 94              try: 
 95                  iter_attributes = attributes.iteritems() 
 96              except AttributeError: 
 97                  iter_attributes = attributes.items() 
 98   
 99              for name_tuple, value in iter_attributes: 
100                  if name_tuple[0]: 
101                      attr_name = "{%s}%s" % name_tuple 
102                  else: 
103                      attr_name = name_tuple[1] 
104                  attrs[attr_name] = value 
105          else: 
106              attrs = None 
107   
108          element_stack = self._element_stack 
109          if self._root is None: 
110              element = self._root = \ 
111                        self._makeelement(el_name, attrs, self._new_mappings) 
112              if self._root_siblings and hasattr(element, 'addprevious'): 
113                  for sibling in self._root_siblings: 
114                      element.addprevious(sibling) 
115              del self._root_siblings[:] 
116          else: 
117              element = SubElement(element_stack[-1], el_name, 
118                                   attrs, self._new_mappings) 
119          element_stack.append(element) 
120   
121          self._new_mappings.clear() 
 122   
123 -    def processingInstruction(self, target, data): 
 124          pi = ProcessingInstruction(target, data) 
125          if self._root is None: 
126              self._root_siblings.append(pi) 
127          else: 
128              self._element_stack[-1].append(pi) 
 129   
130 -    def endElementNS(self, ns_name, qname): 
 131          element = self._element_stack.pop() 
132          el_tag = self._buildTag(ns_name) 
133          if el_tag != element.tag: 
134              raise SaxError("Unexpected element closed: " + el_tag) 
 135   
136 -    def startElement(self, name, attributes=None): 
 137          if attributes: 
138              attributes = dict( 
139                      [((None, k), v) for k, v in attributes.items()] 
140                  ) 
141          self.startElementNS((None, name), name, attributes) 
 142   
143 -    def endElement(self, name): 
 144          self.endElementNS((None, name), name) 
 145   
146 -    def characters(self, data): 
 147          last_element = self._element_stack[-1] 
148          try: 
149               
150              last_element = last_element[-1] 
151              last_element.tail = (last_element.tail or '') + data 
152          except IndexError: 
153               
154              last_element.text = (last_element.text or '') + data 
 155   
156      ignorableWhitespace = characters 
 157   
158   
160      """Produces SAX events for an element and children. 
161      """ 
162 -    def __init__(self, element_or_tree, content_handler): 
 163          try: 
164              element = element_or_tree.getroot() 
165          except AttributeError: 
166              element = element_or_tree 
167          self._element = element 
168          self._content_handler = content_handler 
169          from xml.sax.xmlreader import AttributesNSImpl as attr_class 
170          self._attr_class = attr_class 
171          self._empty_attributes = attr_class({}, {}) 
 172   
195   
197          content_handler = self._content_handler 
198          tag = element.tag 
199          if tag is Comment or tag is ProcessingInstruction: 
200              if tag is ProcessingInstruction: 
201                  content_handler.processingInstruction( 
202                      element.target, element.text) 
203              tail = element.tail 
204              if tail: 
205                  content_handler.characters(tail) 
206              return 
207   
208          element_nsmap = element.nsmap 
209          new_prefixes = [] 
210          if element_nsmap != parent_nsmap: 
211               
212              for prefix, ns_uri in element_nsmap.items(): 
213                  if parent_nsmap.get(prefix) != ns_uri: 
214                      new_prefixes.append( (prefix, ns_uri) ) 
215   
216          attribs = element.items() 
217          if attribs: 
218              attr_values = {} 
219              attr_qnames = {} 
220              for attr_ns_name, value in attribs: 
221                  attr_ns_tuple = _getNsTag(attr_ns_name) 
222                  attr_values[attr_ns_tuple] = value 
223                  attr_qnames[attr_ns_tuple] = self._build_qname( 
224                      attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap, 
225                      preferred_prefix=None, is_attribute=True) 
226              sax_attributes = self._attr_class(attr_values, attr_qnames) 
227          else: 
228              sax_attributes = self._empty_attributes 
229   
230          ns_uri, local_name = _getNsTag(tag) 
231          qname = self._build_qname( 
232              ns_uri, local_name, element_nsmap, element.prefix, is_attribute=False) 
233   
234          for prefix, uri in new_prefixes: 
235              content_handler.startPrefixMapping(prefix, uri) 
236          content_handler.startElementNS( 
237              (ns_uri, local_name), qname, sax_attributes) 
238          text = element.text 
239          if text: 
240              content_handler.characters(text) 
241          for child in element: 
242              self._recursive_saxify(child, element_nsmap) 
243          content_handler.endElementNS((ns_uri, local_name), qname) 
244          for prefix, uri in new_prefixes: 
245              content_handler.endPrefixMapping(prefix) 
246          tail = element.tail 
247          if tail: 
248              content_handler.characters(tail) 
 249   
250 -    def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute): 
 251          if ns_uri is None: 
252              return local_name 
253   
254          if not is_attribute and nsmap.get(preferred_prefix) == ns_uri: 
255              prefix = preferred_prefix 
256          else: 
257               
258              candidates = [ 
259                  pfx for (pfx, uri) in nsmap.items() 
260                  if pfx is not None and uri == ns_uri 
261              ] 
262              prefix = ( 
263                  candidates[0] if len(candidates) == 1 
264                  else min(candidates) if candidates 
265                  else None 
266              ) 
267   
268          if prefix is None: 
269               
270              return local_name 
271          return prefix + ':' + local_name 
  272   
273   
def saxify(element_or_tree, content_handler):
    """Fire the SAX events of an XML tree against a SAX ContentHandler.

    Convenience wrapper that builds an ``ElementTreeProducer`` for the given
    tree or element and returns the result of its ``saxify()`` call.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
 279