%PDF- %PDF-
Direktori : /proc/self/root/proc/self/root/opt/hc_python/lib/python3.8/site-packages/lxml/ |
Current File : //proc/self/root/proc/self/root/opt/hc_python/lib/python3.8/site-packages/lxml/ElementInclude.py |
#
# ElementTree
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
#
# limited xinclude support for element trees
#
# history:
# 2003-08-15 fl created
# 2003-11-14 fl fixed default loader
#
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# http://www.pythonware.com
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2004 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------

"""
Limited XInclude support for the ElementTree package.

While lxml.etree has full support for XInclude (see
`etree.ElementTree.xinclude()`), this module provides a simpler, pure
Python, ElementTree compatible implementation that supports a simple
form of custom URL resolvers.
"""

from lxml import etree
try:
    from urlparse import urljoin
    from urllib2 import urlopen
except ImportError:
    # Python 3
    from urllib.parse import urljoin
    from urllib.request import urlopen

# Clark-notation names of the XInclude elements this module recognises.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

XINCLUDE_INCLUDE = XINCLUDE + "include"
XINCLUDE_FALLBACK = XINCLUDE + "fallback"
# Wildcard tag used to iterate over every element in the XInclude namespace.
XINCLUDE_ITER_TAG = XINCLUDE + "*"

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6


##
# Fatal include error.

class FatalIncludeError(etree.LxmlSyntaxError):
    """Raised when an XInclude directive cannot be processed.

    This module raises it for recursive includes, resources that fail
    to load, unknown ``parse`` modes and misplaced or unknown elements
    in the XInclude namespace.
    """
    pass


class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when nested inclusion exceeds the permitted maximum depth."""
    pass


##
# ET compatible default loader.
# This loader reads an included resource from disk.
#
# @param href Resource reference.
# @param parse Parse mode. Either "xml" or "text".
# @param encoding Optional text encoding.
# @return The expanded resource. If the parse mode is "xml", this
# is an ElementTree instance. If the parse mode is "text", this
# is a Unicode string. If the loader fails, it can return None
# or raise an IOError exception.
# @throws IOError If the loader fails to load the resource.

def default_loader(href, parse, encoding=None):
    """Load an included resource from the local file system.

    :param href: Path of the resource to open.
    :param parse: ``"xml"`` parses the file and returns its root element;
        any other value reads the file as text.
    :param encoding: Text encoding for non-XML resources; falls back to
        UTF-8 when empty or None.
    :return: The parsed root element, or the decoded text.
    :raises IOError: If the file cannot be opened or read.
    """
    # The context manager closes the handle even when parsing, reading
    # or decoding raises, which the manual close() did not guarantee.
    with open(href, 'rb') as source:
        if parse == "xml":
            return etree.parse(source).getroot()
        data = source.read()
    if not encoding:
        encoding = 'utf-8'
    return data.decode(encoding)


##
# Default loader used by lxml.etree - handles custom resolvers properly
#

def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Load an included resource, passing *parser* on for XML resources.

    :param href: Path or URL of the resource.
    :param parse: ``"xml"`` parses *href* with *parser* and returns the
        root element; any other value reads the resource as text.
    :param encoding: Text encoding for non-XML resources; falls back to
        UTF-8 when empty or None.
    :param parser: Parser handed to ``etree.parse`` for XML resources.
    :return: The parsed root element, or the decoded text.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    # Anything that looks like a URL is fetched with urlopen(), the
    # rest is treated as a local path.
    if "://" in href:
        f = urlopen(href)
    else:
        f = open(href, 'rb')
    # try/finally rather than ``with``: the Python 2 urlopen() result
    # is not a context manager.
    try:
        data = f.read()
    finally:
        f.close()
    if not encoding:
        encoding = 'utf-8'
    return data.decode(encoding)


##
# Wrapper for ET compatibility - drops the parser

def _wrap_et_loader(loader):
    """Adapt a three-argument ET-style *loader* to the internal signature.

    The returned callable accepts the extra ``parser`` argument and
    ignores it, forwarding only ``href``, ``parse`` and ``encoding``.
    """
    def load(href, parse, encoding=None, parser=None):
        return loader(href, parse, encoding)
    return load


##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader. If omitted, it defaults
# to {@link default_loader}. If given, it should be a callable
# that implements the same interface as <b>default_loader</b>.
# @param base_url The base URL of the original file, to resolve
# relative include file references.
# @param max_depth The maximum number of recursive inclusions.
# Limited to reduce the risk of malicious content explosion.
# Pass None to disable the limitation.
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
# @throws FatalIncludeError If the function fails to include a given
# resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node

def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives found below *elem*, in place.

    :param elem: Root element, or a tree object exposing ``getroot()``.
    :param loader: Optional callable ``loader(href, parse, encoding)``;
        when omitted, the module's lxml-aware default loader is used.
    :param base_url: Base URL for resolving relative ``href`` values.
        When None, it is taken from the tree's ``docinfo.URL`` if the
        tree has a ``docinfo`` attribute.
    :param max_depth: Maximum nesting of XML inclusions, or None to
        disable the limit.
    :return: *elem* (the root element if a tree was passed), or its
        replacement when the root itself was an ``xi:include`` element.
    :raises ValueError: If *max_depth* is negative.
    :raises LimitedRecursiveIncludeError: If *max_depth* is exceeded.
    :raises FatalIncludeError: If a resource cannot be included or the
        tree contains malformed XInclude elements.
    """
    if max_depth is None:
        # -1 never reaches the ``== 0`` check while counting down.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if base_url is None:
        if hasattr(elem, 'getroot'):
            tree = elem
            elem = elem.getroot()
        else:
            tree = elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    elif hasattr(elem, 'getroot'):
        elem = elem.getroot()
    # Hand the result back to the caller as the header comment documents;
    # the value used to be dropped.
    return _include(elem, loader, base_url, max_depth)


def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand XInclude directives below *elem*.

    :param elem: Element whose subtree is scanned.
    :param loader: ET-style loader or None for the default loader.
    :param base_url: Base URL used to resolve each ``href``.
    :param max_depth: Remaining inclusion depth; an XML include is
        refused when this is 0.
    :param _parent_hrefs: Set of hrefs currently being included, used
        to detect recursive includes.
    :return: *elem*, or the loaded node / text when *elem* itself was
        an ``xi:include`` element without a parent.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Snapshot the matches first: the loop below modifies the tree.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if parent is None:
                    # Checked before looking at siblings so a root-level
                    # include never reaches parent.remove() with None.
                    return text  # replaced the root node!
                # Carry the directive's tail over on both paths; only the
                # no-predecessor path used to do so. NOTE(review): this
                # relies on lxml's remove() discarding the tail together
                # with the element.
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem