Source code for weaver.xml_util

Define a default XML parser that avoids XXE injection.

Package :mod:`lxml` is employed directly, even though some linters (e.g.: ``bandit``) recommend using ``defusedxml``
instead, because the ``lxml`` extension of that package is marked as deprecated.

.. seealso::

To use the module, import it as if importing ``lxml.etree``:

.. code-block:: python

    from weaver.xml_util import XML  # ElementTree
    from weaver import xml_util

    data = xml_util.fromstring("<xml>content</xml>")
from typing import TYPE_CHECKING

from lxml import etree as lxml_etree  # nosec: B410  # flagged known issue, this is what the applied fix below is about
from owslib.wps import etree as owslib_wps_etree

    from io import BufferedReader
    from typing import Union

# Parser employed by default by the 'fromstring' and 'parse' functions of this module.
XML_PARSER = lxml_etree.XMLParser(
    # security fix: XML external entity (XXE) injection
    # entity references are not substituted by the parser
    resolve_entities=False,
    # avoid failing parsing if some characters are not correctly escaped
    recover=True,  # attempt, no guarantee
)
# re-export lxml utilities so that this module can be imported in place of 'lxml.etree'
tostring = lxml_etree.tostring
Element = lxml_etree.Element
ParseError = lxml_etree.ParseError

# define this type here so that code can use it for actual logic without repeating 'noqa'
XML = lxml_etree._Element  # noqa

# save a local reference to the original lxml function, which 'fromstring' of this module
# delegates to (OWSLib's own 'fromstring' attribute is overridden at the end of this module)
_lxml_fromstring = lxml_etree.fromstring
def fromstring(text, parser=XML_PARSER):
    # type: (str, lxml_etree.XMLParser) -> XML
    """
    Parse XML content from a string, using the parser configured by this module unless another is given.

    :param text: XML content to parse.
    :param parser: parser to employ, :data:`XML_PARSER` by default.
    :returns: the value returned by :func:`lxml.etree.fromstring` for the given content and parser.
    """
    return _lxml_fromstring(text, parser=parser)  # nosec: B410
def parse(source, parser=XML_PARSER):
    # type: (Union[str, BufferedReader], lxml_etree.XMLParser) -> XML
    """
    Parse XML from a source, using the parser configured by this module unless another is given.

    :param source: file path/name or opened file object to read the XML from (per the type hint).
    :param parser: parser to employ, :data:`XML_PARSER` by default.
    :returns: the value returned by :func:`lxml.etree.parse` for the given source and parser.

    .. note::
        The lxml documentation describes the result of ``parse`` as an ``ElementTree`` object rather than
        an element; the declared return type is kept as is for compatibility (to be confirmed against callers).
    """
    return lxml_etree.parse(source, parser=parser)  # nosec: B410
# override the 'fromstring' function called by OWSLib with the one defined in this module,
# so that the configured parser is enforced by default for its XML parsing as well
owslib_wps_etree.fromstring = fromstring