# Source code for weaver.xml_util

"""
Define a default XML parser that avoids XXE injection.

Package :mod:`lxml` is used directly, even though some linters (e.g.: ``bandit``) recommend using ``defusedxml``
instead, because the ``lxml`` extension of that package is marked as deprecated.

.. seealso::
    https://pypi.org/project/defusedxml/#defusedxml-lxml

To use the module, import it as if importing ``lxml.etree``:

.. code-block:: python

    from weaver.xml_util import XML  # ElementTree
    from weaver import xml_util

    data = xml_util.fromstring("<xml>content</xml>")
"""

from lxml import etree  # nosec: B410  # flagged issue is known, this is what the applied fix below is about

# security fix: XML external entity (XXE) injection
#   https://lxml.de/parsing.html#parser-options
#   https://nvd.nist.gov/vuln/detail/CVE-2021-39371
# based on:
#   https://github.com/geopython/pywps/pull/616
# Parser handed to 'fromstring' and 'parse' of this module.
# Entity resolution is disabled so that entities declared by a parsed document are not replaced by their value.
XML_PARSER = etree.XMLParser(
    resolve_entities=False,
)
# re-export these 'lxml.etree' members so that this module can be imported in place of 'lxml.etree'
tostring = etree.tostring
Element = etree.Element
# define this type here so that code can use it for actual logic without repeating 'noqa'
# ('_Element' is a protected member of 'lxml.etree', which is what the 'noqa' silences)
XML = etree._Element  # noqa
def fromstring(text):
    """
    Parse XML contents from a string using the module's :data:`XML_PARSER`.

    Thin wrapper around :func:`lxml.etree.fromstring` that always enforces the parser configured
    with ``resolve_entities=False`` instead of the default one.

    :param text: XML contents to parse, forwarded as-is to :func:`lxml.etree.fromstring`.
    :returns: whatever :func:`lxml.etree.fromstring` returns (the root element of the parsed document).
    """
    return etree.fromstring(text, parser=XML_PARSER)  # nosec: B410
def parse(source):
    """
    Parse an XML source using the module's :data:`XML_PARSER`.

    Thin wrapper around :func:`lxml.etree.parse` that always enforces the parser configured
    with ``resolve_entities=False`` instead of the default one.

    :param source: XML source to parse, forwarded as-is to :func:`lxml.etree.parse`.
    :returns: whatever :func:`lxml.etree.parse` returns (the element tree of the parsed document).
    """
    return etree.parse(source, parser=XML_PARSER)  # nosec: B410