# Source code for regparser.tree.xml_parser.xml_wrapper

from copy import deepcopy

import six
from lxml import etree

from regparser import plugins
from regparser.tree.xml_parser.preprocessors import replace_html_entities


class XMLWrapper(object):
    """Wrapper around XML which provides a consistent interface shared by both
    Notices and Annual editions of XML"""

    def __init__(self, xml, source=None):
        """Includes automatic conversion from string and a deep copy for
        safety.

        :param xml: either a binary string, which is passed through
            `replace_html_entities` and then parsed, or an lxml element,
            of which a deep copy is stored.
        :param source: represents the provenance of this xml. It is _not_
            serialized and hence does not follow the xml through the index.
        :raises ValueError: if `xml` is neither binary nor an lxml element.
        """
        if isinstance(xml, six.binary_type):
            xml = replace_html_entities(xml)
            self.xml = etree.fromstring(xml)
        elif isinstance(xml, etree._Element):
            # Copy so that edits made through this wrapper stay local to it.
            self.xml = deepcopy(xml)
        else:
            raise ValueError("xml should be either binary or an lxml node")
        self.source = source

    def preprocess(self):
        """Unfortunately, the notice xml is often inaccurate. This function
        attempts to fix some of those (general) flaws. For specific issues, we
        tend to instead use the files in settings.LOCAL_XML_PATHS

        :returns: this wrapper, so calls can be chained.
        """
        preprocessors = plugins.instantiate_if_possible(
            'eregs_ns.parser.preprocessors', method_name='transform')
        for plugin in preprocessors:
            # The return value is discarded; presumably each plugin edits
            # the tree in place -- confirm against the plugin contract.
            plugin(self.xml)
        return self

    def xpath(self, *args, **kwargs):
        """Run an XPath query against the wrapped tree. All arguments are
        forwarded unchanged to the root element's `xpath` method and its
        result is returned as-is."""
        return self.xml.xpath(*args, **kwargs)

    def xml_str(self):
        """Serialize the wrapped tree as pretty-printed unicode text."""
        # `etree.tounicode` is deprecated; lxml documents this call as its
        # replacement.
        return etree.tostring(self.xml, encoding='unicode', pretty_print=True)

    def _find_or_create(self, tag):
        """Look for the first matching tag present in the document. If it's
        not present, create it by inserting it into the root

        :param tag: element name; it is appended to '//' to form the query.
        :returns: the first match, or the newly created sub-element.
        """
        matches = self.xpath('//' + tag)
        if matches:
            return matches[0]
        return etree.SubElement(self.xml, tag)