# Source code for regparser.tree.xml_parser.flatsubtree_processor

from regparser.tree.depth import markers as mtypes
from regparser.tree.struct import Node
from regparser.tree.xml_parser import (paragraph_processor,
                                       simple_hierarchy_processor, us_code)


class FlatParagraphProcessor(paragraph_processor.ParagraphProcessor):
    """ParagraphProcessor variant that makes no attempt to derive
    paragraph markers."""

    # One matcher per line, in the same order as originally declared.
    MATCHERS = [
        paragraph_processor.StarsMatcher(),
        paragraph_processor.TableMatcher(),
        simple_hierarchy_processor.SimpleHierarchyMatcher(
            ['NOTE', 'NOTES'], Node.NOTE),
        paragraph_processor.HeaderMatcher(),
        paragraph_processor.SimpleTagMatcher('P', 'FP'),
        us_code.USCodeMatcher(),
        paragraph_processor.GraphicsMatcher(),
        paragraph_processor.IgnoreTagMatcher('PRTPAGE'),
    ]
class FlatsubtreeMatcher(paragraph_processor.BaseMatcher):
    """Matcher for a configurable set of XML tags.

    Elements whose tag was supplied at construction time are handed to a
    FlatParagraphProcessor. The node type assigned to the resulting root
    node may optionally be overridden.
    """

    def __init__(self, tags, node_type=Node.REGTEXT):
        """Remember the tags to match and the node type to assign.

        :param tags: iterable of tag names; copied into a new list.
        :param node_type: node type given to the root node built in
            ``derive_nodes``.
        """
        self.node_type = node_type
        self.tags = list(tags)

    def matches(self, xml):
        """Return whether ``xml.tag`` is one of the configured tags."""
        return xml.tag in self.tags

    def derive_nodes(self, xml, processor=None):
        """Build a markerless root node for ``xml`` and process it flatly.

        The ``processor`` argument is accepted but ignored: a fresh
        FlatParagraphProcessor is always used instead.

        :param xml: element providing ``text`` for the root node.
        :param processor: unused.
        :returns: a one-element list holding whatever
            ``FlatParagraphProcessor.process`` returns for the root node.
        """
        flat_processor = FlatParagraphProcessor()
        # xml.text may be None; treat that the same as empty text.
        stripped = (xml.text or '').strip()
        root = Node(
            text=stripped,
            node_type=self.node_type,
            label=[mtypes.MARKERLESS],
        )
        return [flat_processor.process(xml, root)]