Source code for regparser.grammar.tokens

""" Set of Tokens to be used when parsing.
    @label is a list describing the depth of a paragraph/context. It follows:
    [ Part, Subpart/Appendix/Interpretations, Section, p-level-1, p-level-2,
    p-level-3, p-level4, p-level5 ]
"""
import attr
import six


[docs]def uncertain_label(label_parts): """Convert a list of strings/Nones to a '-'-separated string with question markers to replace the Nones. We use this format to indicate uncertainty""" return '-'.join(p or '?' for p in label_parts)
def _none_str(value): """Shorthand for displaying a variable as a string or the text None""" if value is None: return 'None' else: return "'{0}'".format(value) @attr.attrs(frozen=True)
[docs]class Token(object): """Base class for all tokens. Provides methods for pattern matching and copying this token"""
[docs] def match(self, *types, **fields): """Pattern match. self must be one of the types provided (if they were provided) and all of the fields must match (if fields were provided). If a successful match, returns self""" type_match = not types or any(isinstance(self, typ) for typ in types) has_fields = not fields or all(hasattr(self, f) for f in fields) fields_match = not has_fields or all( getattr(self, f) == v for f, v in fields.items()) return type_match and has_fields and fields_match and self
@attr.attrs(slots=True, frozen=True)
[docs]class Verb(Token): """Represents what action is taking place to the paragraphs""" verb = attr.attrib() active = attr.attrib() and_prefix = attr.attrib(default=False) PUT = 'PUT' POST = 'POST' MOVE = 'MOVE' DELETE = 'DELETE' DESIGNATE = 'DESIGNATE' RESERVE = 'RESERVE' KEEP = 'KEEP' INSERT = 'INSERT'
@attr.attrs(slots=True, frozen=True)
[docs]class Context(Token): """Represents a bit of context for the paragraphs. This gets compressed with the paragraph tokens to define the full scope of a paragraph. To complicate matters, sometimes what looks like a Context is actually the entity which is being modified (i.e. a paragraph). If we are certain that this is only context, (e.g. "In Subpart A"), use 'certain'""" # replace with Nones label = attr.attrib(convert=lambda label: [p or None for p in label]) certain = attr.attrib(default=False)
@attr.attrs(slots=True, frozen=True)
[docs]class Paragraph(Token): """Represents an entity which is being modified by the amendment. Label is a way to locate this paragraph (though see the above note). We might be modifying a field of a paragraph (e.g. intro text only, or title only;) if so, set the `field` parameter.""" label = attr.attrib(default=attr.Factory(list)) field = attr.attrib(default=None) TEXT_FIELD = 'text' HEADING_FIELD = 'title' KEYTERM_FIELD = 'heading' @classmethod
[docs] def make(cls, label=None, field=None, part=None, sub=None, section=None, paragraphs=None, paragraph=None, subpart=None, is_interp=None, appendix=None): """label and field are the only "materialized" fields. Everything other field becomes part of the label, offering a more legible API. Particularly useful for writing tests""" if sub is None and subpart: if isinstance(subpart, six.string_types): sub = 'Subpart:{0}'.format(subpart) else: sub = 'Subpart' if sub is None and is_interp: sub = 'Interpretations' if sub is None and appendix: sub = 'Appendix:' + appendix if paragraph: paragraphs = [paragraph] if label is None: label = [part, sub, section] + (paragraphs or []) # replace with Nones label = [p or None for p in label] # Trim the right side of the list while label and not label[-1]: label.pop() return cls(label, field)
[docs] def label_text(self): """Converts self.label into a string""" label = uncertain_label(self.label) if self.field: label += '[{0}]'.format(self.field) return label
@attr.attrs(slots=True, frozen=True)
[docs]class TokenList(Token): """Represents a sequence of other tokens, e.g. comma separated of created via "through" """ tokens = attr.attrib() def __iter__(self): return iter(self.tokens)
@attr.attrs(slots=True, frozen=True)
[docs]class AndToken(Token): """The word 'and' can help us determine if a Context token should be a Paragraph token. Note that 'and' might also trigger the creation of a TokenList, which takes precedent"""