# Source code for regparser.grammar.tokens

""" Set of Tokens to be used when parsing.
    @label is a list describing the depth of a paragraph/context. It follows:
    [ Part, Subpart/Appendix/Interpretations, Section, p-level-1, p-level-2,
    p-level-3, p-level-4, p-level-5 ]
"""
import attr
import six


def uncertain_label(label_parts):
    """Convert a list of strings/Nones to a '-'-separated string, putting a
    question mark in place of every None (or otherwise falsy) entry. We use
    this format to indicate uncertainty.

    :param label_parts: iterable of label components; falsy entries mark the
        positions that are unknown
    :returns: the components joined with '-', e.g. ``['1005', None, '2']``
        becomes ``'1005-?-2'``; an empty input yields ``''``"""
    return '-'.join(part or '?' for part in label_parts)


def _none_str(value):
    """Shorthand for displaying a variable as a string or the text None"""
    if value is None:
        return 'None'
    else:
        return "'{0}'".format(value)


@attr.attrs(frozen=True)
class Token(object):
    """Base class for all tokens. Provides pattern matching against this
    token via `match`"""

    def match(self, *types, **fields):
        """Pattern match. self must be one of the types provided (if they
        were provided) and all of the fields must match (if fields were
        provided).

        :param types: candidate classes; when given, self must be an
            instance of at least one of them
        :param fields: attribute name -> expected value; when given, self
            must have every named attribute and each must compare equal
        :returns: self on a successful match, otherwise False"""
        # isinstance accepts the tuple of candidate types directly
        if types and not isinstance(self, types):
            return False
        for name, expected in fields.items():
            # A field this token does not define can never match
            if not hasattr(self, name):
                return False
            if not (getattr(self, name) == expected):
                return False
        return self


@attr.attrs(slots=True, frozen=True)
class Verb(Token):
    """Represents what action is taking place to the paragraphs"""
    # The action itself; presumably one of the constants defined below --
    # nothing here enforces that, so verify against callers
    verb = attr.attrib()
    # NOTE(review): looks like this distinguishes active from passive voice
    # -- confirm against the grammar which builds these tokens
    active = attr.attrib()
    # Optional flag, off unless explicitly set
    and_prefix = attr.attrib(default=False)

    # Names of the known actions
    PUT = 'PUT'
    POST = 'POST'
    MOVE = 'MOVE'
    DELETE = 'DELETE'
    DESIGNATE = 'DESIGNATE'
    RESERVE = 'RESERVE'
    KEEP = 'KEEP'
    INSERT = 'INSERT'


@attr.attrs(slots=True, frozen=True)
class Context(Token):
    """Represents a bit of context for the paragraphs. This gets compressed
    with the paragraph tokens to define the full scope of a paragraph. To
    complicate matters, sometimes what looks like a Context is actually the
    entity which is being modified (i.e. a paragraph). If we are certain
    that this is only context, (e.g. "In Subpart A"), use 'certain'"""
    # Normalize the label on construction: every falsy component (e.g. an
    # empty string) is replaced with None, and the result is always a new
    # list.
    # NOTE(review): later attrs releases renamed `convert` to `converter`
    # -- confirm which attrs version this project pins before upgrading
    label = attr.attrib(convert=lambda label: [p or None for p in label])
    # Set when this token is known to be context only, never the entity
    # being modified
    certain = attr.attrib(default=False)


@attr.attrs(slots=True, frozen=True)
class Paragraph(Token):
    """Represents an entity which is being modified by the amendment. Label
    is a way to locate this paragraph (though see the note on Context). We
    might be modifying a field of a paragraph (e.g. intro text only, or
    title only); if so, set the `field` parameter."""
    # Each instance gets its own fresh list when no label is supplied
    label = attr.attrib(default=attr.Factory(list))
    field = attr.attrib(default=None)

    # Values for `field`
    TEXT_FIELD = 'text'
    HEADING_FIELD = 'title'
    KEYTERM_FIELD = 'heading'

    @classmethod
    def make(cls, label=None, field=None, part=None, sub=None, section=None,
             paragraphs=None, paragraph=None, subpart=None, is_interp=None,
             appendix=None):
        """label and field are the only "materialized" fields. Every other
        parameter becomes part of the label, offering a more legible API.
        Particularly useful for writing tests.

        :param label: complete label; when given, part/sub/section and the
            paragraph parameters are not used to build one
        :param field: stored as-is on the resulting token
        :param part: first label component
        :param sub: second label component; when None it is derived from
            subpart, is_interp or appendix (checked in that order)
        :param section: third label component
        :param paragraphs: list of the remaining label components
        :param paragraph: shorthand for a single-element `paragraphs`; if
            truthy it replaces `paragraphs`
        :param subpart: a string gives 'Subpart:<value>'; any other truthy
            value gives a bare 'Subpart'
        :param is_interp: truthy gives 'Interpretations'
        :param appendix: string which gives 'Appendix:<value>'
        :returns: a new instance of cls"""
        if sub is None and subpart:
            if isinstance(subpart, six.string_types):
                sub = 'Subpart:{0}'.format(subpart)
            else:
                sub = 'Subpart'
        if sub is None and is_interp:
            sub = 'Interpretations'
        if sub is None and appendix:
            sub = 'Appendix:' + appendix
        if paragraph:
            paragraphs = [paragraph]
        if label is None:
            label = [part, sub, section] + (paragraphs or [])
        # Replace falsy components with None. This also copies the list, so
        # the trimming below never mutates the caller's label
        label = [p or None for p in label]
        # Trim the right side of the list
        while label and not label[-1]:
            label.pop()
        return cls(label, field)

    def label_text(self):
        """Converts self.label into a string: the '-'-joined label (unknown
        components shown as '?'), followed by '[<field>]' when a field is
        set"""
        label = uncertain_label(self.label)
        if self.field:
            label += '[{0}]'.format(self.field)
        return label


@attr.attrs(slots=True, frozen=True)
class TokenList(Token):
    """Represents a sequence of other tokens, e.g. comma separated or
    created via "through" """
    # The wrapped tokens; iterating over the TokenList iterates over these
    tokens = attr.attrib()

    def __iter__(self):
        """Iterate over the contained tokens"""
        return iter(self.tokens)


@attr.attrs(slots=True, frozen=True)
class AndToken(Token):
    """The word 'and' can help us determine if a Context token should be a
    Paragraph token. Note that 'and' might also trigger the creation of a
    TokenList, which takes precedence"""