# Source code for regparser.grammar.unified

# -*- coding: utf-8 -*-
"""Some common combinations"""
from pyparsing import (FollowedBy, LineEnd, Literal, OneOrMore, Optional,
                       SkipTo, Suppress, ZeroOrMore)

from regparser.grammar import atomic
from regparser.grammar.utils import Marker, QuickSearchable, keep_pos

# A section preceded by a period; the period itself is dropped from the
# results (Suppress).
period_section = Suppress(".") + atomic.section
# A part, then a period, then a section (see the "12 CFR 1005.10" example
# further down this module).
part_section = atomic.part + period_section
# A section marker followed by part.section. The marker is wrapped in
# keep_pos and exposed under the results name "marker".
# NOTE(review): keep_pos presumably records where the marker matched --
# confirm in regparser.grammar.utils.
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") +
    part_section)

# Paragraph ("_p") chains. Each depthN_p matches the element for its own
# level and then, optionally, the chain for the next level down, so depth1_p
# can consume up to six nested elements. They are defined deepest-first
# because every level refers to the one below it.
depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = (
    (atomic.em_digit_p | atomic.plaintext_level5_p) +
    Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
# The negative lookahead rejects a lower_p which is immediately followed by
# an upper_p.
# NOTE(review): presumably this guards against misreading some other kind of
# citation as a first-level paragraph -- confirm the motivating case.
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
# A chain starting at any level; alternatives are tried in the order written,
# shallowest starting level first.
any_depth_p = QuickSearchable(
    depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p)

# "_c" chains, nested the same way as the paragraph chains above but only
# three levels deep (plus an optional trailing em_digit_c).
# NOTE(review): "_c" presumably stands for comment/interpretation paragraphs,
# given its use in section_comment and marker_comment -- confirm in atomic.
depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
# Either of the two "_a" atoms; used below for the optional trailing
# components of appendix_with_part.
any_a = atomic.upper_a | atomic.digit_a

# A section immediately followed by a "_c" chain.
section_comment = atomic.section + depth1_c

# A section immediately followed by a (required) paragraph chain.
section_paragraph = QuickSearchable(atomic.section + depth1_p)

# "mps" = marker + part + section; the paragraph chain is optional.
mps_paragraph = QuickSearchable(marker_part_section + Optional(depth1_p))
# "ps" = part + section; the paragraph chain is optional.
ps_paragraph = part_section + Optional(depth1_p)
# Same shape as ps_paragraph, except that the paragraph chain is required and
# the part/period/section pieces are spelled out inline.
part_section_paragraph = QuickSearchable(
    atomic.part + Suppress(".") + atomic.section + depth1_p)


# A paragraph marker (exposed as "marker"), then a section, then a required
# paragraph chain.
m_section_paragraph = QuickSearchable(
    keep_pos(atomic.paragraph_marker).setResultsName("marker") +
    atomic.section +
    depth1_p)

# Either the singular or the plural paragraph marker (exposed as "marker"),
# followed directly by a paragraph chain -- no section component.
marker_paragraph = QuickSearchable(
    keep_pos(
        atomic.paragraph_marker | atomic.paragraphs_marker
    ).setResultsName("marker") +
    depth1_p)


def appendix_section(match):
    """Build the section label for an appendix citation.

    Appendices may have parenthetical paragraphs in their section number, so
    every token which follows the appendix digit is folded into the label;
    e.g. the tokens ``['1', 'a', '2']`` become ``'1(a)(2)'``.

    :param match: parse results -- an iterable of string tokens which also
        exposes the matched digit as ``match.appendix_digit``
    :return: the digit with each following token appended in parentheses,
        or None when no appendix digit was matched
    """
    digit = match.appendix_digit
    if not digit:
        return None

    tokens = list(match)
    # Everything after the digit token is treated as a paragraph component
    paragraphs = tokens[tokens.index(digit) + 1:]
    if not paragraphs:
        return digit
    return digit + '(' + ')('.join(paragraphs) + ')'


# An appendix, a literal hyphen, then an appendix digit followed by any
# number of paragraph elements. The appendix_section parse action collapses
# the digit and the elements after it into a single string, which is exposed
# under the results name "appendix_section".
appendix_with_section = QuickSearchable(
    atomic.appendix +
    '-' +
    (atomic.appendix_digit +
     ZeroOrMore(atomic.lower_p | atomic.roman_p | atomic.digit_p |
                atomic.upper_p)).setParseAction(
                    appendix_section).setResultsName("appendix_section"),
    # optimization: encode the regex
    # NOTE(review): force_regex_str is presumably used by QuickSearchable
    # in place of a regex it would otherwise derive from the grammar --
    # confirm in regparser.grammar.utils.
    force_regex_str=r"[A-Z]+[0-9]*\b\s*-")

# An appendix marker (exposed as "marker"), the appendix, a suppressed comma,
# the word "part" (via Marker), an upper_roman_a, and then up to three
# optional any_a components.
appendix_with_part = QuickSearchable(
    keep_pos(atomic.appendix_marker).setResultsName("marker") +
    atomic.appendix +
    Suppress(",") + Marker('part') +
    atomic.upper_roman_a +
    Optional(any_a) + Optional(any_a) + Optional(any_a))

# An appendix marker (exposed as "marker") followed by an appendix. The form
# with a section is tried first so that the bare appendix alternative does
# not match and leave the "-section" text unconsumed.
marker_appendix = QuickSearchable(
    keep_pos(atomic.appendix_marker).setResultsName("marker") +
    (appendix_with_section | atomic.appendix))

# A part marker (exposed as "marker") followed by a part.
marker_part = (
    keep_pos(atomic.part_marker).setResultsName("marker") +
    atomic.part)

# A subpart marker (exposed as "marker") followed by a subpart.
marker_subpart = (
    keep_pos(atomic.subpart_marker).setResultsName("marker") +
    atomic.subpart)

# As marker_subpart, but also captures the title: an optional em dash is
# dropped, and everything up to the end of the line is exposed as
# "subpart_title".
marker_subpart_title = (
    keep_pos(atomic.subpart_marker).setResultsName("marker") +
    atomic.subpart +
    Optional(Suppress(Literal(u"—"))) +
    SkipTo(LineEnd()).setResultsName("subpart_title")
)

# A comment marker (exposed as "marker"), then one of the section/paragraph
# forms defined earlier in this module (tried in the order listed), then an
# optional "_c" chain.
marker_comment = QuickSearchable(
    keep_pos(atomic.comment_marker).setResultsName("marker") +
    (section_comment | section_paragraph | ps_paragraph | mps_paragraph) +
    Optional(depth1_c)
)


def make_multiple(head, tail=None, wrap_tail=False):
    """Build a grammar for a string of citation terms.

    We have a recurring need to parse citations which have a string of
    terms, e.g. section 11(a), (b)(4), and (5). This function is a shorthand
    for setting these elements up.

    :param head: grammar for the first term; its match is exposed under the
        results name "head"
    :param tail: grammar for each subsequent term; defaults to ``head``
    :param wrap_tail: when True, each tail term may be surrounded by an
        optional opening and/or closing parenthesis, which are dropped from
        the results
    :return: a QuickSearchable matching ``head`` followed by one or more
        tail terms, each of which must be introduced by
        ``atomic.conj_phrases``
    """
    if tail is None:
        tail = head
    head = keep_pos(head).setResultsName("head")
    # We need to address just the matching text separately from the
    # conjunctive phrase
    tail = keep_pos(tail).setResultsName("match")
    # listAllMatches keeps every repetition under "tail", rather than only
    # the last one
    tail = (atomic.conj_phrases + tail).setResultsName(
        "tail", listAllMatches=True)
    if wrap_tail:
        tail = Optional(Suppress('(')) + tail + Optional(Suppress(')'))
    return QuickSearchable(head + OneOrMore(tail))


# The alternatives for a single non-comment citation term, tried in the order
# listed; shared by the multiple_* grammars below as their head and/or tail.
_inner_non_comment = (
    any_depth_p |
    (part_section + Optional(depth1_p)) |
    (atomic.section + depth1_p) |
    appendix_with_section | marker_appendix)


# A paragraph(s)/section(s) marker followed by a run of non-comment terms;
# tail terms may be wrapped in parentheses.
multiple_non_comments = QuickSearchable(
    (atomic.paragraphs_marker | atomic.paragraph_marker |
     atomic.sections_marker | atomic.section_marker) +
    make_multiple(_inner_non_comment, wrap_tail=True))

# A run which must start with a section + paragraph chain; later terms may be
# any non-comment term. No leading marker is required.
multiple_section_paragraphs = make_multiple(
    head=section_paragraph, tail=_inner_non_comment)

# A sections marker, a full part.section, then terms which are each a period
# followed by a section.
multiple_period_sections = QuickSearchable(
    atomic.sections_marker +
    make_multiple(head=part_section, tail=period_section))

# A run which must start with an appendix-with-section; later terms may be
# any non-comment term, optionally wrapped in parentheses.
multiple_appendix_section = make_multiple(
    head=appendix_with_section,
    tail=_inner_non_comment, wrap_tail=True)

# An appendices marker followed by a run of appendices.
multiple_appendices = QuickSearchable(
    atomic.appendices_marker +
    make_multiple(atomic.appendix))

# A comment(s) marker followed by a run of terms. The head may begin with an
# optional section marker and end with an optional "_c" chain; each tail term
# is either a non-comment term with an optional "_c" chain, or a bare "_c"
# chain on its own.
multiple_comments = QuickSearchable(
    (atomic.comments_marker | atomic.comment_marker) +
    make_multiple(
        head=(Optional(atomic.section_marker) + _inner_non_comment +
              Optional(depth1_c)),
        tail=(_inner_non_comment + Optional(depth1_c)) | depth1_c,
        wrap_tail=True))

# e.g. 12 CFR 1005
# A title, the (suppressed) text "CFR", an optional "part" Marker, and a part.
cfr = QuickSearchable(
    atomic.title + Suppress("CFR") + Optional(Marker("part")) + atomic.part
)
# e.g. 12 CFR 1005.10
# The cfr grammar extended with a (suppressed) period, a section, and an
# optional paragraph chain.
cfr_p = QuickSearchable(
    cfr +
    Suppress('.') +
    atomic.section +
    Optional(depth1_p))

# e.g. 12 CFR 1005.10, 1006.21, and 1010.10
# Only the head carries the title and "CFR"; each tail term is a bare
# part.section with an optional paragraph chain.
multiple_cfr_p = make_multiple(
    head=cfr_p,
    tail=atomic.part + Suppress('.') + atomic.section + Optional(depth1_p))

# A title, "CFR", an optional (suppressed) part/parts marker, then one or
# more parts. Each part is collected into the "cfr_parts" list and may be
# followed by a suppressed comma and/or a suppressed "and".
# NOTE(review): judging by the name this targets citations in notices --
# confirm with callers.
notice_cfr_p = (
    atomic.title +
    Suppress("CFR") +
    Optional(Suppress(atomic.part_marker | atomic.parts_marker)) +
    OneOrMore(
        atomic.part.copy().setResultsName('cfr_parts', listAllMatches=True) +
        Optional(Suppress(',')) +
        Optional(Suppress('and'))
    )
)