Source code for regparser.grammar.unified

# -*- coding: utf-8 -*-
"""Some common combinations"""
from pyparsing import (FollowedBy, LineEnd, Literal, OneOrMore, Optional,
                       SkipTo, Suppress, ZeroOrMore)

from regparser.grammar import atomic
from regparser.grammar.utils import Marker, QuickSearchable, keep_pos

# Building blocks for "<part>.<section>" style citations.
# Suppress matches the literal "." but leaves it out of the parse results, so
# only whatever atomic.section produces is kept.
period_section = Suppress(".") + atomic.section
# A part followed by ".<section>".
part_section = atomic.part + period_section
# part_section preceded by a section marker; the marker is exposed in the
# results under the name "marker".
# NOTE(review): keep_pos comes from regparser.grammar.utils -- presumably it
# records where the marker matched; confirm there.
marker_part_section = (
    keep_pos(atomic.section_marker).setResultsName("marker") +
    part_section)

# Nested paragraph-marker chains. Each depthN_p is one marker expression for
# that level optionally followed by the chain for the next-deeper level, so
# the definitions must be built from the deepest level (6) upwards.
# NOTE(review): the atomic.*_p names suggest (a)(1)(i)(A)-style markers with
# emphasised variants at depths 5 and 6 -- confirm in regparser.grammar.atomic.
depth6_p = atomic.em_roman_p | atomic.plaintext_level6_p
depth5_p = (
    (atomic.em_digit_p | atomic.plaintext_level5_p) +
    Optional(depth6_p))
depth4_p = atomic.upper_p + Optional(depth5_p)
depth3_p = atomic.roman_p + Optional(depth4_p)
depth2_p = atomic.digit_p + Optional(depth3_p)
# ~FollowedBy is a negative lookahead: a depth-1 marker is rejected when an
# atomic.upper_p match comes directly after it.
depth1_p = atomic.lower_p + ~FollowedBy(atomic.upper_p) + Optional(depth2_p)
# Accept a chain starting at any depth. "|" tries the alternatives left to
# right and takes the first that matches, so shallower depths win.
any_depth_p = QuickSearchable(
    depth1_p | depth2_p | depth3_p | depth4_p | depth5_p | depth6_p)

# Nested "_c" marker chains, built the same way as the "_p" chains: one marker
# for the level, optionally followed by the next-deeper chain.
# NOTE(review): the "_c" suffix presumably stands for comment/interpretation
# paragraphs (depth1_c is used in section_comment) -- confirm in atomic.
depth3_c = atomic.upper_c + Optional(atomic.em_digit_c)
depth2_c = atomic.roman_c + Optional(depth3_c)
depth1_c = atomic.digit_c + Optional(depth2_c)
# Either of the two "_a" atoms; upper_a is tried first.
any_a = atomic.upper_a | atomic.digit_a

# A section followed by a required depth1_c chain.
section_comment = atomic.section + depth1_c

# A section followed by a required paragraph chain.
# NOTE(review): QuickSearchable is a project wrapper from
# regparser.grammar.utils; its effect is not visible here.
section_paragraph = QuickSearchable(atomic.section + depth1_p)

# marker + part.section, with the paragraph chain optional.
mps_paragraph = QuickSearchable(marker_part_section + Optional(depth1_p))
# part.section with the paragraph chain optional (no marker, not wrapped in
# QuickSearchable).
ps_paragraph = part_section + Optional(depth1_p)
# part "." section with a required paragraph chain. This spells out the same
# elements as part_section inline rather than reusing that name.
part_section_paragraph = QuickSearchable(
    atomic.part + Suppress(".") + atomic.section + depth1_p)


# atomic.paragraph_marker (exposed in the results as "marker"), then a section
# and a required paragraph chain.
m_section_paragraph = QuickSearchable(
    keep_pos(atomic.paragraph_marker).setResultsName("marker") +
    atomic.section +
    depth1_p)

# atomic.paragraph_marker or atomic.paragraphs_marker (exposed in the results
# as "marker") followed by a required paragraph chain; no section component.
marker_paragraph = QuickSearchable(
    keep_pos(
        atomic.paragraph_marker | atomic.paragraphs_marker
    ).setResultsName("marker") +
    depth1_p)


def appendix_section(match):
    """Build the section label for an appendix citation.

    Appendices may have parenthetical paragraphs in their section number, so
    the tokens ``['1', 'a', 'i']`` are combined into ``'1(a)(i)'``.

    :param match: the matched tokens; it is iterated for the token values and
        its ``appendix_digit`` attribute is read for the leading digit.
    :returns: the digit followed by every later token wrapped in parentheses,
        just the digit when no tokens follow it, or ``None`` when
        ``appendix_digit`` is empty.
    """
    digit = match.appendix_digit
    if not digit:
        return None

    tokens = list(match)
    # Every token after the digit is treated as a paragraph marker
    paragraphs = tokens[tokens.index(digit) + 1:]
    if not paragraphs:
        return digit
    return digit + '(' + ')('.join(paragraphs) + ')'
# "<appendix>-<digit>" optionally followed by paragraph markers; the digit and
# markers are merged into a single "appendix_section" result by the
# appendix_section parse action.
appendix_with_section = QuickSearchable(
    atomic.appendix + '-' +
    (atomic.appendix_digit +
     ZeroOrMore(atomic.lower_p | atomic.roman_p | atomic.digit_p |
                atomic.upper_p)).setParseAction(
        appendix_section).setResultsName("appendix_section"),
    # optimization: encode the regex
    force_regex_str=r"[A-Z]+[0-9]*\b\s*-")

# Appendix marker, appendix, a suppressed comma, a 'part' Marker, an
# upper_roman_a atom and then up to three optional any_a atoms.
appendix_with_part = QuickSearchable(
    keep_pos(atomic.appendix_marker).setResultsName("marker") +
    atomic.appendix +
    Suppress(",") + Marker('part') +
    atomic.upper_roman_a +
    Optional(any_a) + Optional(any_a) + Optional(any_a))

# Appendix marker followed by an appendix; the form with a section is tried
# first so that it wins when both could match.
marker_appendix = QuickSearchable(
    keep_pos(atomic.appendix_marker).setResultsName("marker") +
    (appendix_with_section | atomic.appendix))

marker_part = (
    keep_pos(atomic.part_marker).setResultsName("marker") +
    atomic.part)

marker_subpart = (
    keep_pos(atomic.subpart_marker).setResultsName("marker") +
    atomic.subpart)
# Like marker_subpart, but also captures the remainder of the line (after an
# optional, suppressed em dash) as "subpart_title".
marker_subpart_title = (
    keep_pos(atomic.subpart_marker).setResultsName("marker") +
    atomic.subpart +
    Optional(Suppress(Literal(u"—"))) +
    SkipTo(LineEnd()).setResultsName("subpart_title")
)

# Comment marker, one of the section/paragraph citation forms (tried in the
# order listed), then an optional depth1_c chain.
marker_comment = QuickSearchable(
    keep_pos(atomic.comment_marker).setResultsName("marker") +
    (section_comment | section_paragraph | ps_paragraph | mps_paragraph) +
    Optional(depth1_c)
)
def make_multiple(head, tail=None, wrap_tail=False):
    """Build a grammar for a citation made of a string of terms.

    We have a recurring need to parse citations which have a string of terms,
    e.g. section 11(a), (b)(4), and (5). This function is a shorthand for
    setting these elements up.

    :param head: expression for the first term; exposed in the results under
        the name "head".
    :param tail: expression for each following term; defaults to ``head``.
        Every following term must be preceded by ``atomic.conj_phrases`` and
        is collected under "tail" (all matches kept), with the term itself
        available as "match".
    :param wrap_tail: when true, each tail element may be preceded by "("
        and/or followed by ")"; both are optional independently of each other
        and are dropped from the results.
    :returns: a QuickSearchable matching ``head`` followed by one or more
        tail elements.
    """
    if tail is None:
        tail = head
    head_expr = keep_pos(head).setResultsName("head")
    # We need to address just the matching text separately from the
    # conjunctive phrase
    match_expr = keep_pos(tail).setResultsName("match")
    tail_expr = (atomic.conj_phrases + match_expr).setResultsName(
        "tail", listAllMatches=True)
    if wrap_tail:
        tail_expr = (
            Optional(Suppress('(')) + tail_expr + Optional(Suppress(')')))
    return QuickSearchable(head_expr + OneOrMore(tail_expr))
# The citation forms accepted as one term of a multi-term citation;
# alternatives are tried in the order listed.
_inner_non_comment = (
    any_depth_p |
    (part_section + Optional(depth1_p)) |
    (atomic.section + depth1_p) |
    appendix_with_section |
    marker_appendix)

multiple_non_comments = QuickSearchable(
    (atomic.paragraphs_marker | atomic.paragraph_marker |
     atomic.sections_marker | atomic.section_marker) +
    make_multiple(_inner_non_comment, wrap_tail=True))

multiple_section_paragraphs = make_multiple(
    head=section_paragraph,
    tail=_inner_non_comment)

multiple_period_sections = QuickSearchable(
    atomic.sections_marker +
    make_multiple(head=part_section, tail=period_section))

multiple_appendix_section = make_multiple(
    head=appendix_with_section,
    tail=_inner_non_comment,
    wrap_tail=True)

multiple_appendices = QuickSearchable(
    atomic.appendices_marker +
    make_multiple(atomic.appendix))

# A tail element here may be a bare depth1_c chain as well as a full citation
# with an optional depth1_c chain.
multiple_comments = QuickSearchable(
    (atomic.comments_marker | atomic.comment_marker) +
    make_multiple(
        head=(Optional(atomic.section_marker) + _inner_non_comment +
              Optional(depth1_c)),
        tail=(_inner_non_comment + Optional(depth1_c)) | depth1_c,
        wrap_tail=True))

# e.g. 12 CFR 1005
cfr = QuickSearchable(
    atomic.title + Suppress("CFR") + Optional(Marker("part")) + atomic.part
)
# e.g. 12 CFR 1005.10
cfr_p = QuickSearchable(
    cfr + Suppress('.') + atomic.section + Optional(depth1_p))
# e.g. 12 CFR 1005.10, 1006.21, and 1010.10
multiple_cfr_p = make_multiple(
    head=cfr_p,
    tail=atomic.part + Suppress('.') + atomic.section + Optional(depth1_p))

# Title, "CFR", an optional (suppressed) part/parts marker, then one or more
# parts collected under "cfr_parts"; separating commas and "and" are dropped.
notice_cfr_p = (
    atomic.title + Suppress("CFR") +
    Optional(Suppress(atomic.part_marker | atomic.parts_marker)) +
    OneOrMore(
        atomic.part.copy().setResultsName('cfr_parts', listAllMatches=True) +
        Optional(Suppress(',')) + Optional(Suppress('and'))
    )
)