Source code for regparser.tree.depth.markers

"""Namespace for collecting the various types of markers"""
import string

from roman import toRoman


def emphasize(marker):
    """Wrap a marker in an <E T="03"> tag.

    The final depth levels for regulation text are emphasized, so we keep
    their <E> tags to distinguish them from previous levels.

    Any emphasis already present on the marker is stripped first (via
    deemphasize), so an already-emphasized marker is not wrapped twice.

    :param marker: marker text, with or without emphasis tags
    :return: the plain marker text wrapped as <E T="03">...</E>
    """
    # Normalize to plain text first so the result carries exactly one tag pair
    marker_plain = deemphasize(marker)
    return u'<E T="03">{0}</E>'.format(marker_plain)


def deemphasize(marker):
    """Remove <E> emphasis tags from a marker, leaving only its plain text.

    Though the knowledge of emphasis is helpful for determining depth, it
    is _unhelpful_ in other scenarios, where we only care about the plain
    text.

    Only the exact opening tag <E T="03"> and the closing tag </E> are
    removed; every occurrence of each is dropped and all other text is left
    untouched.

    :param marker: marker text, with or without emphasis tags
    :return: the marker with the emphasis tags removed
    """
    return marker.replace('<E T="03">', '').replace('</E>', '')


# Alphabetic markers: the single letters followed by the doubled letters
# ('aa', 'bb', ...). The lowercase set leaves out 'ii'; the uppercase set
# keeps 'II'.
# NOTE(review): 'ii' is presumably skipped because it reads as a roman
# numeral -- confirm against the depth-derivation code.
lower = tuple(string.ascii_lowercase) + tuple(
    letter * 2 for letter in string.ascii_lowercase if letter != 'i')
upper = tuple(string.ascii_uppercase) + tuple(
    letter * 2 for letter in string.ascii_uppercase)
# Decimal markers, '1' through '998'
ints = tuple(map(str, range(1, 999)))
# Roman numerals for 1 through 49: upper case, then the lower-cased copies
upper_roman = tuple(map(toRoman, range(1, 50)))
roman = tuple(numeral.lower() for numeral in upper_roman)
# Emphasized (<E>-wrapped) versions of the integer and lowercase roman markers
em_ints = tuple(map(emphasize, ints))
em_roman = tuple(map(emphasize, roman))


# Two kinds of stars are tracked separately, because the kind indicates how
# much space the stars can occupy
STARS_TAG = 'STARS'
INLINE_STARS = '* * *'
stars = STARS_TAG, INLINE_STARS

# Marker value used for paragraphs that have no marker at all
MARKERLESS = 'MARKERLESS'
markerless = tuple([MARKERLESS])

# Every marker family defined in this module, gathered into a single list
types = [
    lower, upper,
    ints,
    roman, upper_roman,
    em_ints, em_roman,
    stars,
    markerless,
]