Source code for regparser.notice.dates

import re
from datetime import datetime

from regparser.tree.xml_parser.tree_utils import get_node_text


def parse_date_sentence(sentence):
    """Return the date type + date in this sentence (if one exists).

    The sentence is lower-cased and stripped, then checked for a
    "<month name> <day>, <year>" date at its very end.

    :param sentence: text of a single sentence.
    :returns: a ``(date_type, date)`` tuple, or ``None`` when the sentence
        does not end in a valid calendar date. ``date_type`` is
        ``'comments'`` if the sentence contains "comment", ``'effective'``
        if it contains "effective" (checked in that order), and ``'other'``
        otherwise. ``date`` is a string formatted as ``%Y-%m-%d``.
    """
    # Search for month date, year at the end of the sentence
    sentence = sentence.lower().strip()
    date_re = (r".*((january|february|march|april|may|june|july|august"
               r"|september|october|november|december) \d+, \d+)$")
    match = re.match(date_re, sentence)
    if not match:
        return None

    try:
        date = datetime.strptime(match.group(1), "%B %d, %Y")
    except ValueError:
        # The regex accepts any run of digits for the day and year, so text
        # such as "february 30, 2012" or "may 5, 12" matches but is not a
        # real date. Treat it as "no date in this sentence" rather than
        # letting the exception escape.
        return None

    formatted = date.strftime("%Y-%m-%d")
    if 'comment' in sentence:
        return ('comments', formatted)
    if 'effective' in sentence:
        return ('effective', formatted)
    return ('other', formatted)
def fetch_dates(xml):
    """Collect the dates mentioned in a notice's XML, grouped by date type.

    Paragraphs are taken from ``//EFFDATE/P``; if that query yields nothing,
    ``//DATES/P`` is used instead. Each paragraph's text is split on '.'
    and every piece is passed to ``parse_date_sentence``.

    :param xml: an object exposing an ``xpath`` method.
    :returns: a dict mapping each date type to the list of dates found for
        it (a type may have several dates), or ``None`` when no dates were
        found at all.
    """
    paragraphs = xml.xpath('//EFFDATE/P')
    if not paragraphs:
        paragraphs = xml.xpath('//DATES/P')

    found = {}
    for paragraph in paragraphs:
        for piece in get_node_text(paragraph).split('.'):
            # Newlines inside a sentence are flattened to spaces before
            # the sentence is parsed.
            parsed = parse_date_sentence(piece.replace('\n', ' '))
            if not parsed:
                continue
            kind, value = parsed
            found.setdefault(kind, []).append(value)

    # Callers get None rather than an empty dict when nothing was found.
    return found if found else None